In [1]:
from data_utils.dataset_processing import ClimateDataset, TSVDataset

dataset = "./data_utils/unlabelled_articles_17K/opinion_climate_all_with_bias.csv"
annotated_id_file = "./data_utils/annotated_data_500/pretty_0611_lcad.txt"
annotated_file = "./data_utils/annotated_data_500/0611_majority.json"

entire_dataset = ClimateDataset(dataset, annotated_id_file, annotated_file)
labelled_dataset = TSVDataset('./data_utils/annotated_data_500/final_dataset_v2.tsv')

In [2]:
from torch.utils.data import Subset
from utils import Evaluator

fold = 5

train_set = Subset(labelled_dataset, [i for i in range((-1 + fold)%5, len(labelled_dataset), 5)] + [i for i in range((0 + fold)%5, len(labelled_dataset), 5)] + [i for i in range((1 + fold)%5, len(labelled_dataset), 5)])
valid_set = Subset(labelled_dataset, [i for i in range((2 + fold)%5, len(labelled_dataset), 5)])
test_set  = Subset(labelled_dataset, [i for i in range((3 + fold)%5, len(labelled_dataset), 5)])

evaluator = Evaluator(classifier='naive', mode='macro', detail=True)

len(train_set), len(valid_set), len(test_set)

(257, 86, 85)

In [1]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-mpnet-base-v2")

total = sum([param.nelement() for param in model.parameters()])
print('parameters:', total)

parameters: 109486464


In [3]:
queries = ['attribution of responsibility', 'human interest', 'morality', 'conflict', 'economy']

In [4]:
query_embeddings = model.encode(queries)

print(query_embeddings)

[[ 0.01343607  0.03816285  0.02155725 ...  0.03995542  0.0181604
  -0.02009618]
 [ 0.01507804  0.08343946 -0.00394326 ...  0.05272075  0.00399472
   0.01211869]
 [ 0.02066687  0.06011951  0.0280279  ...  0.0401177   0.03867239
   0.0159173 ]
 [ 0.05329098  0.01754071 -0.00333661 ...  0.03607184  0.02508171
   0.03295524]
 [-0.04011215  0.11893874  0.01274614 ... -0.01535319 -0.00672154
   0.00540853]]


In [5]:
from nltk.tokenize import sent_tokenize

sentences = sent_tokenize(labelled_dataset[2][0])
embeddings = model.encode(sentences)
print(labelled_dataset[2][1])

[0, 1, 0, 1, 1]


In [6]:
cos_sim = util.cos_sim(query_embeddings, embeddings)

all_sentence_combinations = []
for i in range(len(queries)):
    for j in range(len(sentences)):
        all_sentence_combinations.append([cos_sim[i][j], i, j])

all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[0:5]:
    print("{} \t {} \t {:.4f}".format(queries[i], sentences[j], score))

Top-5 most similar pairs:
economy 	 The situation is chaotic people are running on the streets and buildings collapsed. 	 0.2419
conflict 	 The situation is chaotic people are running on the streets and buildings collapsed. 	 0.2398
conflict 	 Video on social media shows people screaming and fleeing in panic and a mosque amongst the buildings damaged. 	 0.1565
economy 	 Last month a series of earthquakes struck the Indonesian island of Lombok killing hundreds of people the biggest on 5 August killed more than 460. 	 0.1465
attribution of responsibility 	 There is a ship washed ashore she added. 	 0.1386


# Zero Shot Performance

In [84]:
from torch.utils.data import Subset
fold = 1
test_set  = Subset(labelled_dataset, [i for i in range((3 + fold)%5, len(labelled_dataset), 5)])

In [9]:
def calc_em(input_dict):
    preds = input_dict['y_pred']
    truths = input_dict['y_true']
    correct_1 = 0
    total_1 = 0
    correct_2 = 0
    total_2 = 0
    correct_a = 0
    total_a = 0

    for i in range(len(truths)):
        if sum(truths[i]) == 1:
            total_1 += 1
            if preds[i] == truths[i]:
                correct_1 += 1

        elif sum(truths[i]) == 2:
            total_2 += 1
            if preds[i] == truths[i]:
                correct_2 += 1

        total_a += 1
        if preds[i] == truths[i]:
            correct_a += 1

    return {'em-1': correct_1/total_1, 'em-2': correct_2/total_2, 'em-a': correct_a/total_a}

In [10]:
def find_frame_with_best_n(text, n):
    sentences = sent_tokenize(text)
    embeddings = model.encode(sentences)

    cos_sim = util.cos_sim(query_embeddings, embeddings)

    all_sentence_combinations = []
    for i in range(len(queries)):
        for j in range(len(sentences)):
            all_sentence_combinations.append([cos_sim[i][j], i, j])

    #Sort list by the highest cosine similarity score
    all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

    output = [0, 0, 0, 0, 0]
    for score, i, j in all_sentence_combinations[:n]:
        output[i] = 1

    return output

In [75]:
n = 2
preds = []
truths = []
for i in range(0, len(test_set)):
    preds.append(find_frame_with_best_n(test_set[i][0], n))
    truths.append(test_set[i][1])

input_dict = {"y_true": truths, "y_pred": preds}

from utils import Evaluator

evaluator = Evaluator(classifier='naive', mode='macro', detail=True)   
print(calc_em(input_dict))
evaluator.eval(input_dict)

{'em-1': 0.1, 'em-2': 0.07407407407407407, 'em-a': 0.04}


{'Precision': [0.34782608695652173,
  0.20588235294117646,
  0.09090909090909091,
  0.6730769230769231,
  0.4107142857142857],
 'Recall': [0.18604651162790697,
  0.25,
  0.09090909090909091,
  0.546875,
  0.6571428571428571],
 'F1': [0.24242424242424243,
  0.22580645161290322,
  0.09090909090909091,
  0.603448275862069,
  0.5054945054945055],
 'Acc': [0.5, 0.52, 0.8, 0.54, 0.55]}

In [76]:
def find_frame(text, threshold):
    sentences = sent_tokenize(text)
    embeddings = model.encode(sentences)

    cos_sim = util.cos_sim(query_embeddings, embeddings)

    all_sentence_combinations = []
    for i in range(len(queries)):
        for j in range(len(sentences)):
            all_sentence_combinations.append([cos_sim[i][j], i, j])

    #Sort list by the highest cosine similarity score
    all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

    output = [0, 0, 0, 0, 0]
    for score, i, j in all_sentence_combinations:
        if score < threshold[i]:
            break
        else:
            output[queries.index(queries[i])] = 1

    return output

In [83]:
threshold = [0.17] * 5

preds = []
truths = []
for i in range(0, len(test_set)):
    preds.append(find_frame(test_set[i][0], threshold))
    truths.append(test_set[i][1])

input_dict = {"y_true": truths, "y_pred": preds}
print(calc_em(input_dict))
from utils import Evaluator

evaluator = Evaluator(classifier='naive', mode='macro', detail=True)   

evaluator.eval(input_dict)

{'em-1': 0.0, 'em-2': 0.030303030303030304, 'em-a': 0.0297029702970297}


{'Precision': [0.4810126582278481,
  0.2619047619047619,
  0.0847457627118644,
  0.7444444444444445,
  0.3723404255319149],
 'Recall': [0.76, 0.7857142857142857, 1.0, 0.9305555555555556, 1.0],
 'F1': [0.5891472868217054,
  0.39285714285714285,
  0.15625,
  0.8271604938271605,
  0.5426356589147286],
 'Acc': [0.4752475247524752,
  0.32673267326732675,
  0.46534653465346537,
  0.7227722772277227,
  0.4158415841584158]}

# Weakly supervised

In [81]:
from torch.utils.data import Subset
from utils import Evaluator

fold = 5

train_set = Subset(labelled_dataset, [i for i in range((-1 + fold)%5, len(labelled_dataset), 5)] + [i for i in range((0 + fold)%5, len(labelled_dataset), 5)] + [i for i in range((1 + fold)%5, len(labelled_dataset), 5)])
valid_set = Subset(labelled_dataset, [i for i in range((2 + fold)%5, len(labelled_dataset), 5)])
test_set  = Subset(labelled_dataset, [i for i in range((3 + fold)%5, len(labelled_dataset), 5)])

evaluator = Evaluator(classifier='naive', mode='macro', detail=True)

len(train_set), len(valid_set), len(test_set)

(302, 100, 100)

In [101]:
def get_threshold(text, label):
    sentences = sent_tokenize(text)
    embeddings = model.encode(sentences)

    cos_sim = util.cos_sim(query_embeddings, embeddings)

    all_sentence_combinations = []
    for i in range(len(queries)):
        for j in range(len(sentences)):
            all_sentence_combinations.append([cos_sim[i][j], i, j])

    #Sort list by the highest cosine similarity score
    all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

    output_pos_max = [0, 0, 0, 0, 0]
    output_neg_max = [0, 0, 0, 0, 0]
    output_pos_min = [1, 1, 1, 1, 1]
    output_neg_min = [1, 1, 1, 1, 1]
    for score, i, j in all_sentence_combinations:
        if label[i] == 1:
            output_pos_max[i] = max(score.item(), output_pos_max[i])
            output_pos_min[i] = min(score.item(), output_pos_min[i])
        else:
            output_neg_max[i] = max(score.item(), output_neg_max[i])
            output_neg_min[i] = min(score.item(), output_neg_min[i])
    
    for i in range(0, 5):
        if output_pos_min[i] == 1:
            output_pos_min[i] = 0
        if output_neg_min[i] == 1:
            output_neg_min[i] = 0
    
    return output_pos_max, output_neg_max, output_pos_min, output_neg_min

In [104]:
import numpy as np
threshold_sum_pos_max = []
threshold_sum_neg_max = []
threshold_sum_pos_min = []
threshold_sum_neg_min = []
pos_num = np.array([0, 0, 0, 0, 0])


for i in range(0, len(train_set)):
    pos_max, neg_max, pos_min, neg_min = get_threshold(train_set[i][0], train_set[i][1])
    threshold_sum_pos_max.append(pos_max)
    threshold_sum_neg_max.append(neg_max)
    threshold_sum_pos_min.append(pos_min)
    threshold_sum_neg_min.append(neg_min)
    pos_num += np.array(train_set[i][1])

neg_num = np.array([len(train_set)] *5) - pos_num

threshold_sum_pos_max = np.array(threshold_sum_pos_max)

threshold_sum_pos_max.shape

(302, 5)

In [107]:
threshold = [(max(threshold_sum_pos_max[:,i]) - min(threshold_sum_pos_max[:,i]))*0.8 + min(threshold_sum_pos_max[:,i]) for i in range(0,5)]
threshold

[0.31803216934204104,
 0.30239362716674806,
 0.27441158294677737,
 0.42875185012817385,
 0.43044724464416506]

In [108]:

preds = []
truths = []
for i in range(0, len(test_set)):
    preds.append(find_frame(test_set[i][0], threshold))
    truths.append(test_set[i][1])

input_dict = {"y_true": truths, "y_pred": preds}
evaluator.eval(input_dict)

{'Precision': [0.5, 0.125, 0.2, 1.0, 0],
 'Recall': [0.06976744186046512,
  0.03571428571428571,
  0.09090909090909091,
  0.015625,
  0],
 'F1': [0.12244897959183673,
  0.05555555555555556,
  0.12500000000000003,
  0.03076923076923077,
  0],
 'Acc': [0.57, 0.66, 0.86, 0.37, 0.64]}