In [1]:
# Imports
import csv
import pandas as pd
import numpy as np
import os
import sklearn
from sklearn import metrics
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/melanie/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
# Load NewsAgendas, skip articles without an annotated agenda score
full_data = pd.read_json('newsagendas.jsonl', lines=True)
full_data = full_data[full_data['annotated-agenda-score'] != 'no answer']
keep_indices = list(full_data.index)
print('NewsAgendas with agenda scores', full_data.shape)

NewsAgendas with agenda scores (461, 10)


In [3]:
full_data.head()

Unnamed: 0,id,article-title,article-contents,annotated-labels,annotated-agenda-score,annotated-evidence,split,weak-label-0,weak-label-1,weak-label-2
0,0,More women than ever are freezing their eggs w...,"NewsBioethics</br></br>LEICESTER, England, Sep...",clickbait,2,{'clickbait': ['There are a variety of reasons...,test,clickbait,bias,
2,2,Nikki Haley Blasts UN for Anti-Israel Vote - S...,U.S. Ambassador to the United Nations Nikki Ha...,politicalbias clickbait propaganda,4,{'clickbait': ['This is not the Muslim apologi...,test,propaganda,bias,
3,3,Why JPMorgan Believes Central Banks Can No Lon...,"In recent weeks, JPMorgan has turned decidedly...",conspiracytheory,3,"{'conspiracytheory': ['But, to us, the fundame...",test,conspiracy,,
4,4,Celebrities To Leave US After Election of Raci...,By Ivan Fernando</br></br>On November 8th ever...,politicalbias,2,{'politicalbias': ['On November 8th everything...,test,satire,,
5,5,Fmr. Congressman and Korean War Vet Calls for ...,"""As a partially disabled army veteran myself, ...",politicalbias,2,{'politicalbias': ['Shamansky took on the Bush...,test,political,clickbait,


In [4]:
# Gather maliciousness scores
full_labels = list(full_data['annotated-agenda-score'])

In [19]:
# Function to generate sentiment scores
def gen_sentiment_labels(df):
    sia = SentimentIntensityAnalyzer()
    df['text'] = df['article-title'] + ' ' + df['article-contents']
    pred_scores, true_scores = [], []
    for row in df.iterrows():
        if str(row[1]['annotated-labels']) == 'nan':
            tags = ''
        else:
            tags = row[1]['annotated-labels'].strip().split(' ')
        score = sia.polarity_scores(row[1]['text'])
        pred_scores.append(int(score['compound'] < 0))
        true_scores.append(int('negativesentiment' in tags))
    return pred_scores, true_scores

In [6]:
# Function to gather weak labels
def get_weak_labels(df):
    weak_label0s = list(df['weak-label-0'])
    weak_label1s = list(df['weak-label-1'])
    weak_label2s = list(df['weak-label-2'])
    weak_labels_mess = np.array([weak_label0s, weak_label1s, weak_label2s]).transpose()
    weak_labels = []
    for x in weak_labels_mess:
        row = []
        for y in x:
            y = y.strip()
            if y and y != 'nan':
                row.append(y)
        weak_labels.append(row)
    return weak_labels

In [7]:
# Function to gather annotated labels
def get_annot_labels(df):
    annot_labels = list(df['annotated-labels'])
    annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()
    annot_labels = list(annot_labels)
    return annot_labels

In [20]:
# Features of interest
features = ['hatespeech', 'junkscience', 'propaganda', 'satire', 'clickbait', 'conspiracytheory']
weak_label_key = {
    "hatespeech":"hate",
    "junkscience":"junksci",
    "propaganda":"propaganda",
    "satire":"satire",
    "clickbait":"clickbait",
    "conspiracytheory":"conspiracy"
}

In [9]:
# Function to load a model predictions file, align index
def load_pred_file(path):
    with open(path, 'r') as f:
        data = [eval(x.strip()) for x in f.readlines()]
    full_preds = np.array([int(x['predicted_label']) for x in data if x['annotation_id'].startswith('old')])
    full_preds = list(full_preds[keep_indices])
    return full_preds

In [10]:
# Function to calculate scores for model predictions
def score_preds(true, preds):
    precision0 = metrics.precision_score(true, preds, pos_label=0, average='binary')
    precision1 = metrics.precision_score(true, preds, pos_label=1, average='binary')
    recall0 = metrics.recall_score(true, preds, pos_label=0, average='binary')
    recall1 = metrics.recall_score(true, preds, pos_label=1, average='binary')
    accuracy = metrics.accuracy_score(true, preds)
    balacc = metrics.accuracy_score(true, preds)
    f1_0 = metrics.f1_score(true, preds, pos_label=0, average='binary')
    f1_1 = metrics.f1_score(true, preds, pos_label=1, average='binary')
    iou = metrics.jaccard_score(true, preds)
    f1macro = metrics.f1_score(true, preds, average='macro')
    results = {
        'Precision':[round(precision0*100, 1), round(precision1*100, 1)],
        'Recall':[round(recall0*100, 1), round(recall1*100, 1)],
        'F1': [round(f1_0*100, 1), round(f1_1*100, 1)],
        'Accuracy':[round(accuracy*100, 1), -1],
        'Balanced Accuracy': [round(balacc*100, 1), -1],
        'F1-Macro':[round(f1macro*100, 1), -1],
        'iou':[round(iou*100, 1), -1],
    }
    df = pd.DataFrame(results)
    return df

In [21]:
#Model prediction results against weak labels
for feat in features:
    print(f'\n----- {feat} -----')
    bert_path = os.path.join('results', f'{feat}_bert_predictions.json')
    fresh_path = os.path.join('results', f'{feat}_fresh_predictions.json') 
    bert_full_labels = load_pred_file(bert_path)
    fresh_full_labels = load_pred_file(fresh_path)
    weakfeat = weak_label_key[feat]
    true_labels = [weakfeat in x for x in get_weak_labels(full_data)]
    print(f'% labeled 1: {round(sum(true_labels)*100/len(true_labels),1)}\n')
    print('BERT')
    bert_results = score_preds(true_labels, bert_full_labels)
    print(bert_results)
    print('FRESH')
    fresh_results = score_preds(true_labels, fresh_full_labels)
    print(fresh_results)


----- hatespeech -----
% labeled 1: 15.4

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       84.6    88.7  86.6      76.8               76.8      49.8  7.0
1       15.4    11.3  13.0      -1.0               -1.0      -1.0 -1.0
FRESH
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       83.7    87.9  85.8      75.3               75.3      46.2  3.4
1        7.8     5.6   6.6      -1.0               -1.0      -1.0 -1.0

----- junkscience -----
% labeled 1: 4.3

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       95.4    94.1  94.7      90.0               90.0      47.4  0.0
1        0.0     0.0   0.0      -1.0               -1.0      -1.0 -1.0
FRESH
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       95.5    85.9  90.5      82.6               82.6      47.6  2.4
1        3.1    10.0   4.8      -1.0               -1.0      -1.0 -1.0

----- propaganda -----
% labeled 1: 42.

  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
#Model prediction results against annotated labels
for feat in features:
    print(f'\n----- {feat} -----')
    true_labels = [feat in x for x in get_annot_labels(full_data)]
    bert_path = os.path.join('results', f'{feat}_bert_predictions.json')
    fresh_path = os.path.join('results', f'{feat}_fresh_predictions.json') 
    bert_full_labels = load_pred_file(bert_path)
    fresh_full_labels = load_pred_file(fresh_path)
    print(f'% labeled 1: {round(sum(true_labels)*100/len(true_labels),1)}\n')
    print('BERT')
    bert_results = score_preds(true_labels, bert_full_labels)
    print(bert_results)
    print('FRESH')
    fresh_results = score_preds(true_labels, fresh_full_labels)
    print(fresh_results)
    print('WEAK')
    weak_labels = get_weak_labels(full_data)
    weak_labels = [int(feat in x) for x in weak_labels]
    weak_results  = score_preds(true_labels, weak_labels)
    print(weak_results)
    
sen_labels, true_sen_labels = gen_sentiment_labels(full_data)
print("Sentiment recall", metrics.recall_score(true_sen_labels, sen_labels, pos_label=1, average='binary'))
print("Sentiment recall", metrics.jaccard_score(true_sen_labels, sen_labels))


----- hatespeech -----
% labeled 1: 10.6

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       89.0    88.3  88.7      79.8               79.8      48.3  4.1
1        7.7     8.2   7.9      -1.0               -1.0      -1.0 -1.0
FRESH
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       88.5    88.1  88.3      79.2               79.2      46.2  2.0
1        3.9     4.1   4.0      -1.0               -1.0      -1.0 -1.0
WEAK
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       89.4   100.0  94.4      89.4               89.4      47.2  0.0
1        0.0     0.0   0.0      -1.0               -1.0      -1.0 -1.0

----- junkscience -----
% labeled 1: 2.6

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       97.2    94.2  95.7      91.8               91.8      47.9  0.0
1        0.0     0.0   0.0      -1.0               -1.0      -1.0 -1.0
FRESH
   Precision  Recall    F1  Accurac

  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()
  _warn_prf(average, modifier, msg_start, len(result))
  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()
  _warn_prf(average, modifier, msg_start, len(result))
  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()


   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro  iou
0       97.4   100.0  98.7      97.4               97.4      49.3  0.0
1        0.0     0.0   0.0      -1.0               -1.0      -1.0 -1.0

----- propaganda -----
% labeled 1: 36.9

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       60.7    72.2  65.9      52.9               52.9      44.9  13.5
1       29.6    20.0  23.9      -1.0               -1.0      -1.0  -1.0
FRESH
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       61.2    67.7  64.3      52.5               52.5      46.7  17.0
1       32.4    26.5  29.1      -1.0               -1.0      -1.0  -1.0
WEAK
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       85.4    78.4  81.7      77.9               77.9      76.8  56.2
1       67.5    77.1  72.0      -1.0               -1.0      -1.0  -1.0

----- satire -----
% labeled 1: 12.4

BERT
   Precision  Recall    F1  Accuracy

  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()
  _warn_prf(average, modifier, msg_start, len(result))
  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()
  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()


% labeled 1: 20.0

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       80.1    81.6  80.8      69.0               69.0      50.0  10.6
1       20.0    18.5  19.2      -1.0               -1.0      -1.0  -1.0
FRESH
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       80.0    80.2  80.1      68.1               68.1      49.9  10.9
1       19.8    19.6  19.7      -1.0               -1.0      -1.0  -1.0
WEAK
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       87.7    83.5  85.6      77.4               77.4      67.0  32.0
1       44.5    53.3  48.5      -1.0               -1.0      -1.0  -1.0

----- conspiracytheory -----
% labeled 1: 16.7

BERT
   Precision  Recall    F1  Accuracy  Balanced Accuracy  F1-Macro   iou
0       82.7    58.3  68.4      55.1               55.1      45.4  12.7
1       15.8    39.0  22.5      -1.0               -1.0      -1.0  -1.0
FRESH
   Precision  Recall    F1  Accuracy  Bal

  _warn_prf(average, modifier, msg_start, len(result))


Sentiment recall 0.7352941176470589
Sentiment recall 0.2403846153846154


In [26]:
# Gather all of the predicted and labeled features
full_vectors = {'BERT':[], 'FRESH':[], 'WEAK':[], 'ANNOT':[]}
annot_full_labels = get_annot_labels(full_data)
weak_full_labels = get_weak_labels(full_data)
for feat in features:
    bert_path = os.path.join('results', f'{feat}_bert_predictions.json')
    fresh_path = os.path.join('results', f'{feat}_fresh_predictions.json')  
    bert_full_labels = load_pred_file(bert_path)
    fresh_full_labels = load_pred_file(fresh_path)
    weak_labels = [int(feat in x) for x in weak_full_labels]
    annot_labels = [int(feat in x) for x in annot_full_labels]
    models_full_labels = {
        'BERT':bert_full_labels,
        'FRESH':fresh_full_labels,
        'WEAK':weak_labels,
        'ANNOT':annot_labels
    }
    for key in ['BERT', 'FRESH', 'WEAK', 'ANNOT']:
        for i, lab in enumerate(models_full_labels[key]):
            if i >= len(full_vectors[key]):
                full_vectors[key].append([lab])
            else:
                full_vectors[key][i].append(lab)

full_pred_sent, full_true_sent = gen_sentiment_labels(full_data)
for key in ['BERT', 'FRESH']:
    for i, lab in enumerate(full_pred_sent):
        full_vectors[key][i].append(lab)

for i, lab in enumerate(full_true_sent):
    full_vectors['ANNOT'][i].append(lab)

  annot_labels = np.array([x.strip().split(' ') if not str(x) == 'nan' else '' for x in annot_labels]).transpose()


In [29]:
# Fit agenda clasification models with cross-validation

bin_labels = [round((s-1)/5) for s in full_labels]
print('% 1 labels', round(sum(bin_labels)/len(bin_labels)*100, 1))
full_labs = [round((s-1)/5) for s in full_labels]
for model in ['FRESH', 'BERT', 'WEAK', 'ANNOT']:
    print(f'----- {model} ------')
    lmodel = sklearn.linear_model.LogisticRegression()
    full_vecs = full_vectors[model]
    sz = int(0.1*len(full_vecs))
    f10, f11, f1mac, acc, balacc = [], [], [], [], []
    for i in range(10):
        lmodel.fit(full_vecs[0:i*sz]+full_vecs[(i+1)*sz:], full_labs[0:i*sz]+full_labs[(i+1)*sz:])
        preds = lmodel.predict(full_vecs[i*sz:(i+1)*sz])
        f10.append(metrics.f1_score(full_labs[i*sz:(i+1)*sz], preds, pos_label=0))
        f11.append(metrics.f1_score(full_labs[i*sz:(i+1)*sz], preds))
        f1mac.append(metrics.f1_score(full_labs[i*sz:(i+1)*sz], preds, average='macro'))
        acc.append(metrics.accuracy_score(full_labs[i*sz:(i+1)*sz], preds))    
        balacc.append(metrics.balanced_accuracy_score(full_labs[i*sz:(i+1)*sz], preds))
    print("LogisticReg F1-0",np.mean(f10))
    print("LogisticReg F1-1",np.mean(f11))
    print("LogisticReg F1macro",np.mean(f1mac))
    print("LogisticReg accuracy",np.mean(acc))
    print("LogisticReg balanced accuracy", np.mean(balacc))

% 1 labels 41.6
----- FRESH ------
LogisticReg F1-0 0.6757004024107971
LogisticReg F1-1 0.2592760230528993
LogisticReg F1macro 0.4674882127318482
LogisticReg accuracy 0.5565217391304349
LogisticReg balanced accuracy 0.5194941262716093
----- BERT ------
LogisticReg F1-0 0.6846900923065399
LogisticReg F1-1 0.28677158283420146
LogisticReg F1macro 0.4857308375703706
LogisticReg accuracy 0.5652173913043478
LogisticReg balanced accuracy 0.5204200686035501
----- WEAK ------
LogisticReg F1-0 0.653448734695355
LogisticReg F1-1 0.449193167388888
LogisticReg F1macro 0.5513209510421215
LogisticReg accuracy 0.5847826086956521
LogisticReg balanced accuracy 0.576361935590478
----- ANNOT ------
LogisticReg F1-0 0.8065622540874153
LogisticReg F1-1 0.6789804434382675
LogisticReg F1macro 0.7427713487628413
LogisticReg accuracy 0.7608695652173914
LogisticReg balanced accuracy 0.7440875755076132


In [31]:
# Ablations with the annotated data on balanced accuracy
labs = ["hate", "junksci", "prop", "sat", "click", "conspiracy", "sentiment"]
new_scores = [round((s-1)/5) for s in full_labels]
full_features = full_vectors['ANNOT']
sz = int(0.1*len(full_vecs))
for i in range(1, 8):
    print(f'-----{labs[i-1]}-----')
    balacc = []
    lmodel = sklearn.linear_model.LogisticRegression()
    for j in range(10):
        split_feat = full_features[0:j*sz]+full_features[(j+1)*sz:]
        split_feat = [vec[:i-1] + vec[i:] for vec in split_feat]
        lmodel.fit(split_feat, new_scores[0:j*sz]+new_scores[(j+1)*sz:])
        test_feat = full_features[j*sz:(j+1)*sz]
        test_feat = [vec[:i-1] + vec[i:] for vec in test_feat]
        preds = lmodel.predict(test_feat)
        balacc.append(metrics.balanced_accuracy_score(new_scores[j*sz:(j+1)*sz], preds))
    print("Balanced accuracy",np.mean(balacc))

-----hate-----
Balanced accuracy 0.7053730775669995
-----junksci-----
Balanced accuracy 0.7440875755076132
-----prop-----
Balanced accuracy 0.7252297815027708
-----sat-----
Balanced accuracy 0.7328845906904979
-----click-----
Balanced accuracy 0.7271380847322984
-----conspiracy-----
Balanced accuracy 0.7359709183756529
-----sentiment-----
Balanced accuracy 0.6984866665877545


In [32]:
# Ablations with the FRESH output on balanced accuracy
labs = ["hate", "junksci", "prop", "sat", "click", "conspiracy", "sentiment"]
new_scores = [round((s-1)/5) for s in full_labels]
full_features = full_vectors['FRESH']
sz = int(0.1*len(full_vecs))
for i in range(1, 8):
    print(f'-----{labs[i-1]}-----')
    balacc = []
    lmodel = sklearn.linear_model.LogisticRegression()
    for j in range(10):
        split_feat = full_features[0:j*sz]+full_features[(j+1)*sz:]
        split_feat = [vec[:i-1] + vec[i:] for vec in split_feat]
        lmodel.fit(split_feat, new_scores[0:j*sz]+new_scores[(j+1)*sz:])
        test_feat = full_features[j*sz:(j+1)*sz]
        test_feat = [vec[:i-1] + vec[i:] for vec in test_feat]
        preds = lmodel.predict(test_feat)
        balacc.append(metrics.balanced_accuracy_score(new_scores[j*sz:(j+1)*sz], preds))
    print("Balanced accuracy",np.mean(balacc))

-----hate-----
Balanced accuracy 0.5160988654334137
-----junksci-----
Balanced accuracy 0.5004219701456775
-----prop-----
Balanced accuracy 0.5301092056366887
-----sat-----
Balanced accuracy 0.5155529285804116
-----click-----
Balanced accuracy 0.49691330869737066
-----conspiracy-----
Balanced accuracy 0.5136507546673681
-----sentiment-----
Balanced accuracy 0.5


In [33]:
# Ablations with the BERT output on balanced accuracy
labs = ["hate", "junksci", "prop", "sat", "click", "conspiracy", "sentiment"]
new_scores = [round((s-1)/5) for s in full_labels]
full_features = full_vectors['BERT']
sz = int(0.1*len(full_vecs))
for i in range(1, 8):
    print(f'-----{labs[i-1]}-----')
    balacc = []
    lmodel = sklearn.linear_model.LogisticRegression()
    for j in range(10):
        split_feat = full_features[0:j*sz]+full_features[(j+1)*sz:]
        split_feat = [vec[:i-1] + vec[i:] for vec in split_feat]
        lmodel.fit(split_feat, new_scores[0:j*sz]+new_scores[(j+1)*sz:])
        test_feat = full_features[j*sz:(j+1)*sz]
        test_feat = [vec[:i-1] + vec[i:] for vec in test_feat]
        preds = lmodel.predict(test_feat)
        balacc.append(metrics.balanced_accuracy_score(new_scores[j*sz:(j+1)*sz], preds))
    print("Balanced accuracy",np.mean(balacc))

-----hate-----
Balanced accuracy 0.5213166147696902
-----junksci-----
Balanced accuracy 0.5204200686035501
-----prop-----
Balanced accuracy 0.5106608955086017
-----sat-----
Balanced accuracy 0.5204200686035501
-----click-----
Balanced accuracy 0.5126592296978083
-----conspiracy-----
Balanced accuracy 0.514725902437785
-----sentiment-----
Balanced accuracy 0.49206724727671247
