In [1]:
# --------------------------------------------------
# PhoMemes 2022 Evaluation Script
#  - For Disinformation Challenge Evaluation
#
# Used to evaluate PhoMeme challenge submission for
#  for 
# --------------------------------------------------
version = 1.0 # Notebook Version Number
edition = "c2"

import os
cwd = os.getcwd()

In [2]:
import json

import numpy as np
import pandas as pd

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [3]:
model_info = None
with open("metadata.json", "r") as in_file:
    model_info = json.load(in_file)
    
model_info

{'runtag': 'ideology',
 'date': '2022-05-25',
 'organization': 'infeco',
 'model_description': "Using the clustering method outlined in https://arxiv.org/abs/2110.01183, we use each account's distribution of images across political-image clusters as features for classification.",
 'type': 'automatic',
 'paper': '',
 'code': ''}

In [4]:
groundtruth_df = pd.read_csv("../../../data/c2.disinfo/groundtruth.csv", index_col="user_id")
run_df = pd.read_csv("run.csv", index_col="user_id")

In [5]:
joined_df = run_df.join(groundtruth_df, how="inner", lsuffix="_pred", rsuffix="_truth")

In [6]:
missing = set(groundtruth_df.index).difference(joined_df.index)
print("Missing:", len(missing))

if len(missing) > 0:
    raise Exception("MISSING")

Missing: 411


Exception: MISSING

In [7]:
joined_df.columns

Index(['authentic_pred', 'campaign_pred', 'authentic_truth', 'campaign_truth'], dtype='object')

In [8]:
disinfo_agent_prediction = [True if a == 0 else False for a in joined_df["authentic_pred"]]
disinfo_agent_truth = [True if a == 0 else False for a in joined_df["authentic_truth"]]

ac_score_overall = accuracy_score(disinfo_agent_prediction, disinfo_agent_truth)
pr_score_overall = precision_score(disinfo_agent_prediction, disinfo_agent_truth)
rc_score_overall = recall_score(disinfo_agent_prediction, disinfo_agent_truth)

f1_score_overall = f1_score(disinfo_agent_prediction, disinfo_agent_truth, zero_division=1)

In [9]:
print("Accuracy:", ac_score_overall)
print("Precision:", pr_score_overall)
print("Recall:", rc_score_overall)
print("F1:", f1_score_overall)

Accuracy: 0.801909307875895
Precision: 0.9899581589958159
Recall: 0.7459016393442623
F1: 0.8507731032002878


In [10]:
acs = []
prs = []
rcs = []
f1s = []

for i,j in joined_df["campaign_truth"].value_counts().items():
    print(i,j)
    
    this_pred = joined_df['campaign_pred'] == i
    this_true = joined_df['campaign_truth'] == i
    
    ac_score_this = accuracy_score(this_pred, this_true)
    acs.append(ac_score_this)
    
    pr_score_this = precision_score(this_pred, this_true, zero_division=0)
    prs.append(pr_score_this)
    
    rc_score_this = recall_score(this_pred, this_true, zero_division=0)
    rcs.append(rc_score_this)
    
    f1_score_this = f1_score(this_pred, this_true, zero_division=0)
    f1s.append(f1_score_this)
    
    print("[%s] Ac:" % i, ac_score_this)
    print("[%s] Pr:" % i, pr_score_this)
    print("[%s] Rc:" % i, rc_score_this)
    print("[%s] F1:" % i, f1_score_this)
    
    print("*" * 20)
    

None 900
[None] Ac: 0.5704057279236276
[None] Pr: 0.0
[None] Rc: 0.0
[None] F1: 0.0
********************
campaign.venezuela_2019 382
[campaign.venezuela_2019] Ac: 0.8176610978520287
[campaign.venezuela_2019] Pr: 0.0
[campaign.venezuela_2019] Rc: 0.0
[campaign.venezuela_2019] F1: 0.0
********************
campaign.iranian_2018 327
[campaign.iranian_2018] Ac: 0.8439140811455847
[campaign.iranian_2018] Pr: 0.0
[campaign.iranian_2018] Rc: 0.0
[campaign.iranian_2018] F1: 0.0
********************
campaign.russia_2018 285
[campaign.russia_2018] Ac: 0.863961813842482
[campaign.russia_2018] Pr: 0.0
[campaign.russia_2018] Rc: 0.0
[campaign.russia_2018] F1: 0.0
********************
campaign.china_2019 201
[campaign.china_2019] Ac: 0.9040572792362769
[campaign.china_2019] Pr: 0.0
[campaign.china_2019] Rc: 0.0
[campaign.china_2019] F1: 0.0
********************


In [11]:
ac_mean = np.mean(acs)
pr_mean = np.mean(prs)
rc_mean = np.mean(rcs)
f1_mean = np.mean(f1s)

print("Macro-Average Ac:", ac_mean)
print("Macro-Average Pr:", pr_mean)
print("Macro-Average Rc:", rc_mean)
print("Macro-Average F1:", f1_mean)

Macro-Average Ac: 0.8
Macro-Average Pr: 0.0
Macro-Average Rc: 0.0
Macro-Average F1: 0.0


In [12]:
sub_date_ = cwd.partition("submissions/")[-1].partition("/")[-1]
sub_date_ = sub_date_.partition("-")[0]
sub_date = "%s/%s/%s" % (sub_date_[:4], sub_date_[4:6], sub_date_[6:])


In [13]:
rows = [{
    "runtag": "%s-%s-%s" % (model_info["organization"], model_info["runtag"], model_info["date"]),
    "date": sub_date,
    "organization": model_info["organization"],
    "model_description": model_info["model_description"],
    "type": model_info["type"],
    "paper": model_info["paper"],
    "code": model_info["code"],
    "accuracy_overall": ac_score_overall,
    "precision_overall": pr_score_overall,
    "recall_overall": rc_score_overall,
    "f1_overall": f1_score_overall,
    "accuracy_macro": ac_mean,
    "precision_macro": pr_mean,
    "recall_macro": rc_mean,
    "f1_macro": f1_mean,
}]

leaderboard_df = pd.DataFrame(rows).set_index("runtag")
leaderboard_df

Unnamed: 0_level_0,date,organization,model_description,type,paper,code,accuracy_overall,precision_overall,recall_overall,f1_overall,accuracy_macro,precision_macro,recall_macro,f1_macro
runtag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
infeco-ideology-2022-05-25,2022/05/25,infeco,Using the clustering method outlined in https:...,automatic,,,0.801909,0.989958,0.745902,0.850773,0.8,0.0,0.0,0.0


In [14]:
leaderboard_df.to_csv("leaderboard.csv")