In [1]:
# --------------------------------------------------
# PhoMemes 2022 Evaluation Script
#  - For Disinformation Challenge Evaluation
#
# Used to evaluate PhoMeme challenge submission for
#  for 
# --------------------------------------------------
version = 1.0 # Notebook Version Number
edition = "c2"

import os
cwd = os.getcwd()

In [2]:
import json

import numpy as np
import pandas as pd

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [3]:
model_info = None
with open("metadata.json", "r") as in_file:
    model_info = json.load(in_file)
    
model_info

{'runtag': '1.1',
 'date': '2022-05-25',
 'organization': 'CMU-CASOS',
 'model_description': 'Visual embeddings combined with supervised learning on user distribution features of their visual embeddings.',
 'type': 'automatic',
 'paper': 'https://github.com/ijcruic/PhoMemes2022-Submission',
 'code': 'https://github.com/ijcruic/PhoMemes2022-Submission'}

In [4]:
groundtruth_df = pd.read_csv("../../../data/c2.disinfo/groundtruth.csv", index_col="user_id")
run_df = pd.read_csv("run.csv", index_col="user_id")

In [5]:
joined_df = run_df.join(groundtruth_df, how="inner", lsuffix="_pred", rsuffix="_truth")

In [6]:
missing = set(groundtruth_df.index).difference(joined_df.index)
print("Missing:", len(missing))

if len(missing) > 0:
    raise Exception("MISSING")

Missing: 411


Exception: MISSING

In [7]:
run_df

Unnamed: 0_level_0,authentic,campaign
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
00089fde8c02ff8682194c07e1feba12-22748,0,campaign.iranian_2018
003a3149a530c30fc087a3c6a1569f64-2614,0,campaign.russia_2018
003a4f66e267e8ccfacd07a197c35b72-8870,0,campaign.venezuela_2019
00502fc7175a80a670d3a26879a37bad-6177,0,campaign.china_2019
008be68978727b3bfb55f6e18dde046d-51962,1,
...,...,...
ff6590876f45c76cf24135a20b408697-22705,0,campaign.iranian_2018
ff88be354761f97bf509a011bf54562a-46589,0,campaign.china_2019
ff8f4d72f3c03218dd4b4e74b2d40ac1-6053,1,
ff90f04ffe4d1c826248232328756555-41656,1,


In [8]:
joined_df.columns

Index(['authentic_pred', 'campaign_pred', 'authentic_truth', 'campaign_truth'], dtype='object')

In [9]:
disinfo_agent_prediction = [True if a == 0 else False for a in joined_df["authentic_pred"]]
disinfo_agent_truth = [True if a == 0 else False for a in joined_df["authentic_truth"]]

ac_score_overall = accuracy_score(disinfo_agent_prediction, disinfo_agent_truth)
pr_score_overall = precision_score(disinfo_agent_prediction, disinfo_agent_truth)
rc_score_overall = recall_score(disinfo_agent_prediction, disinfo_agent_truth)

f1_score_overall = f1_score(disinfo_agent_prediction, disinfo_agent_truth, zero_division=1)

In [10]:
print("Accuracy:", ac_score_overall)
print("Precision:", pr_score_overall)
print("Recall:", rc_score_overall)
print("F1:", f1_score_overall)

Accuracy: 0.7465393794749403
Precision: 0.701255230125523
Recall: 0.8280632411067194
F1: 0.7594019030357951


In [11]:
acs = []
prs = []
rcs = []
f1s = []

for i,j in joined_df["campaign_truth"].value_counts().items():
    print(i,j)
    
    this_pred = joined_df['campaign_pred'] == i
    this_true = joined_df['campaign_truth'] == i
    
    ac_score_this = accuracy_score(this_pred, this_true)
    acs.append(ac_score_this)
    
    pr_score_this = precision_score(this_pred, this_true, zero_division=0)
    prs.append(pr_score_this)
    
    rc_score_this = recall_score(this_pred, this_true, zero_division=0)
    rcs.append(rc_score_this)
    
    f1_score_this = f1_score(this_pred, this_true, zero_division=0)
    f1s.append(f1_score_this)
    
    print("[%s] Ac:" % i, ac_score_this)
    print("[%s] Pr:" % i, pr_score_this)
    print("[%s] Rc:" % i, rc_score_this)
    print("[%s] F1:" % i, f1_score_this)
    
    print("*" * 20)
    

None 900
[None] Ac: 0.7465393794749403
[None] Pr: 0.8066666666666666
[None] Rc: 0.6703601108033241
[None] F1: 0.7322239031770046
********************
campaign.venezuela_2019 382
[campaign.venezuela_2019] Ac: 0.8391408114558473
[campaign.venezuela_2019] Pr: 0.35340314136125656
[campaign.venezuela_2019] Rc: 0.6
[campaign.venezuela_2019] F1: 0.4448105436573312
********************
campaign.iranian_2018 327
[campaign.iranian_2018] Ac: 0.7933174224343675
[campaign.iranian_2018] Pr: 0.40978593272171254
[campaign.iranian_2018] Rc: 0.3582887700534759
[campaign.iranian_2018] F1: 0.38231098430813126
********************
campaign.russia_2018 285
[campaign.russia_2018] Ac: 0.8176610978520287
[campaign.russia_2018] Pr: 0.3684210526315789
[campaign.russia_2018] Rc: 0.34201954397394135
[campaign.russia_2018] F1: 0.3547297297297297
********************
campaign.china_2019 201
[campaign.china_2019] Ac: 0.9155131264916467
[campaign.china_2019] Pr: 0.32338308457711445
[campaign.china_2019] Rc: 0.61320754

In [12]:
ac_mean = np.mean(acs)
pr_mean = np.mean(prs)
rc_mean = np.mean(rcs)
f1_mean = np.mean(f1s)

print("Macro-Average Ac:", ac_mean)
print("Macro-Average Pr:", pr_mean)
print("Macro-Average Rc:", rc_mean)
print("Macro-Average F1:", f1_mean)

Macro-Average Ac: 0.822434367541766
Macro-Average Pr: 0.4523319755916659
Macro-Average Rc: 0.5167751944001105
Macro-Average F1: 0.46750558592036773


In [13]:
rows = [{
    "runtag": "%s-%s-%s" % (model_info["organization"], model_info["runtag"], model_info["date"]),
    "organization": model_info["organization"],
    "model_description": model_info["model_description"],
    "type": model_info["type"],
    "paper": model_info["paper"],
    "code": model_info["code"],
    "accuracy_overall": ac_score_overall,
    "precision_overall": pr_score_overall,
    "recall_overall": rc_score_overall,
    "f1_overall": f1_score_overall,
    "accuracy_macro": ac_mean,
    "precision_macro": pr_mean,
    "recall_macro": rc_mean,
    "f1_macro": f1_mean,
}]

leaderboard_df = pd.DataFrame(rows).set_index("runtag")
leaderboard_df

Unnamed: 0_level_0,organization,model_description,type,paper,code,accuracy_overall,precision_overall,recall_overall,f1_overall,accuracy_macro,precision_macro,recall_macro,f1_macro
runtag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
CMU-CASOS-1.1-2022-05-25,CMU-CASOS,Visual embeddings combined with supervised lea...,automatic,https://github.com/ijcruic/PhoMemes2022-Submis...,https://github.com/ijcruic/PhoMemes2022-Submis...,0.746539,0.701255,0.828063,0.759402,0.822434,0.452332,0.516775,0.467506


In [14]:
leaderboard_df.to_csv("leaderboard.csv")