In [1]:
import numpy as np
import pickle
from sklearn import metrics
import re
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

import os
import pandas as pd

from ast import literal_eval

from process_predictions import get_predictions
from utils import assess_performance, flatten, compare_preds

import random

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Columns of the test CSV that are stored as stringified lists: when the test
# set is loaded, each of them is parsed with literal_eval and stripped of its
# 'NOT_MAPPED' items.
all_cols = [
    'present_prim_tags', 'present_sec_tags',
    'sectors',
    'pillars_1d', 'pillars_2d',
    'gender', 'age',
    'specific_needs_groups', 'affected_groups_levels_2_3',
    'gender_snorkel',
    'subpillars_1d', 'subpillars_2d',
]

In [3]:
def _parse_tag_list(raw):
    """Evaluate a stringified list and drop its 'NOT_MAPPED' items."""
    return [item for item in literal_eval(raw) if item != 'NOT_MAPPED']


# Data folder, expressed relative to the notebook's working directory.
DATA_PATH = os.path.join('..', '..', '..', '..', "data", "frameworks_data", 'data_v0.7.1')

# Ground-truth test set: every column listed in all_cols is turned from its
# string form into a Python list.
test_df = pd.read_csv(os.path.join(DATA_PATH, 'new_columns_test_v0.7.1.csv'))
for col in all_cols:
    test_df[col] = test_df[col].map(_parse_tag_list)

# Geo-location predictions for the same test set, parsed from their string form.
geo_df = pd.read_csv(os.path.join(DATA_PATH, 'test_v0.7.1_geo_pred.csv'))
geo_df['geo_location_pred'] = geo_df['geo_location_pred'].map(literal_eval)

## postprocess raw predictions and get results

In [4]:
# Pillar names, i.e. the part before '->' in a 'pillar->subpillar' tag. They are
# passed to get_predictions_all to split subpillar predictions into 1D and 2D.
pillars_1d_tags = [
    'Covid-19',
    'Casualties',
    'Context',
    'Displacement',
    'Humanitarian Access',
    'Shock/Event',
    'Information And Communication',
]

pillars_2d_tags = [
    'At Risk',
    'Priority Interventions',
    'Capacities & Response',
    'Humanitarian Conditions',
    'Impact',
    'Priority Needs',
]
 

In [6]:
def get_predictions_all(ratio_proba_threshold,
    output_columns,
    pillars_2d,
    pillars_1d,
    nb_entries: int,
    ratio_nb: int):
    """
    Turn the raw per-entry tag values into final tag predictions.

    Parameters
    ----------
    ratio_proba_threshold : mapping with the keys 'sectors', 'subpillars' and
        'secondary_tags'. Each value is indexed by entry number and yields a
        ``{tag: value}`` dict for that entry.
    output_columns : names used as keys of the returned dict. The body appends
        to 'sectors', 'subpillars_2d', 'subpillars_1d', 'gender_kw_pred',
        'age_kw_pred' and 'severity', so all six have to be listed.
    pillars_2d, pillars_1d : pillar names; a subpillar tag written as
        'pillar->subpillar' is routed according to the part before '->'.
    nb_entries : number of entries to process (entry numbers 0 .. nb_entries-1).
    ratio_nb : threshold forwarded to get_preds_entry.

    Returns
    -------
    dict mapping each output column to a list holding, per entry, the list of
    predicted tags.
    """

    def _head(tag):
        # Part of an 'a->b' tag that precedes the arrow.
        return tag.split('->')[0]

    predictions = {}
    for column in output_columns:
        predictions[column] = []

    for entry_nb in range(nb_entries):
        # --- sectors: thresholded tags only, may be empty
        sector_preds = get_preds_entry(
            ratio_proba_threshold['sectors'][entry_nb], False, ratio_nb
        )
        predictions['sectors'].append(sector_preds)

        # --- subpillars: split the entry's values by pillar family
        subpillar_values = ratio_proba_threshold['subpillars'][entry_nb]
        values_2d, values_1d = {}, {}
        for tag, value in subpillar_values.items():
            pillar = _head(tag)
            if pillar in pillars_2d:
                values_2d[tag] = value
            if pillar in pillars_1d:
                values_1d[tag] = value

        # 2D subpillars are only predicted when a sector was predicted, and
        # then at least one tag is forced.
        preds_2d = get_preds_entry(values_2d, True, ratio_nb) if sector_preds else []
        predictions['subpillars_2d'].append(preds_2d)
        predictions['subpillars_1d'].append(get_preds_entry(values_1d, False, ratio_nb))

        # --- secondary tags: gender / age keep only the part after '->'
        secondary_values = ratio_proba_threshold['secondary_tags'][entry_nb]
        secondary_preds = get_preds_entry(secondary_values, False, ratio_nb)
        for column in ('gender_kw_pred', 'age_kw_pred'):
            predictions[column].append(
                [tag.split('->')[1] for tag in secondary_preds if _head(tag) == column]
            )

        # --- severity: only when a 'Humanitarian Conditions' 2D tag was
        # predicted; the highest-valued severity tag(s) are kept.
        severity_values = {
            tag: value for tag, value in secondary_values.items()
            if _head(tag) == 'severity'
        }
        preds_severity = []
        if any('Humanitarian Conditions' in tag for tag in preds_2d):
            preds_severity = [
                tag.split('->')[1]
                for tag in get_preds_entry(severity_values, True, ratio_nb, True)
            ]
        predictions['severity'].append(preds_severity)

    return predictions
        


def get_preds_entry (preds_column, return_at_least_one=True, ratio_nb=1, return_only_one=False):
    """
    Select the predicted tags of one entry from its ``{tag: value}`` mapping.

    Parameters
    ----------
    preds_column : dict mapping each tag to a comparable numeric value.
    return_at_least_one : when no tag exceeds ``ratio_nb``, fall back to the
        tag(s) holding the maximum value instead of returning an empty list.
    ratio_nb : a tag is selected when its value is strictly greater than this.
    return_only_one : ignore the threshold and return the tag(s) holding the
        maximum value. Tags tied for the maximum are all returned.

    Returns
    -------
    list of tags, in the iteration order of ``preds_column``. An empty mapping
    always gives an empty list.
    """
    if not preds_column:
        return []

    def _top_tags():
        # The maximum is computed once instead of once per key.
        best_value = max(preds_column.values())
        return [tag for tag, value in preds_column.items() if value == best_value]

    if return_only_one:
        # The threshold pass is irrelevant here: its result was always replaced.
        return _top_tags()

    preds_entry = [tag for tag, value in preds_column.items() if value > ratio_nb]
    if return_at_least_one and len(preds_entry) == 0:
        preds_entry = _top_tags()
    return preds_entry

In [8]:
# Load the logged model outputs. The context manager closes the file handle
# once it has been read (it was previously left open).
# NOTE(review): pickle.load can execute arbitrary code - only load pickle
# files that come from a trusted source.
with open("logged_values_all_models.pickle", "rb") as pickle_file:  ##NAME OF PICKLE FILE
    doc = pickle.load(pickle_file)

# Number of entries for which predictions were logged.
n_preds = len(doc['preds']['subpillars'])

# Distinct ground-truth subpillar tags present in the test set.
subpillars_2d_tags = list(set(flatten(test_df.subpillars_2d)))
subpillars_1d_tags = list(set(flatten(test_df.subpillars_1d)))

# Post-process the raw values into one list of predicted tags per entry and column.
final_preds = get_predictions_all(
    doc['preds'],
    ['sectors', 'subpillars_2d', 'subpillars_1d', 'gender_kw_pred', 'age_kw_pred', 'severity'],
    pillars_2d=pillars_2d_tags,
    pillars_1d=pillars_1d_tags,
    nb_entries=n_preds,
    ratio_nb=1)


In [9]:
# Start from the identifying columns of the test set. The explicit copy makes
# predictions_df an independent frame, so the column assignments below are not
# chained assignments on a slice of test_df.
predictions_df = test_df[[
    'excerpt', 'entry_id', 'lang', 'analysis_framework_id'
]].copy()

# Attach the post-processed predictions (one list of tags per entry). The
# keyword-based gender / age predictions are stored under the shorter names.
predictions_df['sectors'] = final_preds['sectors']
predictions_df['subpillars_2d'] = final_preds['subpillars_2d']
predictions_df['subpillars_1d'] = final_preds['subpillars_1d']
predictions_df['gender'] = final_preds['gender_kw_pred']
predictions_df['age'] = final_preds['age_kw_pred']
predictions_df['severity'] = final_preds['severity']

In [9]:
# Preview the first two rows of the assembled predictions table.
predictions_df.head(2)

Unnamed: 0,excerpt,entry_id,lang,analysis_framework_id,sectors,subpillars_2d,subpillars_1d,gender,age,severity
0,"During the reporting week, IOM provided medica...",16851,en,137.0,[Health],[Capacities & Response->International Response],[],[Female],[Adult (18 to 59 years old)],[]
1,Primary and secondary net enrollment rates are...,489433,en,1306.0,[Education],[Humanitarian Conditions->Living Standards],[],"[Female, Male]",[Children/Youth (5 to 17 years old)],[Major]


In [10]:
# Share of entries for which at least one sector was predicted.
predictions_df['sectors'].map(lambda tags: len(tags) > 0).sum() / len(predictions_df)

0.699686083013603

In [23]:
# Share of test entries whose ground-truth sector list is not empty.
test_df['sectors'].map(lambda tags: len(tags) > 0).sum() / len(test_df)

0.7880479014068131

In [10]:
# Compare predictions with the ground truth for each column and collect the
# 'scores' entry returned by compare_preds.
cols = ['sectors', 'subpillars_2d', 'subpillars_1d']
raw_results = {}
tot_wrong_ids = {}
for col in cols:
    if col != 'sectors':
        preds = compare_preds(test_df[col].values, final_preds[col])
    else:
        # Entries whose ground truth contains 'Cross' are left out of the
        # sector comparison.
        ids_not_cross = test_df['sectors'].apply(lambda tags: 'Cross' not in tags)
        gt_sectors = test_df[ids_not_cross]['sectors'].tolist()
        preds_sectors = predictions_df[ids_not_cross]['sectors'].tolist()
        preds = compare_preds(gt_sectors, preds_sectors)

    raw_results[col] = results_col = preds['scores']

# One column per evaluated tag family.
df_results_raw = pd.DataFrame.from_dict(raw_results, orient='columns')
df_results_raw

Unnamed: 0,sectors,subpillars_2d,subpillars_1d
proportion_perfect_matches,0.69,0.42,0.69
proportion_at_least_one_false,0.31,0.58,0.31
proportion_at_leaset_one_missing,0.2,0.35,0.17


In [12]:
# Per-tag performance table for the 2D subpillars; the tag list is every
# distinct ground-truth tag of the test set, in sorted order.
results_subpillars_2d = assess_performance(
    final_preds['subpillars_2d'],
    test_df['subpillars_2d'].tolist(),
    sorted(set(flatten(test_df['subpillars_2d']))),
    'subpillars_2d',
)
results_subpillars_2d

Unnamed: 0,macro_precision,macro_recall,macro_f1_score,1_precision,0_precision,1_recall,0_recall,1_f1_score,0_f1_score,hamming_loss
At Risk->Number Of People At Risk,0.504,0.525,0.506,0.009,0.999,0.056,0.994,0.016,0.996,0.007
At Risk->Risk And Vulnerabilities,0.706,0.723,0.714,0.447,0.964,0.488,0.958,0.467,0.961,0.072
Capacities & Response->International Response,0.819,0.789,0.803,0.682,0.957,0.609,0.968,0.643,0.963,0.067
Capacities & Response->Local Response,0.562,0.648,0.587,0.126,0.998,0.3,0.995,0.178,0.997,0.006
Capacities & Response->National Response,0.68,0.685,0.682,0.386,0.974,0.397,0.972,0.392,0.973,0.052
Capacities & Response->Number Of People Reached/Response Gaps,0.798,0.808,0.803,0.609,0.988,0.629,0.987,0.619,0.987,0.024
Humanitarian Conditions->Coping Mechanisms,0.739,0.717,0.727,0.497,0.98,0.45,0.984,0.472,0.982,0.035
Humanitarian Conditions->Living Standards,0.753,0.755,0.754,0.594,0.912,0.601,0.909,0.597,0.911,0.146
Humanitarian Conditions->Number Of People In Need,0.682,0.672,0.677,0.37,0.994,0.35,0.994,0.36,0.994,0.012
Humanitarian Conditions->Physical And Mental Well Being,0.765,0.777,0.77,0.577,0.952,0.607,0.946,0.592,0.949,0.09


In [13]:
# Per-tag performance table for the 1D subpillars; the tag list is every
# distinct ground-truth tag of the test set, in sorted order.
results_subpillars_1d = assess_performance(
    final_preds['subpillars_1d'],
    test_df['subpillars_1d'].tolist(),
    sorted(set(flatten(test_df['subpillars_1d']))),
    'subpillars_1d',
)
results_subpillars_1d

Unnamed: 0,macro_precision,macro_recall,macro_f1_score,1_precision,0_precision,1_recall,0_recall,1_f1_score,0_f1_score,hamming_loss
Casualties->Dead,0.822,0.825,0.824,0.652,0.992,0.658,0.992,0.655,0.992,0.015
Casualties->Injured,0.714,0.817,0.755,0.429,0.999,0.636,0.997,0.512,0.998,0.005
Casualties->Missing,0.597,0.654,0.619,0.196,0.999,0.31,0.998,0.24,0.998,0.003
Context->Demography,0.705,0.713,0.709,0.421,0.989,0.437,0.989,0.429,0.989,0.022
Context->Economy,0.814,0.784,0.798,0.644,0.984,0.58,0.988,0.61,0.986,0.027
Context->Environment,0.624,0.694,0.651,0.253,0.996,0.397,0.992,0.309,0.994,0.012
Context->Legal & Policy,0.739,0.678,0.704,0.488,0.99,0.362,0.994,0.415,0.992,0.016
Context->Politics,0.705,0.771,0.733,0.416,0.994,0.552,0.99,0.474,0.992,0.016
Context->Security & Stability,0.791,0.798,0.794,0.6,0.981,0.616,0.979,0.608,0.98,0.038
Context->Socio Cultural,0.627,0.668,0.644,0.261,0.993,0.346,0.989,0.298,0.991,0.018


In [14]:
# Write the predictions table to 'total_predictions.csv' in the working directory.
predictions_df.to_csv('total_predictions.csv', index=None)

In [15]:
# Sector performance, leaving out entries whose ground truth contains 'Cross'.
ids_not_cross = test_df['sectors'].apply(lambda tags: 'Cross' not in tags)
gt_sectors = test_df[ids_not_cross]['sectors'].tolist()
preds_sectors = predictions_df[ids_not_cross]['sectors'].tolist()

# The evaluated tag list is every distinct ground-truth sector that remains.
assess_performance(
    preds_sectors,
    gt_sectors,
    sorted(set(flatten(gt_sectors))),
    'sectors',
)

Unnamed: 0,macro_precision,macro_recall,macro_f1_score,1_precision,0_precision,1_recall,0_recall,1_f1_score,0_f1_score,hamming_loss
Agriculture,0.787,0.796,0.792,0.583,0.992,0.601,0.991,0.592,0.991,0.017
Education,0.909,0.91,0.91,0.831,0.988,0.833,0.987,0.832,0.988,0.023
Food Security,0.911,0.901,0.906,0.841,0.98,0.82,0.982,0.831,0.981,0.034
Health,0.861,0.853,0.857,0.791,0.932,0.766,0.941,0.778,0.937,0.099
Livelihoods,0.857,0.814,0.834,0.755,0.959,0.654,0.974,0.701,0.967,0.06
Logistics,0.843,0.779,0.808,0.695,0.991,0.564,0.995,0.623,0.993,0.015
Nutrition,0.878,0.915,0.895,0.762,0.994,0.839,0.991,0.798,0.992,0.015
Protection,0.853,0.839,0.846,0.767,0.94,0.726,0.951,0.746,0.946,0.09
Shelter,0.88,0.835,0.856,0.779,0.982,0.68,0.989,0.726,0.986,0.027
WASH,0.929,0.901,0.914,0.873,0.984,0.812,0.99,0.842,0.987,0.024


## Create testing df for taggers (not needed for predictions and deployment)

In [16]:
# Expose the geo predictions under the name 'geo_location', then attach them to
# the predictions table through the shared 'entry_id' column (default inner join).
geo_df = geo_df.rename(columns={'geo_location_pred': 'geo_location'})
predictions_df = pd.merge(predictions_df, geo_df, on='entry_id')

In [17]:
# Check the column names of geo_df after the rename.
geo_df.columns

Index(['entry_id', 'geo_location'], dtype='object')

In [18]:
def get_clean_output(x):
    """
    Render a mapping as text, one ``{key: value}`` dict per line.

    A combining low line (U+0332) is inserted between the characters of each
    key, so keys show up underlined; values are rendered through ``str`` of the
    one-item dict.
    """
    rendered_lines = []
    for key, value in x.items():
        underlined_key = '\u0332'.join(key)
        rendered_lines.append(str({underlined_key: value}))
    return '\n'.join(rendered_lines)

def clean_excerpt(x, n=12):
    """
    Re-wrap a text so that every line holds at most ``n`` space-separated words.

    Parameters
    ----------
    x : str, the text to wrap. It is split on single spaces, so consecutive
        spaces produce empty "words" that still count towards ``n``.
    n : int, maximum number of words per line (default 12, the value that was
        previously hard-coded). Must be positive.

    Returns
    -------
    str with the word groups joined by newlines.
    """
    words = x.split(' ')
    return '\n'.join(
        ' '.join(words[start:start + n]) for start in range(0, len(words), n)
    )

In [24]:
# Share of every ground-truth tag combination that is sampled for the taggers.
TAGGERS_SAMPLE_FRACTION = 0.02
# Fixed seed, so that re-running the cell exports the same sample.
TAGGERS_SAMPLE_SEED = 0

# Ground-truth tag combination (sectors + subpillars + framework id) per test entry.
test_primary_tags = test_df.apply(
    lambda x: x.sectors + x.subpillars_1d + x.subpillars_2d + [x.analysis_framework_id], axis=1
)
# predictions_df received a fresh index (and possibly fewer rows) from the merge
# with geo_df, so the tags are attached through entry_id rather than through
# index alignment with test_df.
tags_by_entry_id = dict(zip(test_df['entry_id'], test_primary_tags))
predictions_df['test_primary_tags'] = predictions_df['entry_id'].map(tags_by_entry_id.get)

# English entries only, grouped by their (stringified) ground-truth tag combination.
en_df = predictions_df[predictions_df.lang == 'en'].copy()
en_df["test_primary_tags"] = en_df["test_primary_tags"].apply(str)
ids = en_df.groupby("test_primary_tags")["entry_id"].agg(list).values
unique_ids = [list(np.unique(list_)) for list_ in ids]

# Sample a fixed fraction of each group. int() truncates, so a group needs at
# least 1 / TAGGERS_SAMPLE_FRACTION entries to contribute anything.
tot_ids = sorted(unique_ids, key=len)
sampler = random.Random(TAGGERS_SAMPLE_SEED)  # local RNG: global random state is untouched
ids = [
    sampler.sample(id_tmp, int(len(id_tmp) * TAGGERS_SAMPLE_FRACTION)) for id_tmp in tot_ids
]
taggers_ids = flatten(ids)

tested_cols = [
    'sectors', 'subpillars_2d', 'subpillars_1d', 'age', 'gender', 'severity', 'geo_location'
]
kept_cols = ['entry_id', 'excerpt'] + tested_cols
# Independent copy: the columns added below must not be written onto a slice.
taggers_df = predictions_df[predictions_df.entry_id.isin(taggers_ids)][kept_cols].copy()
taggers_df['excerpt'] = taggers_df['excerpt'].apply(clean_excerpt)
taggers_df['predictions'] = taggers_df.apply(
    lambda x: get_clean_output({col:x[col] for col in tested_cols}), axis=1
)

# Empty feedback template for the taggers to fill in; it is the same for every row.
empty_feedback = get_clean_output({col: [] for col in tested_cols})
taggers_df['wrong_tags'] = empty_feedback
taggers_df['missing_tags'] = empty_feedback

taggers_df['specific_needs_groups'] = '[]'
taggers_df['affected_groups'] = '[]'

taggers_df[[
    'entry_id',
    'excerpt',
    'predictions',
    'wrong_tags',
    'missing_tags',
    'specific_needs_groups',
    'affected_groups'
]].to_csv(
    'models_testing_round3.csv', index=None
)


In [25]:
# Share of sampled entries for which at least one sector was predicted.
taggers_df['sectors'].map(lambda tags: len(tags) > 0).sum() / len(taggers_df)

0.6065573770491803

In [21]:
# Read the exported file back to inspect what was written.
pd.read_csv('models_testing_round3.csv')

Unnamed: 0,entry_id,excerpt,predictions,wrong_tags,missing_tags,specific_needs_groups,affected_groups
0,489933,"[31st Aug 2021, Adamawa State] (Cholera) The t...",{'s̲e̲c̲t̲o̲r̲s': ['Health']}\n{'s̲u̲b̲p̲i̲l̲l...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
1,183653,"[7th October 2020, Overall Syria] The Ministry...",{'s̲e̲c̲t̲o̲r̲s': ['Health']}\n{'s̲u̲b̲p̲i̲l̲l...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
2,271808,"[February 25, As Sweida] The Director of Healt...",{'s̲e̲c̲t̲o̲r̲s': ['Protection']}\n{'s̲u̲b̲p̲i...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
3,174921,In several UNHCR-managed displacement sites in...,{'s̲e̲c̲t̲o̲r̲s': ['Health']}\n{'s̲u̲b̲p̲i̲l̲l...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
4,174554,"[ 1st July 2020, Borno states] Depicts the nom...",{'s̲e̲c̲t̲o̲r̲s': ['Food Security']}\n{'s̲u̲b̲...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
...,...,...,...,...,...,...,...
98,303849,"Other evidence, including hate statements tagg...",{'s̲e̲c̲t̲o̲r̲s': ['Protection']}\n{'s̲u̲b̲p̲i...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
99,272168,(S)GBV is a plight in Afghanistan. In the bigg...,{'s̲e̲c̲t̲o̲r̲s': ['Protection']}\n{'s̲u̲b̲p̲i...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
100,319997,"Fighting was reported mainly in Kandahar, Helm...",{'s̲e̲c̲t̲o̲r̲s': ['Protection']}\n{'s̲u̲b̲p̲i...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]
101,338477,- Project and programme implementors need to e...,{'s̲e̲c̲t̲o̲r̲s': ['Livelihoods']}\n{'s̲u̲b̲p̲...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,{'s̲e̲c̲t̲o̲r̲s': []}\n{'s̲u̲b̲p̲i̲l̲l̲a̲r̲s̲_...,[],[]


In [10]:
# Display the ground-truth severity column. 'severity' is not listed in
# all_cols, so presumably these values are still the raw strings read from the CSV.
test_df.severity

0        ['No problem']
1             ['Major']
2        ['Of Concern']
3                    []
4                    []
              ...      
17197      ['Critical']
17198      ['Critical']
17199      ['Critical']
17200      ['Critical']
17201                []
Name: severity, Length: 17202, dtype: object

In [18]:
# Sorted list of the distinct ground-truth 2D subpillar tags in the test set.
sorted(set(flatten(test_df['subpillars_2d'])))

['At Risk->Number Of People At Risk',
 'At Risk->Risk And Vulnerabilities',
 'Capacities & Response->International Response',
 'Capacities & Response->Local Response',
 'Capacities & Response->National Response',
 'Capacities & Response->Number Of People Reached/Response Gaps',
 'Humanitarian Conditions->Coping Mechanisms',
 'Humanitarian Conditions->Living Standards',
 'Humanitarian Conditions->Number Of People In Need',
 'Humanitarian Conditions->Physical And Mental Well Being',
 'Impact->Driver/Aggravating Factors',
 'Impact->Impact On People',
 'Impact->Impact On Systems, Services And Networks',
 'Impact->Number Of People Affected',
 'Priority Interventions->Expressed By Humanitarian Staff',
 'Priority Interventions->Expressed By Population',
 'Priority Needs->Expressed By Humanitarian Staff',
 'Priority Needs->Expressed By Population']

In [14]:
# List the distinct ground-truth severity values after converting each to a string.
test_df.severity.apply(str).unique()

array(["['No problem']", "['Major']", "['Of Concern']", '[]',
       "['Critical']", "['NOT_MAPPED']", "['Minor Problem']",
       "['Major', 'Critical']", 'nan'], dtype=object)