# Import

These requirements are necessary if you launch this notebook from SageMaker instances

In [1]:
"""!pip install mlflow
!pip install pytorch-lightning
!pip install transformers
!pip install tqdm
!pip install sagemaker
!pip install s3fs
!pip install smdebug"""

'!pip install mlflow\n!pip install pytorch-lightning\n!pip install transformers\n!pip install tqdm\n!pip install sagemaker\n!pip install s3fs\n!pip install smdebug'

In [1]:
import sys
sys.path.append('../../../')

import os
import sys
import logging
import argparse
from pathlib import Path
from ast import literal_eval
from collections import Counter
from typing import Any, Dict, Optional

In [2]:
from tqdm.auto import tqdm
from ast import literal_eval

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import sagemaker
from sagemaker import get_execution_role
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import MLFlowLogger

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, multilabel_confusion_matrix


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

Local constants, regarding the data, MLFlow server, paths, etc..: use them

In [3]:
from deep.constants import *
from deep.utils import *

In [4]:
%load_ext autoreload
%autoreload 2

## Sagemaker Prep

### Session

Configure SageMaker

In [5]:
sess = sagemaker.Session(default_bucket=DEV_BUCKET.name)
role = SAGEMAKER_ROLE
role_arn = SAGEMAKER_ROLE_ARN
tracking_uri = MLFLOW_SERVER

In [6]:
from mlflow import sagemaker

In [7]:
sagemaker.deploy(
    'all-models-v1',
    's3://deep-mlflow-artifact/19/0c982334d1e149c999707f79648bc08c/artifacts/pyfunc_models_all',
    execution_role_arn=SAGEMAKER_ROLE_ARN,
    image_url="961104659532.dkr.ecr.us-east-1.amazonaws.com/mlflow-pyfunc:latest",
    region_name="us-east-1",
    instance_type="ml.g4dn.xlarge",
    synchronous=False,
    archive=True,
)

2021/10/28 12:18:24 INFO mlflow.sagemaker: Using the python_function flavor for deployment!
2021/10/28 12:18:25 INFO mlflow.sagemaker: No model data bucket specified, using the default bucket
2021/10/28 12:18:26 INFO mlflow.sagemaker: Default bucket `mlflow-sagemaker-us-east-1-961104659532` already exists. Skipping creation.
2021/10/28 12:20:25 INFO mlflow.sagemaker: tag response: {'ResponseMetadata': {'RequestId': '78D6C433NRZDKFD5', 'HostId': 'BqEb62UA4rLyG0PK3Ggc3kK3UAHQsYOSVhTKpIcBNNA/CoX7TJon+NPRrSW57XhLkW/y0ip/8S0=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'BqEb62UA4rLyG0PK3Ggc3kK3UAHQsYOSVhTKpIcBNNA/CoX7TJon+NPRrSW57XhLkW/y0ip/8S0=', 'x-amz-request-id': '78D6C433NRZDKFD5', 'date': 'Thu, 28 Oct 2021 10:20:26 GMT', 'server': 'AmazonS3', 'content-length': '0'}, 'RetryAttempts': 0}}
2021/10/28 12:20:25 INFO mlflow.sagemaker: Creating new endpoint with name: all-models-v1 ...
2021/10/28 12:20:25 INFO mlflow.sagemaker: Created model with arn: arn:aws:sagemaker:us-east-1:9

In [33]:
def flatten(t):
    return [item for sublist in t for item in sublist]

import numpy as np

multilabel_columns = [
    #'sectors', 
    #'subpillars_2d', 
    #'subpillars_1d', 
    #'demographic_groups', 
    #'affected_groups', 
    'specific_needs_groups'
    ]


all_columns = [
    #'sectors', 
    #'subpillars_2d', 
    #'subpillars_1d', 
    #'demographic_groups', 
    #'affected_groups', 
    'specific_needs_groups',
    #'severity'
    ]


def get_predictions(test_probas, thresholds_dict, nb_entries=100):  
    """
    test_probas structure example: {
        'sectors':[
            {'Nutrition': 0.032076582, 'Shelter': 0.06674846}, 
            {'Cross': 0.21885818,'Education': 0.07529669}
        ],
        'demographic_groups':[
            {'Children/Youth Female (5 to 17 years old)': 0.47860646, 'Children/Youth Male (5 to 17 years old)': 0.42560646},
            {'Children/Youth Male (5 to 17 years old)': 0.47860646, 'Infants/Toddlers (<5 years old)': 0.85}
        ],
        .
        .
        .
    }
    
    thresholds_dict structure example: {
        'sectors':{
            'Agriculture': 0.2,
            'Cross': 0.02,
            .
            .
        },
        'subpillars_2d':{
            'Humanitarian Conditions->Physical And Mental Well Being': 0.7,
            .
            .
        },
        .
        .     
    }
    
    First iteration:
    - create dict which has the same structure as 'test_probas': 
    - contains ratio probability of output divided by the threshold
    
    Second iteration:
    - keep ratios superior to 1 except:
        - for subpillars_2d: when no ratio is superior to 1 but there is at least one prediction for sectors
        - for severity (no threshold, just keep max if there is 'Humanitarian Conditions' in secondary tags outputs)
    """

    #create dict of ratio between probability of output and threshold
    ratio_proba_threshold = {}
    for column in multilabel_columns:
        preds_column = test_probas[column]
        dict_keys = list(thresholds_dict[column].keys())

        returned_values_column = []
        for preds_sent in preds_column:
            dict_entry = {key:preds_sent[key]/thresholds_dict[column][key] for key in dict_keys }
            returned_values_column.append(dict_entry)
        ratio_proba_threshold[column] = returned_values_column

    predictions = {column:[] for column in all_columns}
    for entry_nb in range (nb_entries):

        # get the entries where the ratio is superior to 1 and put them in a dict {prediction:probability}
        for column in multilabel_columns:
            preds_column = ratio_proba_threshold[column][entry_nb]
            preds_entry = [
                sub_tag for sub_tag in list(preds_column.keys()) if ratio_proba_threshold[column][entry_nb][sub_tag]>1
            ]

            #postprocessing to keep only cross if more than one prediction
            """if column=='sectors' and len(preds_entry)>1:
                preds_entry.append('Cross')"""

            predictions[column].append(preds_entry)


        #postprocess 'subpillars_2d'
        """if len(predictions['sectors'][entry_nb])>0 and len(predictions['subpillars_2d'][entry_nb])==0:
            predictions['subpillars_2d'][entry_nb] = [
                sub_tag for sub_tag in list(preds_column.keys()) if\
                        test_probas[column][entry_nb][sub_tag] == max(list(test_probas[column][entry_nb].values()))
            ]

        if len(predictions['sectors'][entry_nb])==0 and len(predictions['subpillars_2d'][entry_nb])>0:
            predictions['subpillars_2d'][entry_nb] = []"""
            
        #severity  predictions and output
        """if 'Humanitarian Conditions' in str(predictions['subpillars_2d'][entry_nb]):
            pred_severity = [
                sub_tag for sub_tag in list(test_probas['severity'][entry_nb].keys()) if\
                test_probas['severity'][entry_nb][sub_tag] == max(list(test_probas['severity'][entry_nb].values()))
            ]

            predictions['severity'].append(pred_severity)
        else:
            predictions['severity'].append([])"""
            
    return predictions


In [18]:
from sklearn import metrics

def get_flat_matrix (column_of_columns, tag_to_id, nb_subtags):
    matrix = [[
        1 if tag_to_id[i] in column else 0 for i in range (nb_subtags)
    ] for column in column_of_columns]
    return flatten(matrix)

def assess_performance (preds, groundtruth, subtags):
    
    nb_subtags = len(subtags)
    tag_to_id = {i:subtags[i] for i in range (nb_subtags)}
    groundtruth_col = get_flat_matrix( groundtruth, tag_to_id, nb_subtags)
    preds_col = get_flat_matrix( preds, tag_to_id, nb_subtags)    
    
    results = {
        'precision': metrics.precision_score(groundtruth_col, preds_col, average='macro'),
        'recall': metrics.recall_score(groundtruth_col, preds_col, average='macro'),
        'f1': metrics.fbeta_score(groundtruth_col, preds_col, 0.8, average='macro'),
    }
    return results

In [19]:
test_df.columns

Index(['Unnamed: 0', 'entry_id', 'excerpt', 'analysis_framework_id', 'lead_id',
       'project_id', 'verified', 'sectors', 'subpillars_2d', 'subpillars_1d',
       'geo_location', 'specific_needs_groups', 'severity', 'info_date',
       'demographic_groups', 'reliability', 'affected_groups', 'source_type',
       'url', 'website', 'subpillars_2d_postprocessed',
       'subpillars_1d_postprocessed', 'language', 'sectors_preprocessed'],
      dtype='object')

In [34]:
import boto3
import timeit

DATA_PATH = os.path.join(
    '..', '..', '..', "data", "frameworks_data", 'data_v0.7','generated_dataset'
)

test_df = pd.read_csv(os.path.join(DATA_PATH, 'test_v0.7.csv'))

preds_cols = [
    'sectors_preprocessed', 
    #'subpillars_2d', 
    #'subpillars_1d',
    'specific_needs_groups',
    ]

In [35]:
test_df.shape

(5384, 24)

In [37]:
start = timeit.default_timer()
results = {col:{'precision':[],
        'recall':[],
        'f1': []
               } for col in preds_cols}

client = boto3.session.Session().client("sagemaker-runtime", region_name='us-east-1')

all_preds = []
preds = []
for i in tqdm(range(0,test_df.shape[0],100)):
    test_tmp = test_df[i:i+100]
    test_tmp = test_tmp[test_tmp['sectors'].apply(lambda x: 'Cross' not in literal_eval(x))]
    data = test_tmp[['excerpt']]
    input_json = data.to_json(orient="split")

    response = client.invoke_endpoint(
        EndpointName='all-models-v1',
        Body=input_json,
        ContentType="application/json; format=pandas-split",
    )
    output = literal_eval(response["Body"].read().decode("ascii"))


    preds = output[0]
    thresholds = output[1]
    final_preds = get_predictions(preds, thresholds, nb_entries = len(test_tmp))
    all_preds.append(final_preds)

    all_results = {}
    for column in preds_cols:
        
        results_column = assess_performance (
            final_preds[column], 
            test_tmp[column].apply(literal_eval).tolist(), 
            list(thresholds[column].keys()))

        results[column]['f1'].append(results_column['f1'])
        results[column]['recall'].append(results_column['recall'])
        results[column]['precision'].append(results_column['precision'])
    
end = timeit.default_timer()

HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))

predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 1000 10
predictions 100 1200 12
groundtruth 100 1200 12
predictions 100 1000 10
groundtruth 100 

In [38]:
print('time to predict 9 tags:', (end - start) / (len(test_df)))

time to predict 9 tags: 0.006210952629271763


In [39]:
final_scores = {column: {'f1': np.round(np.mean(results[column]['f1']), 3),
                        'recall': np.round(np.mean(results[column]['recall']), 3),
                        'precision': np.round(np.mean(results[column]['precision']), 3)}
                       for column in preds_cols}

In [40]:
final_scores

{'sectors_preprocessed': {'f1': 0.483, 'recall': 0.509, 'precision': 0.467},
 'specific_needs_groups': {'f1': 0.535, 'recall': 0.537, 'precision': 0.534}}

In [22]:
test_df.columns

Index(['Unnamed: 0', 'entry_id', 'excerpt', 'analysis_framework_id', 'lead_id',
       'project_id', 'verified', 'sectors', 'subpillars_2d', 'subpillars_1d',
       'geo_location', 'specific_needs_groups', 'severity', 'info_date',
       'demographic_groups', 'reliability', 'affected_groups', 'source_type',
       'url', 'website', 'subpillars_2d_postprocessed',
       'subpillars_1d_postprocessed', 'language'],
      dtype='object')

ratio_negative_examples_train_sectors	0.079
ratio_negative_examples_train_specific_needs_groups	0.43
ratio_negative_examples_val_sectors	0.159
ratio_negative_examples_val_specific_needs_groups	0.86

{'sectors': {'f1': 0.7804080268595655,
  'recall': 0.735234683825175,
  'precision': 0.8297208332571715},
 'specific_needs_groups': {'f1': 0.5353509621965712,
  'recall': 0.5370370370370371,
  'precision': 0.5342824074074074}}

## French
{'sectors': {'f1': 0.8106082312776003,
  'recall': 0.7556102211307624,
  'precision': 0.871518425690683},
 'subpillars_2d': {'f1': 0.48666263534309856,
  'recall': 0.5,
  'precision': 0.47853985507246377},
 'subpillars_2d_postprocessed': {'f1': 0.48666263534309856,
  'recall': 0.5,
  'precision': 0.47853985507246377},
 'subpillars_1d': {'f1': 0.49682228700546255,
  'recall': 0.5,
  'precision': 0.4948156436487639},
 'subpillars_1d_postprocessed': {'f1': 0.49682228700546255,
  'recall': 0.5,
  'precision': 0.4948156436487639},
 'specific_needs_groups': {'f1': 0.5776814931378339,
  'recall': 0.6057078405372669,
  'precision': 0.5705820085480773}}

## es:
{'sectors': {'f1': 0.7255359901310309,
  'recall': 0.686175736789064,
  'precision': 0.778796497427791},
 'subpillars_2d': {'f1': 0.48429271589595213,
  'recall': 0.5,
  'precision': 0.4747875},
 'subpillars_2d_postprocessed': {'f1': 0.48429271589595213,
  'recall': 0.5,
  'precision': 0.4747875},
 'subpillars_1d': {'f1': 0.49571411801277365,
  'recall': 0.5,
  'precision': 0.4930147058823529},
 'subpillars_1d_postprocessed': {'f1': 0.49571411801277365,
  'recall': 0.5,
  'precision': 0.4930147058823529},
 'specific_needs_groups': {'f1': 0.5378876984883212,
  'recall': 0.5388369158791035,
  'precision': 0.5395714732301624}}

## english (partial)
{'sectors': {'f1': 0.745366174594059,
  'recall': 0.7014592113539624,
  'precision': 0.7945457546843917},
 'subpillars_2d': {'f1': 0.4863329548570536,
  'recall': 0.5003407540387548,
  'precision': 0.4843109065770831},
 'subpillars_2d_postprocessed': {'f1': 0.48670402859489476,
  'recall': 0.5007639260944017,
  'precision': 0.5085751836087464},
 'subpillars_1d': {'f1': 0.4953352935458385,
  'recall': 0.5,
  'precision': 0.49239705882352947},
 'subpillars_1d_postprocessed': {'f1': 0.4953352935458385,
  'recall': 0.5,
  'precision': 0.49239705882352947},
 'specific_needs_groups': {'f1': 0.6435531044135325,
  'recall': 0.6713744334322078,
  'precision': 0.6345834181535146}}

## all languages
{'sectors': {'f1': 0.7801520885183256,
  'recall': 0.7373476698898899,
  'precision': 0.8260876969189497},
 'subpillars_2d': {'f1': 0.49836438284697837,
  'recall': 0.5102698392229874,
  'precision': 0.4953931126108611},
 'subpillars_2d_postprocessed': {'f1': 0.5017884216090068,
  'recall': 0.5117037791757205,
  'precision': 0.5706896474229045},
 'subpillars_1d': {'f1': 0.5056505310448632,
  'recall': 0.5092592592592593,
  'precision': 0.5033769063180827},
 'subpillars_1d_postprocessed': {'f1': 0.5056505310448632,
  'recall': 0.5092592592592593,
  'precision': 0.5033769063180827},
 'specific_needs_groups': {'f1': 0.5889316600070373,
  'recall': 0.6478921832488638,
  'precision': 0.5812163900505705}}

In [48]:

    
test_df = test_df[:100]


thresholds['severity'] = {}


    

In [51]:
all_results

{'sectors': {'precision': 0.7932598039215686,
  'recall': 0.7330291846413026,
  'f1': 0.7653177724267002},
 'subpillars_2d': {'precision': 0.47775,
  'recall': 0.5,
  'f1': 0.4861931592533757},
 'subpillars_2d_postprocessed': {'precision': 0.47775,
  'recall': 0.5,
  'f1': 0.4861931592533757},
 'subpillars_1d': {'precision': 0.4910294117647059,
  'recall': 0.5,
  'f1': 0.49449156215685147},
 'subpillars_1d_postprocessed': {'precision': 0.4910294117647059,
  'recall': 0.5,
  'f1': 0.49449156215685147},
 'specific_needs_groups': {'precision': 0.4970833333333333,
  'recall': 0.5,
  'f1': 0.49821748696219037}}

In [None]:
assert(1==2)

In [60]:

import fasttext
import re

In [61]:
# get the model
# https://fasttext.cc/docs/en/language-identification.html
# https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
fmodel = fasttext.load_model('../../../../translator_model.bin')
def lang_detect_ft(doc):
    if isinstance(doc, str):
        doc = re.sub("\s+", " ", doc)
        return fmodel.predict([doc])[0][0][0][len("__label__"):]
    return np.nan



In [62]:
test_df['language'] = test_df.excerpt.apply(lang_detect_ft)