In [None]:
import re
import string
import functools
from ast import literal_eval
from operator import itemgetter
from itertools import combinations
from collections import Counter, defaultdict, OrderedDict
from collections.abc import Sequence

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer

from tqdm.notebook import tqdm

from num2words import num2words
                                                                
import nltk
from nltk import ngrams
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize

from IPython.core.display import HTML

In [None]:
# Register tqdm's pandas integration so the `progress_apply` calls used
# throughout this notebook are available on Series/DataFrame objects.
tqdm.pandas()

In [None]:
def preprocess(doc):
    """Normalize a raw excerpt into lower-cased text without punctuation.

    Steps, in order: lower-case the text; surround every number (optionally
    containing one non-word separator, e.g. ``1,000``) with spaces; map
    dash-like characters and underscores to ``-``; glue words that were
    hyphenated across a break (``"inter- national"`` -> ``"international"``);
    replace the listed punctuation/symbol characters with spaces; collapse
    runs of whitespace.

    Args:
        doc: The excerpt text. Any non-string value (NaN, ``None``,
            ``pd.NA``, ...) is treated as a missing excerpt.

    Returns:
        The cleaned string, or ``""`` when ``doc`` is not a string.
    """
    # Missing excerpts/translations arrive as NaN (or None); they have no
    # ``.lower()``, so short-circuit on anything that is not text.
    if not isinstance(doc, str):
        return ""
    doc = doc.lower()
    # remove preceding dates (currently disabled)
    #doc = re.sub(r"^\[.+\]", " ", doc).strip()
    #doc = re.sub(r"^\(.+\)", " ", doc).strip()
    # spaces between numbers and words
    doc = re.sub(r'(\d+(\W\d+)?)', r' \1 ', doc).strip()
    # unify dash variants and underscores
    doc = re.sub("[‐‑–—―─_]", "-", doc)
    # re-join words split as "word- word"
    doc = re.sub(r"(\w)\- (\w)", r"\1\2", doc)
    # replace punctuation and stray symbols with spaces
    doc = re.sub(
        "[" + re.escape(
            '-_@^~.()[],"“’…<❖‐»—─|•&{≥➢\ue0e4\uf0d8\uf0fc●°#\u200b>`?�€■!‘%;̧\'›«”:≤―\uf0b7$}*´=‑▪\xad❑·–'
        ) + "]", " ", doc)
    # collapse whitespace
    doc = re.sub(r'\s+', " ", doc).strip()
    return doc
##
def preprocess_and_tokenize(doc, n=1):
    """Clean ``doc`` with ``preprocess`` and split it into tokens.

    Returns the ``word_tokenize`` token list when ``n == 1``; for any other
    ``n`` returns the set of n-gram tuples of length ``n`` built from those
    tokens.
    """
    tokens = word_tokenize(preprocess(doc))
    return tokens if n == 1 else set(ngrams(tokens, n))
##
def tokenize(words, n=1):
    """Return the distinct items of ``words`` (``n == 1``) or its distinct n-grams.

    For ``n == 1`` the result is ``set(words)``; otherwise it is the set of
    length-``n`` tuples produced by ``ngrams(words, n)``.
    """
    grams = words if n == 1 else ngrams(words, n)
    return set(grams)

In [None]:
# Columns read from every split file.
_SPLIT_COLUMNS = [
    'entry_id', 'excerpt', 'subpillars_1d', 'lang',
    "translation_en", "translation_fr", "translation_es"
]


def _read_split(split):
    """Read ``../data/<split>_v0.7.1.csv`` restricted to ``_SPLIT_COLUMNS``."""
    return pd.read_csv(f"../data/{split}_v0.7.1.csv", usecols=_SPLIT_COLUMNS)


df_train = _read_split("train")
df_val = _read_split("val")
df_test = _read_split("test")

In [None]:
def _parse_labels(raw):
    """Turn one ``subpillars_1d`` cell into a sorted, de-duplicated label list.

    ``raw`` is the string form of a list as read from the CSV; it is parsed
    with ``literal_eval``. A value that is not a string (e.g. the list left
    by a previous run of this cell) is used as-is, so re-running the cell
    does not raise. The placeholder labels ``'None'`` and ``'NOT_MAPPED'``
    are dropped.
    """
    labels = literal_eval(raw) if isinstance(raw, str) else raw
    return [
        label for label in sorted(set(labels))
        if label not in ('None', 'NOT_MAPPED')
    ]


col = 'subpillars_1d'
for df in [df_train, df_val, df_test]:
    df[col] = df[col].apply(_parse_labels)

In [None]:
def _with_english_excerpt(df):
    """Return a copy of ``df`` whose non-English excerpts are replaced.

    Rows where ``lang`` is not ``"en"`` get their ``excerpt`` overwritten
    with the value of ``translation_en``; other rows are left unchanged.
    """
    english = df.copy()
    not_english = english["lang"].ne("en")
    english.loc[not_english, "excerpt"] = english.loc[not_english,
                                                      "translation_en"]
    return english


df_train_en = _with_english_excerpt(df_train)
##
df_val_en = _with_english_excerpt(df_val)
##
df_test_en = _with_english_excerpt(df_test)

In [None]:
def unique_values(df, col):
    """Count how often each label occurs across the lists stored in ``df[col]``.

    Returns ``(label, count)`` pairs ordered from most to least frequent, as
    produced by ``Counter.most_common``.
    """
    tally = Counter(label for labels in df[col] for label in labels)
    return tally.most_common()

In [None]:
# unique_values(df_train_en, "subpillars_1d")

In [None]:
# Label strings looked up in the `subpillars_1d` label lists by the cells below.
population_to_relief = 'Humanitarian Access->Population To Relief'
relief_to_population = 'Humanitarian Access->Relief To Population'

In [None]:
col = "subpillars_1d"
##
# Training rows that carry at least one label.
has_any_label = df_train_en[col].apply(lambda labels: labels != [])
df_train_en_positive = df_train_en[has_any_label].copy()
##
# Labelled rows tagged with each of the two Humanitarian Access sub-pillars.
has_population_to_relief = df_train_en_positive[col].apply(
    lambda labels: population_to_relief in labels)
df_train_en_population_to_relief = df_train_en_positive[has_population_to_relief]
has_relief_to_population = df_train_en_positive[col].apply(
    lambda labels: relief_to_population in labels)
df_train_en_relief_to_population = df_train_en_positive[has_relief_to_population]

In [None]:
# cleaned text
df_train_en["excerpt_pp"] = df_train_en["excerpt"].progress_apply(preprocess)
# ordered token list; the n-gram columns below are derived from it
df_train_en["tokenized_excerpt"] = df_train_en["excerpt"].progress_apply(preprocess_and_tokenize)
# n-gram sets for n = 2..7
for prefix, size in (("bigram", 2), ("trigram", 3), ("fourgram", 4),
                     ("fivegram", 5), ("sixgram", 6), ("sevengram", 7)):
    df_train_en[f"{prefix}_excerpt"] = df_train_en["tokenized_excerpt"].progress_apply(
        lambda tokens, size=size: tokenize(tokens, size))
# finally reduce the token list itself to a set of distinct tokens
df_train_en["tokenized_excerpt"] = df_train_en["tokenized_excerpt"].progress_apply(
    lambda tokens: set(tokens))

In [None]:
# cleaned text
df_val_en["excerpt_pp"] = df_val_en["excerpt"].progress_apply(preprocess)
# ordered token list; the n-gram columns below are derived from it
df_val_en["tokenized_excerpt"] = df_val_en["excerpt"].progress_apply(preprocess_and_tokenize)
# n-gram sets for n = 2..7
for prefix, size in (("bigram", 2), ("trigram", 3), ("fourgram", 4),
                     ("fivegram", 5), ("sixgram", 6), ("sevengram", 7)):
    df_val_en[f"{prefix}_excerpt"] = df_val_en["tokenized_excerpt"].progress_apply(
        lambda tokens, size=size: tokenize(tokens, size))
# finally reduce the token list itself to a set of distinct tokens
df_val_en["tokenized_excerpt"] = df_val_en["tokenized_excerpt"].progress_apply(
    lambda tokens: set(tokens))

In [None]:
# cleaned text
df_test_en["excerpt_pp"] = df_test_en["excerpt"].progress_apply(preprocess)
# ordered token list; the n-gram columns below are derived from it
df_test_en["tokenized_excerpt"] = df_test_en["excerpt"].progress_apply(preprocess_and_tokenize)
# n-gram sets for n = 2..7
for prefix, size in (("bigram", 2), ("trigram", 3), ("fourgram", 4),
                     ("fivegram", 5), ("sixgram", 6), ("sevengram", 7)):
    df_test_en[f"{prefix}_excerpt"] = df_test_en["tokenized_excerpt"].progress_apply(
        lambda tokens, size=size: tokenize(tokens, size))
# finally reduce the token list itself to a set of distinct tokens
df_test_en["tokenized_excerpt"] = df_test_en["tokenized_excerpt"].progress_apply(
    lambda tokens: set(tokens))

In [None]:
#reporters: UNDAPEZ, OHCHR, UNHCR, ICBF, focus groups, WHO,
def population(row):
    """Return 1 when ``row`` contains a population cue, else 0.

    A cue is any of the single tokens below found in
    ``row["tokenized_excerpt"]`` or any of the trigrams below found in
    ``row["trigram_excerpt"]`` (both are intersected as sets).
    """
    # NOTE(review): preprocess() in this notebook replaces '-' with a space
    # before tokenizing, so 'asylum-seekers' looks unreachable — confirm.
    words = {
        'idps', 'idp', 'women', 'people', 'migrants', 'community',
        'communities', 'families', 'person', 'persons', 'children',
        'population', 'populations', 'victims', 'victim', 'employees',
        'respondents', 'respondent', 'interviewee', 'interviewees',
        'venezuelans', 'beneficiaries', 'beneficiary', 'survivors',
        'village', 'villages', 'lga', 'lgas', 'refugees', 'asylum-seekers',
        'asylumseekers', 'individuals', 'households', 'immigrants',
        'citizens', 'civilians', 'syrian', 'syrians',
    }
    triples = {
        ('human', 'rights', 'defenders'),
        ('those', 'who', 'move'),
    }
    if words & row["tokenized_excerpt"]:
        return 1
    return 1 if triples & row["trigram_excerpt"] else 0


def issue(row):
    """Return 1 when ``row`` mentions an issue cue (what is to be relieved), else 0.

    Single tokens are matched against ``row["tokenized_excerpt"]``, pairs
    against ``row["bigram_excerpt"]`` and triples against
    ``row["trigram_excerpt"]``; any hit is enough.
    """
    words = {
        'need', 'needs', 'displacements', 'displacement', 'affected',
        'exploited', 'exploitation', 'violence', 'insecurity', 'crisis',
        'killed', 'massacres', 'attacks', 'attack', 'murder', 'murders',
        'violations', 'violation', 'killings', 'killing', 'unemployed',
        'misinformation', 'barrier', 'barriers', 'gbv', 'sgbv',
    }
    pairs = {
        ('sexual', 'violence'),
        ('seasonal', 'floods'),
        ('poor', 'reporting'),
        ('entered', 'irregularly'),
        ('precarious', 'settlements'),
        ('precarious', 'settlement'),
        ('spontaneous', 'settlement'),
        ('spontaneous', 'settlements'),
        ('immigration', 'status'),
        ('difficult', 'access'),
        ('lengthy', 'delays'),
        ('lengthy', 'delay'),
    }
    triples = {('situations', 'of', 'risk')}
    if words & row["tokenized_excerpt"]:
        return 1
    if pairs & row["bigram_excerpt"]:
        return 1
    return 1 if triples & row["trigram_excerpt"] else 0


def actor(row):
    """Return 1 when ``row`` mentions an actor cue, else 0.

    Single tokens are matched against ``row["tokenized_excerpt"]``, pairs
    against ``row["bigram_excerpt"]`` and triples against
    ``row["trigram_excerpt"]``; any hit is enough.
    """
    words = {'ngo', 'ngos', 'organization', 'irc'}
    # NOTE(review): preprocess() in this notebook replaces '-' with a space
    # before tokenizing, so the 'health-care' pairs look unreachable — confirm.
    pairs = {
        ('un', 'humanitarian'),
        ('humanitarian', 'assistance'),
        ('humanitarian', 'operations'),
        ('humanitarian', 'organizations'),
        ('humanitarian', 'actors'),
        ('humanitarian', 'actor'),
        ('humanitarian', 'access'),
        ('humanitarian', 'operation'),
        ('humanitarian', 'mission'),
        ('humanitarian', 'missions'),
        ('health', 'actor'),
        ('health', 'actors'),
        ('health', 'organization'),
        ('health', 'organizations'),
        ('healthcare', 'organization'),
        ('healthcare', 'organizations'),
        ('health-care', 'organization'),
        ('health-care', 'organizations'),
        ('humanitarian', 'organization'),
        ('private', 'entities'),
        ('red', 'cross'),
        ('red', 'crescent'),
    }
    triples = {
        ('health', 'care', 'organization'),
        ('health', 'care', 'organizations'),
        ('international', 'rescue', 'committee'),
        ('health', 'sector', 'partners'),
    }
    if words & row["tokenized_excerpt"]:
        return 1
    if pairs & row["bigram_excerpt"]:
        return 1
    return 1 if triples & row["trigram_excerpt"] else 0


def service(row):
    """Return 1 when ``row`` mentions a service cue, else 0.

    Single tokens are matched against ``row["tokenized_excerpt"]``, pairs
    against ``row["bigram_excerpt"]`` and triples against
    ``row["trigram_excerpt"]``; any hit is enough.
    """
    # NOTE(review): preprocess() in this notebook replaces '-' with a space
    # before tokenizing, so 'un-clinic' looks unreachable — confirm.
    words = {
        'assistance', 'assistances', 'aid', 'treatment', 'assisted', 'goods',
        'mechanisms', 'mechanism', 'hospital', 'hospitals', 'food',
        'vaccine', 'vaccines', 'vaccination', 'medications', 'medication',
        'shelters', 'shelter', 'healthcare', 'unaids', 'usaid', 'imc',
        'cesvi', 'cris', 'cash', 'nutrition', 'wash', 'iehks', 'nfis', 'nfi',
        'reintegration', 'un-clinic', 'unclinic',
    }
    pairs = {
        ('medical', 'care'),
        ('health', 'service'),
        ('health', 'services'),
        ('health', 'care'),
        ('humanitarian', 'aid'),
        ('humanitarian', 'aids'),
        ('humanitarian', 'work'),
        ('humanitarian', 'response'),
        ('hygiene', 'kits'),
        ('hygiene', 'kit'),
        ('hygiene', 'promotion'),
        ('social', 'protection'),
        ('response', 'actions'),
        ('response', 'action'),
        ('relief', 'operations'),
        ('relief', 'operation'),
        ('psychosocial', 'support'),
        ('drinking', 'water'),
        ('waste', 'management'),
        ('hot', 'meals'),
        ('treat', 'malnutrition'),
        ('health', 'kits'),
        ('nonfood', 'items'),
        ('sanitation', 'services'),
        ('protection', 'services'),
        ('ambulance', 'services'),
        ('shelter', 'services'),
        ('psychological', 'services'),
        ('work', 'service'),
        ('institutions', 'services'),
        ('healthcare', 'services'),
        ('accessing', 'services'),
        ('emergency', 'services'),
        ('basic', 'services'),
        ('response', 'plans'),
        ('emergency', 'care'),
        ('health', 'benefits'),
        ('medical', 'supplies'),
        ('needed', 'supplies'),
        ('un', 'clinic'),
        ('health', 'emergency'),
        ('feeding', 'program'),
        ('health', 'needs'),
    }
    triples = {
        ('need', 'of', 'protection'),
        ('codes', 'of', 'conduct'),
        ('core', 'relief', 'items'),
        ('repairing', 'sanitation', 'facilities'),
    }
    if words & row["tokenized_excerpt"]:
        return 1
    if pairs & row["bigram_excerpt"]:
        return 1
    return 1 if triples & row["trigram_excerpt"] else 0


def receipt_status(row):
    """Return 1 when ``row`` mentions a receipt-status cue, else 0.

    Single tokens are matched against ``row["tokenized_excerpt"]``, pairs
    against ``row["bigram_excerpt"]`` and triples against
    ``row["trigram_excerpt"]``; any hit is enough.
    """
    words = {
        'unable', 'assisted', 'neglecting', 'hinders', 'hinder', 'hindering',
        'hindered', 'provided', 'provide', 'benefited', 'receive',
        'received', 'limit', 'limited', 'limiting', 'limitation',
        'limitations', 'used', 'restricted', 'restricting', 'restrictions',
        'restriction', 'constraints', 'constraint', 'discontent', 'omitted',
        'hampering', 'hamper', 'hampers', 'delivered', 'delivery',
        'deliveries', 'inaccessible', 'accessible', 'suspended', 'denied',
    }
    pairs = {
        ('restrict', 'access'),
        ('restricted', 'access'),
        ('cut', 'off'),
        ('can', 'access'),
        ('curtailing', 'access'),
        ('could', 'reach'),
    }
    triples = {
        ('suspension', 'of', 'services'),
        ('able', 'to', 'access'),
        ('able', 'to', 'offer'),
        ('refusal', 'of', 'access'),
    }
    if words & row["tokenized_excerpt"]:
        return 1
    if pairs & row["bigram_excerpt"]:
        return 1
    return 1 if triples & row["trigram_excerpt"] else 0


def negative_actor(row):
    """Return 1 when ``row`` mentions something hampering the relief, else 0.

    Single tokens are matched against ``row["tokenized_excerpt"]``, pairs
    against ``row['bigram_excerpt']`` and triples against
    ``row['trigram_excerpt']``; any hit is enough.
    """
    words = {
        'xenophobia', 'criminals', 'obstacles', 'isolation', 'government',
        'shame', 'stigmatization', 'stigmatisation', 'stigmatized',
        'stigmatised', 'ostracized', 'ostracised', 'ostracization',
        'ostracisation', 'nsags', 'discrimination', 'floods', 'landslides',
        'difficulty', 'difficulties', 'obstructed', 'obstacle', 'barriers',
        'barrier', 'abuse', 'mistreatment', 'marginalization',
        'marginalisation', 'neglected', 'authorities', 'blockages',
        'contagions', 'blocking',
    }
    pairs = {
        ('negative', 'effects'),
        ('armed', 'groups'),
        ('containment', 'measures'),
        ('security', 'conditions'),
        ('mitigation', 'measures'),
        ('criminal', 'groups'),
        ('armed', 'actors'),
        ('logistical', 'constraints'),
        ('natural', 'disasters'),
        ('natural', 'disaster'),
        ('provincial', 'directorates'),
        ('bureaucratic', 'impediments'),
        ('migratory', 'policies'),
        ('migratory', 'regulations'),
        ('local', 'officials'),
        ('migratory', 'controls'),
    }
    triples = {
        ('lack', 'of', 'documentation'),
        ('without', 'the', 'approval'),
    }
    if words & row["tokenized_excerpt"]:
        return 1
    if pairs & row['bigram_excerpt']:
        return 1
    return 1 if triples & row['trigram_excerpt'] else 0


############################################################
def lf_population_to_relief_pos_1(row):
    """Return 1 when ``row`` has population, service and receipt-status cues; else 0."""
    return int(all(cue(row) for cue in (population, service, receipt_status)))


def lf_population_to_relief_pos_2(row):
    """Return 1 when ``row`` has issue, service and receipt-status cues; else 0."""
    return int(all(cue(row) for cue in (issue, service, receipt_status)))


def lf_population_to_relief_pos_3(row):
    """Return 1 when ``row`` has population, issue and receipt-status cues; else 0."""
    return int(all(cue(row) for cue in (population, issue, receipt_status)))


def lf_population_to_relief_pos_4(row):
    """Return 1 for a negative-actor cue, an actor or service cue, and a population cue; else 0."""
    fires = (negative_actor(row)
             and (actor(row) or service(row))
             and population(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_5(row):
    """Return 1 when ``row`` has actor, service and population cues; else 0."""
    return int(all(cue(row) for cue in (actor, service, population)))


def lf_population_to_relief_pos_6(row):
    """Return 1 when ``row`` has actor, population and issue cues; else 0."""
    return int(all(cue(row) for cue in (actor, population, issue)))


def lf_population_to_relief_pos_7(row):
    """Return 1 when ``row`` has population, actor and receipt-status cues; else 0."""
    return int(all(cue(row) for cue in (population, actor, receipt_status)))


def lf_population_to_relief_pos_8(row):
    """Return 1 when ``row`` has issue, population and service cues; else 0."""
    return int(all(cue(row) for cue in (issue, population, service)))


def lf_population_to_relief_pos_9(row):
    """Return 1 when ``row`` has issue, population, negative-actor and receipt-status cues; else 0."""
    required = (issue, population, negative_actor, receipt_status)
    return int(all(cue(row) for cue in required))


def lf_population_to_relief_pos_10(row):
    """Return 1 when ``row`` has actor, service and receipt-status cues; else 0."""
    return int(all(cue(row) for cue in (actor, service, receipt_status)))


def lf_population_to_relief_pos_11(row):
    """Return 1 for an actor cue, any of population/negative-actor/issue, and a service cue; else 0."""
    fires = (actor(row)
             and (population(row) or negative_actor(row) or issue(row))
             and service(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_12(row):
    """Return 1 for an actor cue, any of population/negative-actor/issue, and a service or receipt-status cue; else 0."""
    fires = (actor(row)
             and (population(row) or negative_actor(row) or issue(row))
             and (service(row) or receipt_status(row)))
    return 1 if fires else 0


def lf_population_to_relief_pos_13(row):
    """Return 1 for an actor cue, any of population/negative-actor/issue, plus service and receipt-status cues; else 0."""
    fires = (actor(row)
             and (population(row) or negative_actor(row) or issue(row))
             and service(row)
             and receipt_status(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_14(row):
    """Return 1 for actor and population cues, a negative-actor or issue cue, plus service and receipt-status cues; else 0."""
    fires = (actor(row)
             and population(row)
             and (negative_actor(row) or issue(row))
             and service(row)
             and receipt_status(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_15(row):
    """Return 1 only when all six cue functions fire on ``row``; else 0."""
    required = (actor, population, negative_actor, issue, service,
                receipt_status)
    return int(all(cue(row) for cue in required))


def lf_population_to_relief_pos_16(row):
    """Return 1 for an actor cue, an issue or negative-actor cue, plus receipt-status and service cues; else 0."""
    fires = (actor(row)
             and (issue(row) or negative_actor(row))
             and receipt_status(row)
             and service(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_17(row):
    """Return 1 for an issue or negative-actor cue, plus receipt-status and service cues; else 0."""
    fires = ((issue(row) or negative_actor(row))
             and receipt_status(row)
             and service(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_18(row):
    """Return 1 for (an actor cue, or both issue and negative-actor cues), plus receipt-status and service cues; else 0."""
    fires = ((actor(row) or (issue(row) and negative_actor(row)))
             and receipt_status(row)
             and service(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_19(row):
    """Return 1 for any of actor/issue/negative-actor, plus receipt-status and service cues; else 0."""
    fires = ((actor(row) or issue(row) or negative_actor(row))
             and receipt_status(row)
             and service(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_20(row):
    """Return 1 for a negative-actor or issue cue, an actor or service cue, and a population cue; else 0."""
    fires = ((negative_actor(row) or issue(row))
             and (actor(row) or service(row))
             and population(row))
    return 1 if fires else 0


def lf_population_to_relief_pos_21(row):
    """Return 1 for a negative-actor or issue cue, an actor or service cue, and a population or receipt-status cue; else 0."""
    fires = ((negative_actor(row) or issue(row))
             and (actor(row) or service(row))
             and (population(row) or receipt_status(row)))
    return 1 if fires else 0


def lf_population_to_relief_pos_22(row):
    """Return 1 for a negative-actor or issue cue, an actor or service cue, and a receipt-status cue; else 0."""
    fires = ((negative_actor(row) or issue(row))
             and (actor(row) or service(row))
             and receipt_status(row))
    return 1 if fires else 0



def lf_population_to_relief_pos_23(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (negative_actor, service, receipt_status),
        (issue, service, receipt_status, population),
        (negative_actor, service, population),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_24(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (issue, service, receipt_status, population),
        (negative_actor, service, population),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_25(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (issue, service, receipt_status, population),
        (negative_actor, service, actor),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_26(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (issue, service, actor),
        (negative_actor, service, actor),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_27(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (issue, service, actor),
        (negative_actor, service, actor),
        (population, service, actor),
        (issue, population, actor),
        (negative_actor, population, actor),
        (negative_actor, population, service, receipt_status),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_28(row):
    """Return 1 when every cue of at least one clause below fires on ``row``; else 0.

    Clauses are tried in order and evaluation stops at the first that holds.
    """
    clauses = (
        (negative_actor, actor, receipt_status),
        (issue, actor, receipt_status),
        (issue, service, actor, receipt_status),
        (negative_actor, service, actor, receipt_status),
        (population, service, actor, receipt_status),
        (issue, population, actor, receipt_status),
        (negative_actor, population, actor, receipt_status),
        (negative_actor, population, service, receipt_status),
    )
    return int(any(all(cue(row) for cue in clause) for clause in clauses))


def lf_population_to_relief_pos_29(row):
    """Return 1 when either rule below holds for ``row``; else 0.

    Rule A: actor, receipt-status and population cues together with any of
    negative-actor/issue/service.
    Rule B: negative-actor, population, service and receipt-status cues.
    """
    rule_a = ((actor(row) and receipt_status(row) and population(row))
              and (negative_actor(row) or issue(row) or service(row)))
    if rule_a:
        return 1
    rule_b = (negative_actor(row) and population(row) and service(row)
              and receipt_status(row))
    return 1 if rule_b else 0

def lf_population_to_relief_pos_30(row):
    """Return 1 for actor, receipt-status and population cues together with any of negative-actor/issue/service; else 0."""
    fires = ((actor(row) and receipt_status(row) and population(row))
             and (negative_actor(row) or issue(row) or service(row)))
    return 1 if fires else 0


# Labeling-function variants 26-30, collected in one list so later cells can
# iterate over them.
lfs = [
    lf_population_to_relief_pos_26,
    lf_population_to_relief_pos_27,
    lf_population_to_relief_pos_28,
    lf_population_to_relief_pos_29,
    lf_population_to_relief_pos_30,
]

In [None]:
# Store each labeling function's 0/1 output on the training rows in a column
# named after the function.
for lf in lfs:
    df_train_en[lf.__name__] = df_train_en.progress_apply(lf, axis=1)


In [None]:
lf_names = [
    "lf_population_to_relief_pos_26",
    "lf_population_to_relief_pos_27",
    "lf_population_to_relief_pos_28",
    "lf_population_to_relief_pos_29",
    "lf_population_to_relief_pos_30",
]
# Gold labels: training rows tagged with the Population To Relief sub-pillar.
# The number of positives is taken from the data instead of a hard-coded count.
is_gold_positive = df_train_en["subpillars_1d"].apply(
    lambda labels: population_to_relief in labels)
n_gold_positive = int(is_gold_positive.sum())

for lf_name in lf_names:
    fired = df_train_en[lf_name].astype(bool)
    pos_pred = int(fired.sum())
    tp = int((fired & is_gold_positive).sum())
    # Guard the ratios so a labeling function that never fires (or a split
    # without positives) reports 0 instead of dividing by zero.
    precision = tp / pos_pred if pos_pred else 0.0
    recall = tp / n_gold_positive if n_gold_positive else 0.0
    # F1 is the harmonic mean of precision and recall.
    f1 = 2 * precision * recall / (precision + recall) if tp else 0.0
    r = round(recall * 100, 2)  # recall as a percentage
    print(f"{lf_name}, {pos_pred=}, {tp=} {precision=:.4f}, {r=:.2f}, f1={f1 * 100:.2f}")

In [None]:
# Labeling functions whose per-row output columns are combined by the two
# aggregation helpers in this cell.
lfs_final = [
    lf_population_to_relief_pos_26,
    lf_population_to_relief_pos_27,
    lf_population_to_relief_pos_28,
    lf_population_to_relief_pos_29,
    lf_population_to_relief_pos_30,
]

def population_to_relief_kw_all(row):
    """Return 1 when the column of every function in ``lfs_final`` is truthy on ``row``, else 0.

    Columns are looked up by function name (``lf.__name__``).
    """
    return int(all(row[lf.__name__] for lf in lfs_final))

def population_to_relief_kw_at_least_one(row):
    """Return 1 when the column of at least one function in ``lfs_final`` is truthy on ``row``, else 0.

    Columns are looked up by function name (``lf.__name__``).
    """
    return int(any(row[lf.__name__] for lf in lfs_final))
##
# Aggregate the per-function columns of the training rows into an
# "all fired" column and an "at least one fired" column.
for final_column, combine in (
        ("population_to_relief_final_all_kw", population_to_relief_kw_all),
        ("population_to_relief_final_one_kw", population_to_relief_kw_at_least_one),
):
    df_train_en[final_column] = df_train_en.apply(combine, axis=1)

In [None]:
# Number of training rows flagged by all labeling functions vs. by at least one.
df_train_en["population_to_relief_final_all_kw"].sum(), df_train_en["population_to_relief_final_one_kw"].sum()

In [None]:
# Same labeling-function and aggregate columns as for the training rows,
# computed for the validation split and then the test split.
for split_df in (df_val_en, df_test_en):
    for lf in lfs_final:
        split_df[lf.__name__] = split_df.progress_apply(lf, axis=1)
    split_df["population_to_relief_final_all_kw"] = split_df.apply(
        population_to_relief_kw_all, axis=1)
    split_df["population_to_relief_final_one_kw"] = split_df.apply(
        population_to_relief_kw_at_least_one, axis=1)

In [None]:
def only_26(row):
    """Return 1 when variant 26 fired on ``row`` and variants 27-30 did not, else 0."""
    fired = {
        variant: row[f"lf_population_to_relief_pos_{variant}"]
        for variant in (26, 27, 28, 29, 30)
    }
    own = fired.pop(26)
    return 1 if own and not any(fired.values()) else 0
###
def only_27(row):
    """Return 1 when variant 27 fired on ``row`` and variants 26, 28, 29 and 30 did not, else 0."""
    fired = {
        variant: row[f"lf_population_to_relief_pos_{variant}"]
        for variant in (26, 27, 28, 29, 30)
    }
    own = fired.pop(27)
    return 1 if own and not any(fired.values()) else 0
###
def only_28(row):
    """Return 1 when variant 28 fired on ``row`` and variants 26, 27, 29 and 30 did not, else 0."""
    fired = {
        variant: row[f"lf_population_to_relief_pos_{variant}"]
        for variant in (26, 27, 28, 29, 30)
    }
    own = fired.pop(28)
    return 1 if own and not any(fired.values()) else 0
###
def only_29(row):
    """Return 1 when variant 29 fired on ``row`` and variants 26, 27, 28 and 30 did not, else 0."""
    fired = {
        variant: row[f"lf_population_to_relief_pos_{variant}"]
        for variant in (26, 27, 28, 29, 30)
    }
    own = fired.pop(29)
    return 1 if own and not any(fired.values()) else 0
###
def only_30(row):
    """Return 1 when variant 30 fired on ``row`` and variants 26-29 did not, else 0."""
    fired = {
        variant: row[f"lf_population_to_relief_pos_{variant}"]
        for variant in (26, 27, 28, 29, 30)
    }
    own = fired.pop(30)
    return 1 if own and not any(fired.values()) else 0
###

In [None]:
# Number of validation rows flagged by all labeling functions vs. by at least one.
df_val_en["population_to_relief_final_all_kw"].sum(), df_val_en["population_to_relief_final_one_kw"].sum()

In [None]:
# One 0/1 column per "only this variant fired" check, named after the check.
only_checks = (only_26, only_27, only_28, only_29, only_30)
for check in only_checks:
    df_val_en[check.__name__] = df_val_en.apply(check, axis=1)
###
# Row counts per check, in the order 26..30.
tuple(df_val_en[check.__name__].sum() for check in only_checks)

In [None]:
# Validation rows where every labeling function fired, or where only variant 27 fired.
verify_mask = (df_val_en["population_to_relief_final_all_kw"]
               | df_val_en["only_27"]).astype(bool)
df_val_en_verify = df_val_en[verify_mask]

In [None]:
# Write the entry ids and excerpts of the selected validation rows to an Excel
# file in the working directory (presumably for manual checking — confirm).
df_val_en_verify.to_excel("PopulationToRelief.xlsx", index=False, columns=['entry_id', 'excerpt'])

In [None]:
# Inspect one training row: its text, the cue functions that fire on it and
# the labeling functions that fire on it.
i = 20
row = df_train_en.iloc[i]
separator = "_"*30
print(row["excerpt"])
print(separator)
##
cue_functions = (population, actor, receipt_status, negative_actor, issue, service)
for block in cue_functions:
    if block(row):
        print(block.__name__)
print(separator)
##
for lf in lfs:
    if lf(row) == 1:
        print(lf.__name__)

In [None]:
# Training rows tagged with the Population To Relief sub-pillar, as a copy.
is_population_to_relief = df_train_en["subpillars_1d"].apply(
    lambda labels: population_to_relief in labels)
df_train_en_population_to_relief_c = df_train_en[is_population_to_relief].copy()
##
# One 0/1 column per cue function, named after the function.
for cue in (population, issue, receipt_status, service, actor, negative_actor):
    df_train_en_population_to_relief_c[cue.__name__] = (
        df_train_en_population_to_relief_c.apply(cue, axis=1))