### Matching GIMAC Generic

In [1]:
import re
import json
import random
from pprint import pprint
from ast import literal_eval
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [2]:
random.seed(2021)
np.random.seed(2021)

In [3]:
# Each line in this table corresponds to a single tagging of an entry.
# An entry may have more than one tagging (e.g. a tag in a 2D matrix, another in a
# 1D matrix, secondary tags, etc.).
exportdata = pd.read_csv("exportdata.csv")
# Entries related to the projects chosen by Patrice.
entries = pd.read_csv("entries.csv")
# The widgets of the AFs, i.e. 2D matrices, 1D matrices, secondary tagging widgets, etc.
af_widgets = pd.read_csv("af_widgets_of_interest.csv")
# Projects chosen by Patrice.
projects = pd.read_csv("projects.csv")
# Exportables of the AFs; each exportdata row refers to one through "exportable_id",
# and the exportable's "data" holds the excel title(s) used as a fallback tag title.
exportables = pd.read_csv("af_exportables.csv")
# AF details (id, title, ...).
afs = pd.read_csv("analysis_frameworks.csv")
# User ids, first names and last names.
users = pd.read_csv("user_names.csv")
# The matching of pillar/sub-pillar names from different AFs onto unified
# ("virtual") pillar/sub-pillar names.
matching_gimac = pd.read_csv("gimac_to_vaf.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
af_id_to_title = afs[["id", "title"]].set_index("id")["title"].to_dict()
af_title_to_ids = afs[["id", "title"]].groupby("title")["id"].apply(list).to_dict()
af_title_to_ids["GIMAC Generic"]

[1465]

In [5]:
# Keep only the data of the "GIMAC Generic" analysis framework.
GIMAC_AF_ID = 1465  # the id listed under af_title_to_ids["GIMAC Generic"]

# .copy() gives every filtered frame its own data. Later cells assign columns on
# these frames (e.g. af_widgets["title"], exportdata["tag_title"]); without the
# copy those assignments target a slice of the raw frame and trigger
# SettingWithCopyWarning.
entries = entries[entries["analysis_framework_id"].eq(GIMAC_AF_ID)].copy()
af_widgets = af_widgets[af_widgets["analysis_framework_id"].eq(GIMAC_AF_ID)].copy()
exportables = exportables[exportables["analysis_framework_id"].eq(GIMAC_AF_ID)].copy()
# exportdata has no AF column of its own; it is filtered through its exportable.
exportdata = exportdata[exportdata["exportable_id"].isin(exportables["id"])].copy()

In [6]:
exportdata[exportdata["exportable_id"].isin(exportables.id)]["entry_id"].unique().shape

(42220,)

In [7]:
# Maps the different spellings of a widget title to one canonical tag name.
# Each group below lists the raw spellings first and the canonical name last.
raw_name_to_tag_dict = {
    **dict.fromkeys(
        ("Severity", "Severity (Needs assessment entries only)", "SEVERITY"),
        "Severity",
    ),
    **dict.fromkeys(("Reliability", "RELIABILITY"), "Reliability"),
    **dict.fromkeys(
        ("Demographic Groups", "DEMOGRAPHIC GROUPS"),
        "Demographic Groups",
    ),
    **dict.fromkeys(
        (
            "Information date",
            "Information Date",
            "Date",
            "DATE OF INFORMATION",
            "Date range",
        ),
        "Information Date",
    ),
    **dict.fromkeys(
        ("Geo Location", "LOCATION", "Geo location", "Country", "GEOLOCATIONS"),
        "Geo Location",
    ),
    **dict.fromkeys(("Affected groups", "AFFECTED GROUPS"), "Affected Groups"),
    **dict.fromkeys(
        ("Specific Needs Groups", "SPECIFIC NEEDS GROUPS"),
        "Specific Needs Groups",
    ),
}


In [8]:
def clean_titles(x):
    """Return the canonical name of a widget title.

    A title listed in ``raw_name_to_tag_dict`` is replaced by its mapped
    name; every other title is returned in title case.
    """
    if x not in raw_name_to_tag_dict:
        return x.title()
    return raw_name_to_tag_dict[x]


af_widgets['title'] = af_widgets['title'].apply(clean_titles)

In [9]:
af_widgets['title']

146    Specific Needs Groups
208               Pre-Crisis
209                In-Crisis
213             Geo Location
244                  Excerpt
246              Shock/Event
248              Reliability
253       Additional Context
254       Displaced Pop Type
257            Cleaning Tags
258        Population Groups
259        Cleaning Comments
260       Demographic Groups
261          High Level Tags
Name: title, dtype: object

In [10]:
# Widget titles of the 2D matrices in the chosen AF.
mat2d_titles = [
    "Pre-Crisis",
    "Shock/Event",
    "In-Crisis",
]
# Widget titles of the 1D matrices in the chosen AF.
mat1d_titles = ["High Level Tags"]

# pandas reads the JSON objects as strings; convert them into Python dicts.
# Values that are already parsed are left untouched so the cell can be re-run.
af_widgets["properties"] = af_widgets["properties"].apply(
    lambda props: json.loads(props) if isinstance(props, str) else props)

# Both title lists are written in title case, so compare title-cased widget
# titles. The 1D lookup used to upper-case the titles, which can never equal
# "High Level Tags" and therefore selected no 1D matrix at all.
widget_titles = af_widgets["title"].str.title()

## 2D matrices
mat2d_properties_ids = af_widgets[widget_titles.isin(mat2d_titles)][[
    "properties", "analysis_framework_id"
]]
mat2d_properties = mat2d_properties_ids["properties"].tolist()
mat2d_ids = mat2d_properties_ids["analysis_framework_id"].tolist()

## 1D matrices
mat1d_properties_ids = af_widgets[widget_titles.isin(mat1d_titles)][[
    "properties", "analysis_framework_id"
]]
mat1d_properties = mat1d_properties_ids["properties"].tolist()
mat1d_ids = mat1d_properties_ids["analysis_framework_id"].tolist()

In [11]:
def _matrix_groups(matrix, group_key, member_key, label_key):
    """Return ``{group title: [member labels]}`` for one matrix widget.

    ``matrix["data"][group_key]`` is the list of groups (dimensions or rows),
    ``group[member_key]`` the members of a group (subdimensions or cells) and
    ``member[label_key]`` the label kept for each member.

    Raises:
        KeyError: if the widget properties lack any of the expected keys.
    """
    return {
        group["title"]: [member[label_key] for member in group[member_key]]
        for group in matrix["data"][group_key]
    }


afids_pillars_subpillars = dict()
afids_rows_cells = dict()
# [properties, af_id] of every matrix whose properties could not be parsed
errors = []

# 2D matrices: dimension title -> subdimension titles
for mat, af_id in zip(mat2d_properties, mat2d_ids):
    try:
        parsed = _matrix_groups(mat, "dimensions", "subdimensions", "title")
    except KeyError:
        errors.append([mat, af_id])
        continue
    # Several matrices can belong to the same AF, so merge into the AF's entry
    # instead of resetting it (resetting kept only the last matrix).
    afids_pillars_subpillars.setdefault(af_id, {}).update(parsed)

# 1D matrices: row title -> cell values
for mat, af_id in zip(mat1d_properties, mat1d_ids):
    try:
        parsed = _matrix_groups(mat, "rows", "cells", "value")
    except KeyError:
        errors.append([mat, af_id])
        continue
    afids_rows_cells.setdefault(af_id, {}).update(parsed)

In [12]:
def extract_title(x):
    """Return the excel title(s) stored in an exportable's data.

    Args:
        x: mapping with an ``"excel"`` mapping inside.

    Returns:
        ``x["excel"]["title"]`` when it is set and truthy; otherwise
        ``x["excel"]["titles"]`` when the excel type is ``"multiple"``;
        otherwise ``None``.
    """
    excel = x["excel"]
    single_title = excel.get("title")
    if single_title:
        return single_title
    if excel.get("type") == "multiple":
        return excel["titles"]
    return None
##
exportables["data"] = exportables["data"].apply(literal_eval)
exportable_titles = exportables["data"].apply(extract_title).tolist()

In [13]:
print(entries.shape, exportdata.shape)
entries = entries[entries["entry_type"].eq("excerpt") & (~entries["excerpt"].isna())]
print(entries.shape, exportdata.shape)

(42220, 21) (186929, 4)
(38148, 21) (186929, 4)


In [14]:
# Lookup table: exportable id -> parsed "data" of that exportable.
exid_to_exdata = dict(zip(exportables["id"], exportables["data"]))

In [15]:
# Lookup table: (widget key, widget id) -> cleaned widget title.
widget_key_id_to_title = {
    (w_key, w_id): w_title
    for w_key, w_id, w_title in zip(
        af_widgets["key"], af_widgets["widget_id"], af_widgets["title"])
}

In [16]:
def exportdata_to_tag_title(row):
    """Resolve the tag (widget) title of one ``exportdata`` row.

    The sources are tried in this order and the first hit wins:

    1. the widget referenced by ``data["common"]`` (widget_key / widget_id),
       looked up in ``widget_key_id_to_title``;
    2. the title of the single element of ``data["report"]["other"]``;
    3. the widget referenced by the single element of a list-valued
       ``data["excel"]``, looked up in ``widget_key_id_to_title``;
    4. the excel title(s) of the row's exportable in ``exid_to_exdata``.

    Args:
        row: one row of ``exportdata``; the "data" (JSON string) and
            "exportable_id" fields are read by label, so the result does not
            depend on column order or on positional Series indexing.

    Returns:
        A title string, or the exportable's list of titles when its excel
        type is "multiple".

    Raises:
        ValueError: if none of the sources yields a title (this used to be a
            bare ``raise``, which fails with an unrelated RuntimeError).
    """
    data = json.loads(row["data"])

    # 1) widget referenced by the "common" section
    common = data.get("common") or {}
    wkey, wid = common.get("widget_key"), common.get("widget_id")
    if wkey and wid:
        title = widget_key_id_to_title.get((wkey, wid))
        if title:
            return title

    # 2) a report with exactly one titled "other" element
    others = (data.get("report") or {}).get("other")
    if others and len(others) == 1 and others[0].get("title"):
        return others[0]["title"]

    # 3) a single-element excel list that references a widget
    excel = data.get("excel")
    if isinstance(excel, list) and len(excel) == 1:
        wkey, wid = excel[0].get("widget_key"), excel[0].get("widget_id")
        if wkey and wid:
            title = widget_key_id_to_title.get((wkey, wid))
            if title:
                return title

    # 4) fall back to the title(s) declared by the exportable itself
    exportable_id = row["exportable_id"]
    exportable_excel = exid_to_exdata[exportable_id]["excel"]
    if exportable_excel.get("title"):
        return exportable_excel["title"]
    if exportable_excel.get("type") == "multiple":
        return exportable_excel["titles"]

    raise ValueError(
        f"cannot resolve a tag title for exportable {exportable_id!r}")

In [17]:
exportdata["tag_title"] = exportdata.apply(exportdata_to_tag_title, axis=1)

In [18]:
def exportdata_to_tag_value(row):
    """Extract the tag value(s) stored in one ``exportdata`` row.

    Lookup order:

    1. ``data["excel"]`` is a single-element list -> that element's "value";
    2. ``data["excel"]`` is a dict -> its "values" when the type is "lists",
       otherwise its "value", otherwise its "values";
    3. ``data["common"]`` -> its "values", otherwise its "value".

    An excel list with zero or several elements, or a missing "excel" /
    "common" section, is skipped and the next source is tried; these cases
    used to fail with AttributeError / KeyError.

    Args:
        row: one row of ``exportdata``; only the "data" field (a JSON string)
            is read, by label.

    Returns:
        The stored value or list of values.

    Raises:
        ValueError: if no value can be found (this used to be a bare
            ``raise``, which fails with an unrelated RuntimeError).
    """
    data = json.loads(row["data"])

    excel = data.get("excel")
    if isinstance(excel, list):
        if len(excel) == 1:
            return excel[0]["value"]
    elif isinstance(excel, dict):
        if excel.get("type") == "lists":
            return excel["values"]
        if "value" in excel:
            return excel["value"]
        if "values" in excel:
            return excel["values"]

    common = data.get("common") or {}
    if "values" in common:
        return common["values"]
    if "value" in common:
        return common["value"]

    raise ValueError("no tag value found in exportdata row")

In [19]:
exportdata["tag_value"] = exportdata.apply(exportdata_to_tag_value, axis=1)

In [20]:
def title_case(tag):
    """Title-case a tag title.

    A single title is returned as a title-cased string; a list or tuple of
    titles is returned as a tuple of title-cased strings.
    """
    if not isinstance(tag, (list, tuple)):
        return tag.title()
    return tuple(part.title() for part in tag)

In [21]:
exportdata["tag_title"] = exportdata["tag_title"].apply(title_case)

In [22]:
mat_1d_and_flag_titles = ["High Level Tags"]#, ('High Level Tags - Dimension', 'High Level Tags - Subdimension')]

In [23]:
exportdata["tag_title"].unique()

array(['Pre-Crisis', 'Population Groups', 'Geo Location', 'Reliability',
       'Demographic Groups', 'Shock/Event', 'High Level Tags',
       'In-Crisis', 'Specific Needs Groups', 'Additional Context',
       'Displaced Pop Type', 'Cleaning Tags', 'Cleaning Comments',
       ('In-Crisis - Dimension', 'In-Crisis - Subdimension', 'In-Crisis - Sector', 'In-Crisis - Subsectors'),
       ('Pre-Crisis - Dimension', 'Pre-Crisis - Subdimension', 'Pre-Crisis - Sector', 'Pre-Crisis - Subsectors'),
       ('High Level Tags - Dimension', 'High Level Tags - Subdimension')],
      dtype=object)

In [24]:
af_widgets["title"].unique()

array(['Specific Needs Groups', 'Pre-Crisis', 'In-Crisis', 'Geo Location',
       'Excerpt', 'Shock/Event', 'Reliability', 'Additional Context',
       'Displaced Pop Type', 'Cleaning Tags', 'Population Groups',
       'Cleaning Comments', 'Demographic Groups', 'High Level Tags'],
      dtype=object)

In [25]:
#exportdata[exportdata["tag_title"].isin([('High Level Tags - Dimension', 'High Level Tags - Subdimension')])]["tag_value"].tolist()
# exportdata[exportdata["tag_title"].isin([('In-Crisis - Dimension', 'In-Crisis - Subdimension', 'In-Crisis - Sector', 'In-Crisis - Subsectors'),
#        ('Pre-Crisis - Dimension', 'Pre-Crisis - Subdimension', 'Pre-Crisis - Sector', 'Pre-Crisis - Subsectors')])]["tag_value"].tolist()
#exportdata[exportdata["tag_title"].eq('High Level Tags')]["tag_value"].tolist()
exportdata[exportdata["tag_title"].eq('High Level Tags')]["tag_value"].shape, \
exportdata[exportdata["tag_title"].eq('Pre-Crisis')]["tag_value"].shape, \
exportdata[exportdata["tag_title"].eq('In-Crisis')]["tag_value"].shape, \
exportdata[exportdata["tag_title"].eq('Shock/Event')]["tag_value"].shape

((8024,), (8644,), (34198,), (4647,))

In [26]:
exportdata_of_interest = exportdata[exportdata["tag_title"].isin(
    mat2d_titles + mat_1d_and_flag_titles)]

In [27]:
exportdata_of_interest.shape, entries.shape

((55513, 6), (38148, 21))

In [28]:
entries_labeled = pd.merge(entries,
                           exportdata_of_interest,
                           how="inner",
                           left_on="id",
                           right_on="entry_id",
                           suffixes=('_entry', '_exportdata'))

In [29]:
entries_labeled["id_entry"].shape, entries_labeled["id_entry"].unique().shape

((49789,), (38148,))

In [30]:
def agg_group(group):
    """Gather the tag values of one group of rows, bucketed by matrix kind.

    Values whose title is in ``mat_1d_and_flag_titles`` are appended to the
    "Mat1D" bucket, values whose title is in ``mat2d_titles`` to the "Mat2D"
    bucket; rows with any other title are ignored.

    Args:
        group: frame with "tag_title" and "tag_value" columns.

    Returns:
        A ``defaultdict(list)`` holding the buckets that received values.
    """
    buckets = defaultdict(list)
    for title, value in zip(group["tag_title"], group["tag_value"]):
        if title in mat_1d_and_flag_titles:
            bucket = "Mat1D"
        elif title in mat2d_titles:
            bucket = "Mat2D"
        else:
            continue  # title belongs to neither matrix kind
        buckets[bucket].extend(value)
    return buckets

In [31]:
entries_labeled.columns

Index(['id_entry', 'created_at', 'modified_at', 'excerpt', 'image_raw',
       'analysis_framework_id', 'created_by_id', 'lead_id', 'modified_by_id',
       'entry_type', 'information_date', 'order', 'client_id', 'project_id',
       'tabular_field_id', 'dropped_excerpt', 'highlight_hidden', 'verified',
       'verification_last_changed_by_id', 'image_id', 'title', 'id_exportdata',
       'data', 'entry_id', 'exportable_id', 'tag_title', 'tag_value'],
      dtype='object')

In [32]:
tags = entries_labeled.groupby("id_entry").apply(agg_group)

In [33]:
entryids_tags = pd.DataFrame({"entry_id": tags.index, "tags": tags.values})

In [34]:
entryids_tags.shape

(38148, 2)

In [35]:
entries_ = entries.rename({"id": "entry_id"}, axis=1, inplace=False)
#
print(entries.shape)
#
entries_ = entries_[[
    'entry_id',
    'created_at',
    'modified_at',
    'excerpt',
    'entry_type',
    'analysis_framework_id',
    'created_by_id',
    'lead_id',
    'modified_by_id',
    'information_date',
    'order',
    'project_id',
    'verified',
]].drop_duplicates()
#
print(entries_.shape)

(38148, 21)
(38148, 13)


In [36]:
entries_labeled = pd.merge(entries_, entryids_tags, on="entry_id", how="inner")
entries_labeled.shape, entries_labeled.columns

((38148, 14),
 Index(['entry_id', 'created_at', 'modified_at', 'excerpt', 'entry_type',
        'analysis_framework_id', 'created_by_id', 'lead_id', 'modified_by_id',
        'information_date', 'order', 'project_id', 'verified', 'tags'],
       dtype='object'))

In [37]:
entries_labeled["tags_str"] = entries_labeled["tags"].apply(str)
entries_labeled.duplicated(subset=["entry_id", "tags_str"]).sum()

0

In [38]:
entries_labeled["entry_id"].duplicated().sum()

0

In [39]:
entries_labeled.shape

(38148, 15)

In [40]:
entries_labeled["tags"].isna().sum()

0

In [41]:
entries.shape, entries[~entries["excerpt"].isna()]["excerpt"].unique().shape

((38148, 21), (37729,))

In [42]:
entries_labeled.shape, entries_labeled.columns

((38148, 15),
 Index(['entry_id', 'created_at', 'modified_at', 'excerpt', 'entry_type',
        'analysis_framework_id', 'created_by_id', 'lead_id', 'modified_by_id',
        'information_date', 'order', 'project_id', 'verified', 'tags',
        'tags_str'],
       dtype='object'))

In [43]:
def tags_to_pillars(x):
    """Build the "<type>-><pillar>-><sub-pillar>" labels of one entry.

    ``<type>`` is the last two characters of the tag key ("Mat1D" -> "1D",
    "Mat2D" -> "2D"); pillar and sub-pillar are the first two items of each
    tag value, title-cased. Values whose first item is empty are skipped.

    Args:
        x: row holding the entry's tag mapping under the "tags" label. It is
            read by label rather than by position, because positional integer
            indexing of a label-indexed Series is deprecated in pandas.

    Returns:
        The distinct labels as a sorted list. Sorting makes the result
        reproducible; ``list(set(...))`` ordered the strings by their
        per-process hash.
    """
    tags = x["tags"]
    pillars = set()
    for tag_key, tag_values in tags.items():
        matrix_type = tag_key[-2:]
        for t in tag_values:
            if t[0]:
                pillars.add(
                    matrix_type + "->" + t[0].title() + "->" + t[1].title())
    return sorted(pillars)

def tags_to_sectors(x):
    """Collect the sectors tagged on one entry.

    Only the "Mat2D" bucket is read; the sector is the third item of each
    tag value, title-cased. Values without a third item, or with an empty
    one, are skipped.

    Args:
        x: row holding the entry's tag mapping under the "tags" label. It is
            read by label rather than by position, because positional integer
            indexing of a label-indexed Series is deprecated in pandas.

    Returns:
        The distinct sector names as a sorted list. Sorting makes the result
        reproducible; ``list(set(...))`` ordered the strings by their
        per-process hash.
    """
    tags = x["tags"]
    sectors = {
        t[2].title()
        for t in tags.get("Mat2D", [])
        if len(t) > 2 and t[2]
    }
    return sorted(sectors)

In [44]:
entries_labeled.columns

Index(['entry_id', 'created_at', 'modified_at', 'excerpt', 'entry_type',
       'analysis_framework_id', 'created_by_id', 'lead_id', 'modified_by_id',
       'information_date', 'order', 'project_id', 'verified', 'tags',
       'tags_str'],
      dtype='object')

In [45]:
entries_labeled["pillars"] = entries_labeled[["tags", 'analysis_framework_id'
                                              ]].apply(tags_to_pillars, axis=1)
entries_labeled["sectors"] = entries_labeled[["tags", 'analysis_framework_id'
                                              ]].apply(tags_to_sectors, axis=1)

In [46]:
entries_labeled[~entries_labeled["excerpt"].isna()]["excerpt"].unique().shape

(37729,)

In [47]:
pills_unique_1d = set()
pills_occurances_1d = list()
for pills_1d in entries_labeled["pillars"]:
    pills_unique_1d.update(pills_1d)
    pills_occurances_1d.extend(pills_1d)
len(pills_unique_1d), Counter(pills_occurances_1d).most_common()

(49,
 [('2D->Operational Environment->International Humanitarian Response And Capacities',
   7480),
  ('2D->Humanitarian Conditions->Humanitarian Condition', 7199),
  ('2D->Humanitarian Conditions->Risk', 3499),
  ('2D->Stated Priorities And Recommendations->Recommendations From Aid/Research Actors',
   3442),
  ('2D->Operational Environment->National/Sub-National Response And Capacities',
   3290),
  ('2D->Humanitarian Situation->Humanitarian Profile', 2914),
  ('1D->General ->Risk', 2339),
  ('2D->Effects On Population->Post-Shock Displacement', 1996),
  ('2D->Effects Systems And Networks->Basic Infrastructure And Social Services',
   1843),
  ('2D->Effects On Population->Disruption Of Social Behaviors And Networks',
   1749),
  ('2D->Effects On Population->Damage To Physical Integrity', 1724),
  ('2D->Effects On Population->Disruption Of Resources And Assets', 1303),
  ('2D->Operational Environment->Humanitarian Access', 1252),
  ('1D->General ->Response Gap', 1110),
  ('2D->Shock 

In [48]:
secs_unique = set()
secs_occurances = list()
for secs in entries_labeled["sectors"]:
    secs_unique.update(secs)
    secs_occurances.extend(secs)
#secs_unique
Counter(secs_occurances).most_common()

[('Cross-Sector', 12411),
 ('Health', 8676),
 ('Protection', 7239),
 ('Food Security', 4568),
 ('Livelihoods', 3974),
 ('Wash', 3345),
 ('Education', 2923),
 ('Shelter And Nfis', 2002),
 ('Nutrition', 2000),
 ('Cccm', 678),
 ('Shelter', 311)]

In [49]:
entries_labeled.columns

Index(['entry_id', 'created_at', 'modified_at', 'excerpt', 'entry_type',
       'analysis_framework_id', 'created_by_id', 'lead_id', 'modified_by_id',
       'information_date', 'order', 'project_id', 'verified', 'tags',
       'tags_str', 'pillars', 'sectors'],
      dtype='object')

In [50]:
final_df = entries_labeled[entries_labeled["entry_type"].eq("excerpt")
                           & (~entries_labeled["excerpt"].isna())]
final_df.shape

(38148, 17)

In [51]:
def remove_newlines(excerpt):
    """Collapse every run of whitespace in an excerpt into a single space.

    Non-string values (e.g. NaN for a missing excerpt) are returned
    unchanged.
    """
    if not isinstance(excerpt, str):
        return excerpt
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and makes newer Python versions emit a warning.
    return re.sub(r"\s+", " ", excerpt)

final_df.loc[:, 'excerpt'] = final_df['excerpt'].apply(remove_newlines)
#final_df.loc[:, 'dropped_excerpt'] = final_df['dropped_excerpt'].apply(remove_newlines)
##
final_df = final_df.sort_values("verified").reset_index(drop=True).drop_duplicates(subset='excerpt', keep="last")
final_df.shape, final_df.duplicated(subset="excerpt").sum()

((37711, 17), 0)

In [52]:
# Maps title-cased sector names onto the unified sector names.
# A sector mapped to "" is dropped: sector_mapper discards every falsy result,
# and names missing from this dict default to "" there as well.
sector_name_mapper = {
    "Agriculture": "Agriculture",
    "Cccm": "",
    "Cross": "Cross",
    "Cross Cutting": "Cross",
    "Cross-Sector": "Cross",
    "Education": "Education",
    "Food": "Food Security",
    "Food Security": "Food Security",
    "Nutrition": "Nutrition",
    "Health": "Health",
    "Livelihood": "Livelihoods",
    "Livelihoods": "Livelihoods",
    "Logistic": "Logistics",
    "Logistics": "Logistics",
    "Protection": "Protection",
    "Shelter": "Shelter",
    "Shelter And Nfis": "Shelter",
    "Nfi": "",
    "Wash": "WASH",
}

In [53]:
# Map every AF title to its id. The key has to be the loop's title value; the
# string literal "title" was used before, which left a single "title" entry
# holding the id of the last AF. If several AFs share a title the last id wins
# (af_title_to_ids keeps all of them).
af_title_to_id = dict()
for afid, title in zip(afs["id"], afs["title"]):
    af_title_to_id[title] = afid

In [54]:
# Lookup table from an original "Type->Pillar->Sub-pillar" label to its virtual
# counterpart; every part is title-cased.
original_to_virtual = {}
_gimac_columns = (
    "Type",
    "Pillar",
    "Sub-pillar",
    "Virtual Type",
    "Virtual Pillar",
    "Virtual Sub-pillar",
)
for record in zip(*(matching_gimac[column] for column in _gimac_columns)):
    original_parts, virtual_parts = record[:3], record[3:]
    # a value that differs from itself is NaN: the row has no virtual match
    if virtual_parts[0] != virtual_parts[0]:
        continue
    original_label = "->".join(part.title() for part in original_parts)
    virtual_label = "->".join(part.title() for part in virtual_parts)
    original_to_virtual[original_label] = virtual_label

In [55]:
def _virtual_labels(originals, matrix_type):
    """Translate original labels to virtual labels of one matrix type.

    Args:
        originals: iterable of "<type>-><pillar>-><sub-pillar>" labels, or a
            value that is not equal to itself (e.g. NaN), which yields [].
        matrix_type: label prefix to keep, "1D" or "2D".

    Returns:
        The virtual labels found in ``original_to_virtual`` that start with
        ``matrix_type``, in input order; unmapped labels are dropped.
    """
    if not (originals == originals):  # only NaN-like values fail this check
        return []
    mapped = (original_to_virtual.get(label, "") for label in originals)
    return [label for label in mapped if label and label.startswith(matrix_type)]


def _pillar_of(virtual_label):
    """Return the pillar, i.e. the middle part of "<type>-><pillar>->...\""""
    return virtual_label.split("->")[1]


def _without_type(virtual_label):
    """Drop the four leading characters ("1D->" / "2D->") of a virtual label."""
    return virtual_label[4:]


def sector_mapper(sec):
    """Map sector names through ``sector_name_mapper``.

    Sectors that are unknown or mapped to "" are dropped; a value that is
    not equal to itself (e.g. NaN) yields [].
    """
    if not (sec == sec):
        return []
    mapped = (sector_name_mapper.get(name, "") for name in sec)
    return [name for name in mapped if name]
##
def pillar_mapper(dim):
    """Return the virtual pillar of every label that maps to a 2D label."""
    return [_pillar_of(label) for label in _virtual_labels(dim, "2D")]
##
def subpillar_mapper(subdim):
    """Return "<pillar>-><sub-pillar>" of every label that maps to a 2D label."""
    return [_without_type(label) for label in _virtual_labels(subdim, "2D")]
##
def pillar_mapper_1d(dim):
    """Return the virtual pillar of every label that maps to a 1D label."""
    return [_pillar_of(label) for label in _virtual_labels(dim, "1D")]
##
def subpillar_mapper_1d(subdim):
    """Return "<pillar>-><sub-pillar>" of every label that maps to a 1D label."""
    return [_without_type(label) for label in _virtual_labels(subdim, "1D")]

In [56]:
final_df["sectors"] = final_df["sectors"].apply(sector_mapper)
final_df["subpillars_2d"] = final_df["pillars"].apply(subpillar_mapper)
final_df["subpillars_1d"] = final_df["pillars"].apply(subpillar_mapper_1d)
final_df["pillars_1d"] = final_df["pillars"].apply(pillar_mapper_1d)
final_df["pillars_2d"] = final_df["pillars"].apply(pillar_mapper)
final_df.drop(columns="pillars", inplace=True)

In [57]:
secs_unique = set()
secs_occurances = list()
for secs in final_df["sectors"]:
    secs_unique.update(secs)
    secs_occurances.extend(secs)
#secs_unique
Counter(secs_occurances).most_common()

[('Cross', 12281),
 ('Health', 8557),
 ('Protection', 7193),
 ('Food Security', 4513),
 ('Livelihoods', 3925),
 ('WASH', 3318),
 ('Education', 2887),
 ('Shelter', 2293),
 ('Nutrition', 1979)]

In [58]:
pills_unique = set()
pills_occurances = list()
for pills in final_df["pillars_2d"]:
    pills_unique.update(pills)
    pills_occurances.extend(pills)
#pills_unique
Counter(pills_occurances).most_common()

[('Capacities & Response', 10635),
 ('Priority Interventions', 3384),
 ('Impact', 3011),
 ('Humanitarian Conditions', 2668),
 ('Priority Needs', 562)]

In [59]:
subpills_unique = set()
subpills_occurances = list()
for subpills in final_df["subpillars_2d"]:
    subpills_unique.update(subpills)
    subpills_occurances.extend(subpills)
Counter(subpills_occurances).most_common()

[('Capacities & Response->International Response', 7385),
 ('Priority Interventions->Expressed By Humanitarian Staff', 3384),
 ('Capacities & Response->National Response', 3250),
 ('Impact->Impact On People', 3011),
 ('Humanitarian Conditions->Physical And Mental Well Being', 1711),
 ('Humanitarian Conditions->Coping Mechanisms', 957),
 ('Priority Needs->Expressed By Population', 562)]

In [60]:
pills_unique_1d = set()
pills_occurances_1d = list()
for pills_1d in final_df["pillars_1d"]:
    pills_unique_1d.update(pills_1d)
    pills_occurances_1d.extend(pills_1d)
#pills_unique_1d
Counter(pills_occurances_1d).most_common()

[('Context', 3407)]

In [61]:
subpills_unique_1d = set()
subpills_occurances_1d = list()
for subpills_1d in final_df["subpillars_1d"]:
    subpills_unique_1d.update(subpills_1d)
    subpills_occurances_1d.extend(subpills_1d)
#subpills_unique_1d
Counter(subpills_occurances_1d).most_common()

[('Context->Security & Stability', 1024),
 ('Context->Economy', 857),
 ('Context->Socio Cultural', 576),
 ('Context->Environment', 326),
 ('Context->Legal & Policy', 261),
 ('Context->Demography', 193),
 ('Context->Politics', 170)]

In [62]:
final_df["subpillars_1d"].apply(lambda x:x!=[]).sum(),\
final_df["subpillars_2d"].apply(lambda x:x!=[]).sum(),\
final_df["sectors"].apply(lambda x:x!=[]).sum()

(3029, 17367, 37141)