In [1]:
import json
from ast import literal_eval
from collections import Counter

import pandas as pd
from IPython.display import display

from constants import (SECTORS, SUBPILLARS_2D, SUBPILLARS_1D,
                       DEMOGRAPHIC_GROUPS, SPECIFIC_NEEDS_GROUPS,
                       AFFECTED_GROUPS, SEVERITY, RELIABILITY)

In [2]:
# Load the exported feedback table from CSV (path is relative to the working directory).
df = pd.read_csv("feedback_output_14.16.28.09.2021.csv")
def parse_fb(fb):
    """Parse one raw "Feedback" cell into a Python object when possible.

    The value is handed to ``ast.literal_eval``. When it is the text of a
    valid Python literal (for example a dict), the evaluated object is
    returned; otherwise the input is returned unchanged.

    Args:
        fb: Raw cell value from the "Feedback" column.

    Returns:
        The evaluated literal, or ``fb`` itself when it cannot be evaluated.
    """
    try:
        return literal_eval(fb)
    # Only the exceptions literal_eval documents for malformed input are
    # treated as "not a literal". A bare ``except:`` would also swallow
    # KeyboardInterrupt / SystemExit and make the cell impossible to interrupt.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return fb
# Replace each raw "Feedback" cell with the value returned by parse_fb.
df["Feedback"] = df["Feedback"].apply(parse_fb)

In [3]:
def _split_geo_tags(tags, geo_bucket, other_bucket):
    """Append each tag to geo_bucket if it starts with "Geo:", else to other_bucket."""
    for tag in tags:
        if tag.startswith("Geo:"):
            geo_bucket.append(tag)
        else:
            other_bucket.append(tag)


def process_fb(fbs):
    """Split feedback entries into per-category lists.

    Dict entries are read through their "missing", "wrong" and "text" keys:
    tags starting with "Geo:" are separated from the other tags, and the
    text is split on newlines, with lines 0-7 distributed (in order) to the
    sector, 2D, 1D, demographic groups, specific needs groups, affected
    groups, severity and geo lists. Any further lines are ignored.
    Non-dict entries are collected as general feedback.

    A dict entry with an absent or empty key, or with fewer than eight text
    lines, no longer raises: absent tags count as none and absent text lines
    are recorded as "" so the eight text lists stay aligned per entry.

    Args:
        fbs: Iterable of feedback entries (dicts or any other value).

    Returns:
        A 13-tuple of lists: (wrong_tags, missing_tags, sector_fbs,
        two_dim_fbs, one_dim_fbs, demographic_g_fbs, specific_ng_fbs,
        affected_g_fbs, severity_fbs, geo_fbs, gen_fbs, geo_wrong,
        geo_missing).
    """
    text_field_count = 8  # number of positional lines read from "text"
    wrong_tags, geo_wrong = [], []
    missing_tags, geo_missing = [], []
    gen_fbs = []
    text_columns = [[] for _ in range(text_field_count)]

    for fb in fbs:
        if not isinstance(fb, dict):
            gen_fbs.append(fb)
            continue

        _split_geo_tags(fb.get("missing") or [], geo_missing, missing_tags)
        _split_geo_tags(fb.get("wrong") or [], geo_wrong, wrong_tags)

        lines = (fb.get("text") or "").split("\n")
        # Pad short texts so every column receives exactly one value per entry.
        lines.extend([""] * (text_field_count - len(lines)))
        # zip stops after text_field_count columns, dropping any extra lines.
        for column, line in zip(text_columns, lines):
            column.append(line)

    (sector_fbs, two_dim_fbs, one_dim_fbs, demographic_g_fbs,
     specific_ng_fbs, affected_g_fbs, severity_fbs, geo_fbs) = text_columns
    return (wrong_tags, missing_tags, sector_fbs, two_dim_fbs, one_dim_fbs,
            demographic_g_fbs, specific_ng_fbs, affected_g_fbs, severity_fbs,
            geo_fbs, gen_fbs, geo_wrong, geo_missing)

In [4]:
# Reverse lookup: label -> name of the constant group it belongs to.
# Groups are visited in the order listed, so a label that appears in more
# than one group ends up mapped to the last group that contains it.
label_to_tagname = {
    label: tagname
    for tagname, labels in (
        ("SECTORS", SECTORS),
        ("SUBPILLARS_2D", SUBPILLARS_2D),
        ("SUBPILLARS_1D", SUBPILLARS_1D),
        ("SPECIFIC_NEEDS_GROUPS", SPECIFIC_NEEDS_GROUPS),
        ("AFFECTED_GROUPS", AFFECTED_GROUPS),
        ("DEMOGRAPHIC_GROUPS", DEMOGRAPHIC_GROUPS),
        ("SEVERITY", SEVERITY),
        ("RELIABILITY", RELIABILITY),
    )
    for label in labels
}

In [5]:
def process_wrong_tags(wrong_tags, label_map=None):
    """Group tag labels by tag category and count occurrences per label.

    Args:
        wrong_tags: Iterable of tag labels (the notebook also passes the
            "missing" tags through this function).
        label_map: Optional mapping from label to category name. Defaults to
            the notebook-level ``label_to_tagname``.

    Returns:
        Dict mapping each category name to a list of ``(label, count)``
        pairs as produced by ``Counter.most_common()``. The eight standard
        categories are always present, possibly with an empty list. Labels
        absent from the mapping are collected under an extra "UNKNOWN" key
        (created only when needed) instead of raising ``KeyError``.
    """
    if label_map is None:
        label_map = label_to_tagname

    organized_by_tagname = {
        tagname: []
        for tagname in (
            "SECTORS",
            "SUBPILLARS_2D",
            "SUBPILLARS_1D",
            "DEMOGRAPHIC_GROUPS",
            "SPECIFIC_NEEDS_GROUPS",
            "AFFECTED_GROUPS",
            "SEVERITY",
            "RELIABILITY",
        )
    }
    for wrong_tag in wrong_tags:
        tagname = label_map.get(wrong_tag, "UNKNOWN")
        # setdefault also covers a custom label_map that names extra categories.
        organized_by_tagname.setdefault(tagname, []).append(wrong_tag)

    return {
        tagname: Counter(labels).most_common()
        for tagname, labels in organized_by_tagname.items()
    }

In [6]:
# Run process_fb over the "Feedback" column and unpack its 13 result lists
# into notebook-level names used by the cells that follow.
(wrong_tags, missing_tags, sector_fbs, two_dim_fbs, one_dim_fbs,
 demographic_g_fbs, specific_ng_fbs, affected_g_fbs, severity_fbs, geo_fbs,
 gen_fbs, geo_wrong, geo_missing) = process_fb(df["Feedback"])

In [7]:
# Group both tag lists by tag category. process_wrong_tags is reused for the
# missing tags as well, despite its name.
wrong_organized_by_tagname = process_wrong_tags(wrong_tags)
missing_organized_by_tagname = process_wrong_tags(missing_tags)

In [8]:
# Remove the column-width limit so long labels are shown in full in displayed DataFrames.
pd.set_option('display.max_colwidth', None)

In [9]:
# Number of rows in the loaded feedback table.
len(df)

126

### Missing Tags

In [10]:
# For each tag category, print its name and show the (label, count) pairs
# for tags taken from the "missing" lists of the feedback entries.
for k, v in missing_organized_by_tagname.items():
    print(k)
    display(pd.DataFrame(v))

SECTORS


Unnamed: 0,0,1
0,Cross,3
1,Livelihoods,2
2,Food Security,2
3,Agriculture,2
4,Logistics,2
5,Shelter,1
6,Health,1


SUBPILLARS_2D


Unnamed: 0,0,1
0,Capacities & Response->Number Of People Reached/Response Gaps,11
1,"Impact->Impact On Systems, Services And Networks",10
2,Impact->Impact On People,9
3,Impact->Driver/Aggravating Factors,8
4,Humanitarian Conditions->Living Standards,8
5,Capacities & Response->National Response,7
6,Capacities & Response->International Response,6
7,Capacities & Response->Local Response,4
8,At Risk->Risk And Vulnerabilities,3
9,Humanitarian Conditions->Physical And Mental Well Being,1


SUBPILLARS_1D


Unnamed: 0,0,1
0,Context->Socio Cultural,2
1,Casualties->Dead,1
2,Shock/Event->Hazard & Threats,1
3,Humanitarian Access->Physical constraints,1


DEMOGRAPHIC_GROUPS


Unnamed: 0,0,1
0,Infants/Toddlers (<5 years old),1
1,Older Persons Unspecified gender (60+ years old),1


SPECIFIC_NEEDS_GROUPS


Unnamed: 0,0,1
0,Persons with Disability,1


AFFECTED_GROUPS


Unnamed: 0,0,1
0,Host,7
1,Refugees,3


SEVERITY


Unnamed: 0,0,1
0,Critical,4


RELIABILITY


### Wrong Tags

In [11]:
# For each tag category, print its name and show the (label, count) pairs
# for tags taken from the "wrong" lists of the feedback entries.
for k, v in wrong_organized_by_tagname.items():
    print(k)
    display(pd.DataFrame(v))

SECTORS


Unnamed: 0,0,1
0,Protection,3
1,Education,2
2,Cross,1
3,Health,1
4,Livelihoods,1
5,Nutrition,1


SUBPILLARS_2D


Unnamed: 0,0,1
0,Humanitarian Conditions->Physical And Mental Well Being,5
1,Capacities & Response->International Response,4
2,"Impact->Impact On Systems, Services And Networks",2
3,Capacities & Response->National Response,1
4,At Risk->Risk And Vulnerabilities,1
5,Humanitarian Conditions->Living Standards,1


SUBPILLARS_1D


Unnamed: 0,0,1
0,Context->Demography,20
1,Context->Economy,16
2,Casualties->Dead,5
3,Information and Communication->Communication means and preferences,5
4,Context->Security & Stability,3
5,Information and Communication->Knowledge and info gaps (pop),2
6,Humanitarian Access->Physical constraints,2
7,Shock/Event->Hazard & Threats,2
8,Context->Socio Cultural,2
9,Context->Environment,2


DEMOGRAPHIC_GROUPS


Unnamed: 0,0,1
0,Children/Youth Unspecified gender (5 to 17 years old),66
1,Adult Female (18 to 59 years old),22
2,Older Persons Unspecified gender (60+ years old),8
3,Children/Youth Female (5 to 17 years old),8
4,Adult Unspecified gender (18-59 years old),5
5,Older Persons Female (60+ years old),4
6,Adult Male (18 to 59 years old),3


SPECIFIC_NEEDS_GROUPS


Unnamed: 0,0,1
0,Indigenous people,30
1,Pregnant or Lactating Women,10
2,Minorities,9
3,GBV survivors,6
4,Chronically Ill,5
5,Unaccompanied or Separated Children,5
6,Elderly Head of Household,4
7,Persons with Disability,4
8,Female Head of Household,3
9,Child Head of Household,2


AFFECTED_GROUPS


Unnamed: 0,0,1
0,Affected,100
1,Displaced,82
2,Migrants,19
3,Non Displaced,12
4,Refugees,8
5,IDP,7
6,Host,4
7,Not Affected,1
8,Returnees,1


SEVERITY


Unnamed: 0,0,1
0,Major,7
1,Of Concern,7


RELIABILITY


### Geo

In [12]:
# "Geo:"-prefixed tags collected from the "wrong" lists of the feedback entries.
geo_wrong

['Geo: 250 Bed District Sadar Hospital',
 'Geo: Cholera',
 'Geo: Diphtheria',
 'Geo: RDT',
 'Geo: RDT/',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: RDT',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: Rohingya refugees',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: Rohingya refugees',
 'Geo: National',
 'Geo: High Dependency Unit',
 'Geo: Health Sector',
 'Geo: Health Field Monitors',
 'Geo: Aquatabs',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: Adana',
 'Geo: Bağcılar',
 'Geo: Rohingyas',
 'Geo: Border Guard Bangladesh',
 'Geo: 24 Jun',
 'Geo: Vocational Training Centre',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: Cubic',
 'Geo: IEDCR Field Labora-',
 'Geo: Rohingya',
 'Geo: 24 Jun',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: Antakya',
 'Geo: Buyukcekmece',
 'Geo: Link Road',
 'Geo: 24 Jun',
 'Geo: Antakya',
 'Geo: 182 Call Center',
 'Geo: 250 Bed District Sadar Hospital',
 'Geo: C

### Textual Feedback

In [13]:
#  '[Specific]: Specific Needs Groups is not mentioned.',
#  '[Geo]: Camp 26 is specific geo location',
#  '[Geo]: Overall Syria',
#  '[Geo]: Camps 1W, 3, 5, 17, 21, 26 and 27 missing',
#  '[Geo]: Dier-ez-zor',
#  '[Geo]: Specific camp location is missing',
#  general: 'Incomplete sentence on the excerpt. Example:\nAs a result, the MUV vocational training reverted to in-class training as of.... This sentence is not complete and miss the date.'