In [1]:
import pandas as pd
import numpy as np
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
import warnings

py.offline.init_notebook_mode(connected = True)
warnings.filterwarnings('ignore')

# Baseline Data

In [2]:
# reading the data
df = pd.read_excel("../data/Baseline/EWLI_RawData.xlsx")

In [4]:
df.head()

Unnamed: 0,start,end,today,Intro,City,Enumerator,enumj_id,loc_herat,loc_jbad,a_basics/a01,...,_uuid,_submission_time,_index,_parent_table_name,_parent_index,_tags,_notes,_version,_duration,_submitted_by
0,2015-04-06T10:18:51.698+04:30,2015-04-06T16:57:23.812+04:30,2015-04-06,,herat,samira,,herat_01,,male,...,a8917c45-cbfa-48fb-8479-afae832fa4f7,2015-04-06T12:54:51,2,,-1,,,201504010942,23912.0,
1,2015-04-06T10:37:05.429+04:30,2015-04-06T16:59:10.999+04:30,2015-04-06,,herat,samira,,herat_01,,female,...,d8645deb-e55e-4baf-896e-261788079da8,2015-04-06T12:54:56,3,,-1,,,201504010942,22925.0,
2,2015-04-06T10:57:02.577+04:30,2015-04-06T16:59:58.126+04:30,2015-04-06,,herat,samira,,herat_01,,female,...,456cfefd-0cc6-41d3-8fa4-5c95ca57cfd1,2015-04-06T12:55:01,4,,-1,,,201504010942,21776.0,
3,2015-04-06T11:18:34.904+04:30,2015-04-06T17:02:29.899+04:30,2015-04-06,,herat,samira,,herat_01,,female,...,a122c9d2-e7ab-4b7e-835c-7d667769393f,2015-04-06T12:55:05,5,,-1,,,201504010942,20635.0,
4,2015-04-06T11:33:24.533+04:30,2015-04-06T17:01:38.354+04:30,2015-04-06,,herat,samira,,herat_01,,female,...,5e16da4c-ed63-4c26-b17f-3a64b2409a8c,2015-04-06T12:55:10,6,,-1,,,201504010942,19694.0,


# Part 1: Splitting the dataframe into categories (Subsets)

### Category 1 : basics and household composition

In [5]:
start_index = df.columns.get_loc("a_basics/a01")
end_index = df.columns.get_loc("a_hh")
df_basics_hh = df[df.columns[start_index: end_index]] 

In [6]:
# sub-dataframe for basics and hosehold data
df_basics_hh.head()

Unnamed: 0,a_basics/a01,a_basics/a02,a_basics/a03,a_basics/a03_other,b_hh/b01a,b_hh/b01b,b02a,b02b,b03a,/b03b,...,b06a,b06b,hh_male,hh_female,hh_total,hh_eligbilefem,hh_girls,hh_boys,hh_lit_female,hh_lit_male
0,male,married living with spouse,tajik,,0,0,0,0,1,2,...,2.0,3,4,3,7,3,1,2,3,4
1,female,married living with spouse,hazara,,0,0,0,0,0,0,...,2.0,0,2,3,5,3,0,0,3,2
2,female,married living with spouse,pashtun,,0,0,0,0,0,4,...,3.0,5,10,7,17,7,0,4,7,10
3,female,widow,tajik,,0,0,0,0,0,0,...,2.0,1,4,5,9,4,0,0,5,4
4,female,married living with spouse,tajik,,0,0,0,1,2,0,...,4.0,2,4,4,8,4,2,1,4,3


### Category 2 : Origin


In [7]:
start_index = df.columns.get_loc("c_origin/c01")
end_index = df.columns.get_loc("c_origin/c06_other")
df_origin = df[df.columns[start_index: end_index]] 

In [8]:
df_origin.head()

Unnamed: 0,c_origin/c01,c_origin/c01aa,c_origin/c01ab,c_origin/c01ab_other,c_origin/c01ac,c_origin/c01ac_other,c_origin/c02pre,c_origin/c02pre_other,c_origin/c02,c_origin/c03,...,c_origin/c05,c_origin/c05_other,c_origin/c06,c_origin/c06/why_insecurity,c_origin/c06/why_persecution,c_origin/c06/why_livelihoods,c_origin/c06/why_shelter,c_origin/c06/why_land,c_origin/c06/why_services,c_origin/c06/other
0,yes,yes,,,,,liv_afg,,herat,enjil,...,fut_stay,,,,,,,,,
1,yes,yes,,,,,liv_afg,,herat,enjil,...,fut_stay,,,,,,,,,
2,yes,yes,,,,,liv_afg,,herat,enjil,...,fut_stay,,,,,,,,,
3,yes,yes,,,,,liv_afg,,herat,zendahjan,...,,,,,,,,,,
4,yes,yes,,,,,liv_afg,,herat,enjil,...,fut_stay,,,,,,,,,


### Category 3 : Displacement

In [9]:
start_index = df.columns.get_loc("d_idps/d01")
end_index = df.columns.get_loc("d_idps/d08_other")
df_displacement = df[df.columns[start_index: end_index]] 

In [10]:
df_displacement.head()

Unnamed: 0,d_idps/d01,d_idps/d01/dis_armed,d_idps/d01/dis_military,d_idps/d01/dis_intimidation,d_idps/d01/dis_tribal,d_idps/d01/dis_land,d_idps/d01/dis_feud,d_idps/d01/dis_mines,d_idps/d01/dis_shelling,d_idps/d01/dis_naturaldis,...,d_idps/d06/rr_assistance,d_idps/d07,d_idps/d08,d_idps/d08/inte_land,d_idps/d08/inte_school,d_idps/d08/inte_job,d_idps/d08/inte_serivces,d_idps/d08/inte_hostile,d_idps/d08/inte_gov,d_idps/d08/other
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


### Category 4 : Food access


In [11]:
start_index = df.columns.get_loc("e_food/e_fcs/e_fcs_intro")
end_index = df.columns.get_loc("e_food/e35_other")
df_food = df[df.columns[start_index: end_index]] 

In [12]:
df_food.head()

Unnamed: 0,e_food/e_fcs/e_fcs_intro,e_food/e_fcs/e01,e_food/e_fcs/e02,e_food/e_fcs/e03,e_food/e_fcs/e04,e_food/e_fcs/e05,e_food/e_fcs/e06,e_food/e_fcs/e07,e_food/e_fcs/e08,e_food/e_fcs/e09,...,e_food/e_hfias/e32a,e_food/e_hfias/e33,e_food/e_hfias/e33a,e_food/e34,e_food/e34_other,e_food/e35,e_food/e35/food_taste,e_food/e35/food_price,e_food/e35/food_health,e_food/e35/other
0,,7,7,5,4,3,3,7,7,7,...,,0,,dec_femalehh,,food_taste food_price,True,True,False,False
1,,7,3,4,7,3,4,6,7,7,...,,0,,dec_femalehh,,food_taste food_price,True,True,False,False
2,,7,3,5,0,2,0,6,7,7,...,,0,,dec_femalehh,,food_price,False,True,False,False
3,,7,2,2,0,1,3,4,7,7,...,,0,,dec_femalehh,,food_price,False,True,False,False
4,,7,2,3,0,0,0,0,7,7,...,3.0,1,3.0,dec_femalehh,,food_price,False,True,False,False


### Category 5 : Access to Basic Services


In [13]:
start_index = df.columns.get_loc("f_access/f_lit/f01a")
end_index = df.columns.get_loc("f_access/f11_other")
df_basic_services = df[df.columns[start_index: end_index]] 

In [14]:
df_basic_services.head()

Unnamed: 0,f_access/f_lit/f01a,f_access/f_lit/f01b,f_access/f_lit/f01a1,f_access/f_lit/f01a1/ill_noschoolg,f_access/f_lit/f01a1/ill_fam,f_access/f_lit/f01a1/ill_money,f_access/f_lit/f01a1/ill_far,f_access/f_lit/f01a1/ill_workh,f_access/f_lit/f01a1/ill_workinc,f_access/f_lit/f01a1/ill_language,...,f_access/f11/hissue_lack,f_access/f11/hissue_long,f_access/f11/hissue_cost,f_access/f11/hissue_qualified,f_access/f11/hissue_med,f_access/f11/hissue_femdoc,f_access/f11/hissue_femnurse,f_access/f11/hissue_all,f_access/f11/hissue_none,f_access/f11/other
0,3.0,4.0,,,,,,,,,...,False,False,False,False,False,False,False,False,True,False
1,3.0,1.0,,,,,,,,,...,True,False,True,True,False,False,True,False,False,False
2,4.0,9.0,,,,,,,,,...,True,False,False,False,True,False,False,False,False,False
3,0.0,1.0,,,,,,,,,...,False,False,False,False,False,False,False,False,True,False
4,3.0,3.0,,,,,,,,,...,False,False,False,False,False,False,False,False,True,False


### Category 6 : Social Safety Nets


In [15]:
## This category has only 1 column
#start_index = df.columns.get_loc("g_social/g01")
#end_index = df.columns.get_loc("c_origin/c06_other")
#df_origin = df[df.columns[start_index: end_index]] 

### Category 7 : Assets


In [16]:
start_index = df.columns.get_loc("h_assets/h01")
end_index = df.columns.get_loc("h_assets/h23")
df_assets = df[df.columns[start_index: end_index]] 

In [17]:
df_assets.head()

Unnamed: 0,h_assets/h01,h_assets/h01_other,h_assets/h01a,h_assets/h02,h_assets/h03,h_assets/h03_other,h_assets/h04,h_assets/h04/livestock_cattle,h_assets/h04/livestock_buffalo,h_assets/h04/livestock_horse,...,h_assets/h_dai/h13,h_assets/h_dai/h14,h_assets/h_dai/h15,h_assets/h_dai/h16,h_assets/h_dai/h17,h_assets/h18,h_assets/h19,h_assets/h20,h_assets/h21,h_assets/h22
0,shelter_own,,40,yes,deed_formal,,livestock_none,False,False,False,...,yes,yes,yes,yes,no,yes,yes,no,no,yes
1,shelter_own,,30,no,,,livestock_none,False,False,False,...,yes,no,no,no,no,yes,yes,no,no,no
2,shelter_own,,30,no,,,livestock_none,False,False,False,...,yes,yes,yes,yes,no,yes,yes,no,no,no
3,shelter_house,,20,no,,,livestock_none,False,False,False,...,no,no,no,no,no,yes,no,no,no,no
4,shelter_own,,20,no,,,livestock_none,False,False,False,...,yes,no,no,no,no,yes,yes,no,no,yes


### Category 8 : Adaptive capacity


In [18]:
start_index = df.columns.get_loc("i_adaptive/i01")
end_index = df.columns.get_loc("i_adaptive/i12_other")
df_adaptive_capacity = df[df.columns[start_index: end_index]] 

In [19]:
df_adaptive_capacity.head()

Unnamed: 0,i_adaptive/i01,i_adaptive/i01income_count,i_adaptive/i04,i_adaptive/i05,i_adaptive/i05a,i_adaptive/i05a1,i_adaptive/i05a1/school_work,i_adaptive/i05a1/school_document,i_adaptive/i05a1/school_far,i_adaptive/i05a1/school_ill,...,i_adaptive/i11/vul_ochildren,i_adaptive/i11/other,i_adaptive/i11_other,i_adaptive/i12,i_adaptive/i12/a_food,i_adaptive/i12/a_nfi,i_adaptive/i12/a_health,i_adaptive/i12/a_housing,i_adaptive/i12/a_legal,i_adaptive/i12/other
0,2,2,8000,yes,,,,,,,...,False,True,No option,,,,,,,
1,1,1,3000,yes,,,,,,,...,False,True,No object,,,,,,,
2,3,3,7000,yes,,,,,,,...,False,True,No option,,,,,,,
3,1,1,4000,yes,,,,,,,...,False,True,No option,,,,,,,
4,1,1,3000,no,3.0,,,,,,...,False,True,No,,,,,,,


### Category 9 : Stereotypes


In [20]:
start_index = df.columns.get_loc("j_stereotypes/j01")
end_index = df.columns.get_loc("j_stereotypes/j0/stigma_thieves")
df_stereotypes = df[df.columns[start_index: end_index]] 

In [21]:
df_stereotypes.head()

Unnamed: 0,j_stereotypes/j01,j_stereotypes/j02,j_stereotypes/j02/source_own,j_stereotypes/j02/source_fam,j_stereotypes/j02/source_neigh,j_stereotypes/j02/source_elders,j_stereotypes/j02/source_officials,j_stereotypes/j02/source_radio,j_stereotypes/j02/source_tv,j_stereotypes/j02/source_paper,...,j_stereotypes/j03/source_radio,j_stereotypes/j03/source_tv,j_stereotypes/j03/source_paper,j_stereotypes/j03/source_internet,j_stereotypes/j03/other,j_stereotypes/j03_other,j_stereotypes/j0,j_stereotypes/j0/stigma_pasht,j_stereotypes/j0/stigma_taliban,j_stereotypes/j0/stigma_same
0,idp_host,other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,No information,stigma_pasht,1.0,0.0,0.0
1,idp_host,other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,No information,stigma_pasht,1.0,0.0,0.0
2,idp_host,other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,No information,stigma_same,0.0,0.0,1.0
3,idp_host,other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,No information,stigma_pasht,1.0,0.0,0.0
4,idp_host,other,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,No information,stigma_pasht,1.0,0.0,0.0


### Category 10: Interest to participate in the programme

In [22]:
start_index = df.columns.get_loc("k_interest/k01")
end_index = df.columns.get_loc("k_interest/k_participate/k05/ben_men")
df_interest = df[df.columns[start_index: end_index]] 

In [23]:
df_interest.head()

Unnamed: 0,k_interest/k01,k_interest/k_participate/k_note,k_interest/k_participate/k_label,k_interest/k_participate/k_label/ben_women,k_interest/k_participate/k_label/ben_men,k_interest/k_participate/k02,k_interest/k_participate/k02/ben_women,k_interest/k_participate/k02/ben_men,k_interest/k_participate/k03,k_interest/k_participate/k03/ben_women,k_interest/k_participate/k03/ben_men,k_interest/k_participate/k04,k_interest/k_participate/k04/ben_women,k_interest/k_participate/k04/ben_men,k_interest/k_participate/k05,k_interest/k_participate/k05/ben_women
0,yes,,,,,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_men,0.0
1,yes,,,,,ben_women,1.0,0.0,,,,ben_women,1.0,0.0,ben_women,1.0
2,yes,,,,,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0
3,yes,,,,,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0
4,yes,,,,,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0,1.0,ben_women ben_men,1.0


# Part 2: Deep dive in each category 

## I. Basic and Household Composition information

In [24]:
df_basics_hh.head()

Unnamed: 0,a_basics/a01,a_basics/a02,a_basics/a03,a_basics/a03_other,b_hh/b01a,b_hh/b01b,b02a,b02b,b03a,/b03b,...,b06a,b06b,hh_male,hh_female,hh_total,hh_eligbilefem,hh_girls,hh_boys,hh_lit_female,hh_lit_male
0,male,married living with spouse,tajik,,0,0,0,0,1,2,...,2.0,3,4,3,7,3,1,2,3,4
1,female,married living with spouse,hazara,,0,0,0,0,0,0,...,2.0,0,2,3,5,3,0,0,3,2
2,female,married living with spouse,pashtun,,0,0,0,0,0,4,...,3.0,5,10,7,17,7,0,4,7,10
3,female,widow,tajik,,0,0,0,0,0,0,...,2.0,1,4,5,9,4,0,0,5,4
4,female,married living with spouse,tajik,,0,0,0,1,2,0,...,4.0,2,4,4,8,4,2,1,4,3


In [25]:
x = np.linspace(0, np.pi, 300)

layout = go.Layout(
    title='<b>Distribution of Age</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(Age range)</i>'
    ),
    
    barmode='stack'
)


f1 = df_basics_hh['b_hh/b01a'].sum()
f2 = df_basics_hh['b02a'].sum()
f3 = df_basics_hh['b03a'].sum()
f4 = df_basics_hh['b04a'].sum()
f5 = df_basics_hh['b05a'].sum()

m1 = df_basics_hh['b_hh/b01b'].sum()
m2 = df_basics_hh['b02b'].sum()
m3 = df_basics_hh['/b03b'].sum()
m4 = df_basics_hh['b04b'].sum()
m5 = df_basics_hh['b05b'].sum()


trace1 = go.Bar(
    x=['0-4', '5-11', '12-17', '18-59', '60+'],
    y=[f1, f2, f3, f4, f5],
    name='females'
)
trace2 = go.Bar(
    x=['0-4', '5-11', '12-17', '18-59', '60+'],
    y=[m1, m2, m3, m4, m5],
    name='males'
)

data = [trace1, trace2]
layout = layout

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='stacked-bar')

In [26]:
x = np.linspace(0, np.pi, 300)

layout = go.Layout(
    title='<b>Distribution of Household Size</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(Household Size)</i>'
    )
)

data = [go.Histogram(x = df_basics_hh['hh_total'])]
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='hist')

In [27]:
p1 = (df_basics_hh['hh_lit_female'].sum()*100)/df_basics_hh['hh_female'].sum() # female literate
p2 = (df_basics_hh['hh_lit_male'].sum()*100)/df_basics_hh['hh_male'].sum() # male literate

p11 = 100 - p1
p22 = 100 - p2 


fig = {
  "data": [
    {
      "values": [p1, p11],
      "labels": [
        "literate",
        "liliterate"
      ],
      "domain": {"column": 0},
      "hoverinfo":"label+percent",
      "hole": .4,
      "type": "pie"
    },
    {
      "values": [p2, p22],
      "labels": [
        "literate",
        "liliterate"
      ],
      "textposition":"inside",
      "domain": {"column": 1},
      "hoverinfo":"label+percent",
      "hole": .4,
      "type": "pie"
    }],
  "layout": {
        "title":"Percentage of Illiterates vs Literates",
        "grid": {"rows": 1, "columns": 2},
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "males",
                "x": 0.20,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "females",
                "x": 0.8,
                "y": 0.5
            }
        ]
    }
}
py.offline.iplot(fig, filename='donut')

# II. Displacement

In [28]:
# aggregating the reasons for displacement
df_displacement_agg = df_displacement.groupby('d_idps/d01').count()[['d_idps/d01/dis_armed']]
df_displacement_agg.columns = ['count']

In [29]:
for index, item in df_displacement_agg.iterrows():
    index_list = index.split()
    if(len(index_list)>1):
        for counter, element in enumerate(index_list):
            df_displacement_agg.loc[index_list[counter]] += df_displacement_agg.loc[index]
            
        df_displacement_agg.drop(index, inplace=True)
        
df_displacement_agg['reasons'] = ['Armed conflict'
                                  , 'Lack of economic opportunities'
                                  , 'Blood feud'
                                  , 'Indimidation and harrasment'
                                  , 'Land dispute/Land occupation'
                                  , 'Military operation'
                                  , 'Presence of mines, IEDs or other UXOs/ERW'
                                  , 'Natural disaster'
                                  , 'Cross border shelling'
                                  , 'Inter-tribal or factional fighting'
                                  , 'other'
                                  ]
df_displacement_agg.sort_values(by='count', ascending = True, inplace = True)

In [30]:
df_displacement_agg.count

<bound method DataFrame.count of                   count                                    reasons
d_idps/d01                                                        
dis_feud             15                                 Blood feud
dis_shelling         15                      Cross border shelling
dis_land             26               Land dispute/Land occupation
dis_mines            33  Presence of mines, IEDs or other UXOs/ERW
other                39                                      other
dis_tribal           70         Inter-tribal or factional fighting
dis_intimidation    104                Indimidation and harrasment
dis_naturaldis      118                           Natural disaster
dis_military        191                         Military operation
dis_armed           848                             Armed conflict
dis_economic       2385             Lack of economic opportunities>

In [31]:
layout = go.Layout(
    title = "Reasons for displacement",
    xaxis=dict(
        domain=[0.1, 1]
    ),
    yaxis = go.layout.YAxis(
        tickfont = dict(
            size = 14
        ),
        automargin = True
    ),

    margin=dict(
        l=200,
        r=10,
        t=50,
        b=50
    )
)

data = [go.Bar(
            x = df_displacement_agg['count'],
            y = df_displacement_agg['reasons'],
            orientation = 'h'
)]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='horizontal-bar')

# III. Food Access

In [32]:
df_food.head()

Unnamed: 0,e_food/e_fcs/e_fcs_intro,e_food/e_fcs/e01,e_food/e_fcs/e02,e_food/e_fcs/e03,e_food/e_fcs/e04,e_food/e_fcs/e05,e_food/e_fcs/e06,e_food/e_fcs/e07,e_food/e_fcs/e08,e_food/e_fcs/e09,...,e_food/e_hfias/e32a,e_food/e_hfias/e33,e_food/e_hfias/e33a,e_food/e34,e_food/e34_other,e_food/e35,e_food/e35/food_taste,e_food/e35/food_price,e_food/e35/food_health,e_food/e35/other
0,,7,7,5,4,3,3,7,7,7,...,,0,,dec_femalehh,,food_taste food_price,True,True,False,False
1,,7,3,4,7,3,4,6,7,7,...,,0,,dec_femalehh,,food_taste food_price,True,True,False,False
2,,7,3,5,0,2,0,6,7,7,...,,0,,dec_femalehh,,food_price,False,True,False,False
3,,7,2,2,0,1,3,4,7,7,...,,0,,dec_femalehh,,food_price,False,True,False,False
4,,7,2,3,0,0,0,0,7,7,...,3.0,1,3.0,dec_femalehh,,food_price,False,True,False,False


In [33]:
food_groups = df_food[['e_food/e_fcs/e01','e_food/e_fcs/e02', 'e_food/e_fcs/e03',
                     'e_food/e_fcs/e04', 'e_food/e_fcs/e05', 'e_food/e_fcs/e06',
                     'e_food/e_fcs/e07', 'e_food/e_fcs/e08']]
food_groups.columns = ['staples', 'Pulses', 'Vegetables', 'Fruits', 'Meat/Fish', 'Milk', 'Sugar', 'Oil']

In [34]:
food_groups['food_score'] = (food_groups['staples']*2 + food_groups['Pulses']*3 +
                             food_groups['Vegetables']*1 + food_groups['Fruits']*1 +
                             food_groups['Meat/Fish']*4 + food_groups['Milk']*4 +
                             food_groups['Sugar']*0.5 + food_groups['Oil']*0.5 )

In [35]:
#food_groups

In [36]:
layout = go.Layout(
    title='<b>Distribution of FCS</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(Food Consumption Score)</i>'
    )
)

data = [go.Histogram(x = food_groups['food_score'])]
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='hist')

In [37]:
s_index = df_food.columns.get_loc('e_food/e_hdds/e10')
e_index = df_food.columns.get_loc("e_food/e_hdds/e24")
df_hdds = df_food[df_food.columns[s_index: e_index]] 

In [38]:
# replacing yes and no values with 1 and 0
df_hdds.replace('yes', 1, inplace = True)
df_hdds.replace('no', 0,  inplace = True)
hdd_score = df_hdds.sum(axis=1)

In [39]:
layout = go.Layout(
    title='<b>Distribution of HDDS</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(Household Dietary Diversity Score)</i>'
    )
)

data = [go.Histogram(x = hdd_score)]
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='hist')

In [106]:
s_index = df_food.columns.get_loc('e_food/e_hfias/e25')
e_index = df_food.columns.get_loc('e_food/e34')
food_insecurities = df_food[df_food.columns[s_index: e_index]] 

In [108]:
food_insecurities.columns = ['1', '1a', '2', '2a', '3', '3a', '4', '4a', '5', 
                             '5a', '6', '6a', '7', '7a', '8', '8a', '9', '9a']

In [109]:
# some preprocessing
food_insecurities.replace('yes', 1, inplace = True)
food_insecurities.replace('no', 0, inplace = True)
food_insecurities.replace(np.nan, 0, inplace = True)
food_insecurities.replace('rarely', 1, inplace = True)
food_insecurities.replace('sometimes', 2, inplace = True)
food_insecurities.replace('often', 3, inplace = True)

In [116]:
food_insecurities['score'] = (food_insecurities['1a'] + food_insecurities['2a'] + food_insecurities['3a'] 
                              + food_insecurities['4a'] + food_insecurities['5a'] + food_insecurities['6a'] 
                              +food_insecurities['7a'] +  food_insecurities['8a'] + food_insecurities['9a'])

In [118]:
layout = go.Layout(
    title='<b>Household Food Insecurity Access Scale Score</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(HFIAS Score)</i>'
    )
)

data = [go.Histogram(x = food_insecurities['score'])]
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='hist')

In [67]:
# adding a column for food insecurity categories
def assign_category(row):
    value = ''
    if ((row['1a'] == 0 or row['1a'] == 1) & row['2'] == 0 & 
         row['3'] == 0 & row['4'] == 0 & row['5'] == 0 & 
         row['6'] == 0 & row['7'] == 0 & row['8'] == 0 & 
         row['9'] == 0):
         value = 'food secure'
            
    elif ((row['1a'] == 2 or row['1a'] == 3 or row['2a'] == 1 or row['2a'] == 2 
         or row['2a'] == 3 or row['3a'] == 1 or row['4a'] == 1) & 
        row['5'] == 0 &  row['6'] == 0 & row['7'] == 0 & row['8'] == 0 & 
        row['9'] == 0): 
        value = 'mildly food insecure'
        
    elif ((row['3a'] == 2 or row['3a'] == 3 or row['4a'] == 2 or row['4a'] == 3 
         or row['5a'] == 1 or row['5a'] == 2 or row['6a'] == 1 or row['6a'] == 2) & 
        row['7'] == 0 & row['8'] == 0 & row['9'] == 0):   
        value = 'moderately food insecure'
    
    elif (row['5a'] == 3 or row['6a'] == 3 or row['7a'] == 1 or row['7a'] == 2 
         or row['7a'] == 3 or row['8a'] == 1 or row['8a'] == 2 or row['8a'] == 3
        or row['9a'] == 1 or row['9a'] == 2 or row['9a'] == 3):
        value = 'severely food insecure'
        
    return value   

In [76]:
food_insecurities['f_insecurities'] = '0'

In [77]:
food_insecurities['f_insecurities'] = food_insecurities.apply(assign_category, axis=1)

In [78]:
food_insecurities[food_insecurities['f_insecurities'] == '']

Unnamed: 0,1,1a,2,2a,3,3a,4,4a,5,5a,6,6a,7,7a,8,8a,9,9a,f_insecurities
4053,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
8514,1,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0,
10747,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,


In [105]:
layout = go.Layout(
    title='<b>Household Food Access Insecurity</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(Categories of food access insecurity)</i>'
    )
)
counts = food_insecurities['f_insecurities'].value_counts()

data = [go.Bar(
            x=['food secure', 'mildly food insecure', 'moderately food insecure', 'severely food insecure'],
            y=[counts.loc['food secure'], counts.loc['mildly food insecure'], 
               counts.loc['moderately food insecure'], counts.loc['severely food insecure']]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

# IV. Access to Basic Services 

In [122]:
# the highest level of education
df_basic_services['f_access/f02b'].value_counts()

sch_high    3059
sch_sec     2007
sch_prim    2003
sch_uni     1442
sch_no       915
other        681
sch_dipl     538
sch_madr     394
sch_post     221
Name: f_access/f02b, dtype: int64

In [123]:
layout = go.Layout(
    title='<b>Household Highest level of education</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(levels of education)</i>'
    )
)
levels = df_basic_services['f_access/f02b'].value_counts()

data = [go.Bar(
            x=['High School', 'Secondary School', 'Primary School', 'University', 'No Schooling', 'other',
              'College Diploma', 'Madrassa', 'Post Grad'],
            y=[levels.loc['sch_high'], levels.loc['sch_sec'], levels.loc['sch_prim'],levels.loc['sch_uni'],  
               levels.loc['sch_no'], levels.loc['other'] , levels.loc['sch_dipl'] , levels.loc['sch_madr'], 
               levels.loc['sch_post']]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [126]:
# who has the highest level of education
df_basic_services['f_access/f02a'].value_counts()

edu_boy     2735
edu_hhhm    2691
edu_girl    2127
edu_adm     1279
edu_hhhf    1082
other        926
edu_adf      420
Name: f_access/f02a, dtype: int64

In [131]:
layout = go.Layout(
    title='<b>Who has the highest level of education?</b>',
    yaxis=dict(
        title='<i>(Count)</i>'
    ),
    xaxis=dict(
        title='<i>(household members)</i>'
    )
)
levels = df_basic_services['f_access/f02a'].value_counts()

data = [go.Bar(
            x=['Male Child', 'Male head of household', 'Female Child', 'Adult male household member', 
              'Female head of household', 'other', 'Adult female household member'],
            y=[levels.loc['edu_boy'], levels.loc['edu_hhhm'], levels.loc['edu_girl'],levels.loc['edu_adm'],  
               levels.loc['edu_hhhf'], levels.loc['other'] , levels.loc['edu_adf']]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [146]:
layout = go.Layout(
    title='<b>Households with electricity</b>',
    yaxis=dict(
        title='<i>(Percentage %)</i>'
    ),
    xaxis=dict(
        title='<i>(have electricity)</i>'
    )
)

counts = df_basic_services['f_access/f03'].value_counts()
total = counts.sum()
data = [go.Bar(
            x=['YES', 'NO'],
            y=[(counts.loc['yes']*100)/total, (counts.loc['no']*100)/total]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [154]:
# reasons for not having electricity
layout = go.Layout(
    title='<b>Reasons for not having electricity</b>',
    yaxis=dict(
        title='<i>(Percentage %)</i>'
    ),
    xaxis=dict(
        title='<i>(Reasons)</i>'
    )
)
_counts = df_basic_services['f_access/f03a'].value_counts()
total = _counts.sum()

data = [go.Bar(
            x=['We cannot pay the bill', 'There is no power line close to our house',
               'The government took it away', 'We do not need electricity'],
            y=[(_counts['ele_money']*100)/total, (_counts['ele_no']*100)/total, 
               (_counts['ele_gov']*100)/total, (_counts['ele_noneed']*100)/total]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [159]:
# How clean is your water
layout = go.Layout(
    title='<b>How clean is your water?</b>',
    yaxis=dict(
        title='<i>(Percentage %)</i>'
    ),
    xaxis=dict(
        title='<i>(Source)</i>'
    )
)
counts = df_basic_services['f_access/f05'].value_counts()
total = counts.sum()

data = [go.Bar(
            x=['Clean', 'Not clean','Dont know'],
            y=[(counts['w_good']*100)/total, (counts['w_bad']*100)/total, (counts['w_idk']*100)/total]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [161]:
# How clean is your water
layout = go.Layout(
    title='<b>How far is water source from your house?</b>',
    yaxis=dict(
        title='<i>(Percentage %)</i>'
    ),
    xaxis=dict(
        title='<i>(Distance)</i>'
    )
)
counts = df_basic_services['f_access/f04b'].value_counts()
total = counts.sum()

data = [go.Bar(
            x=['<15 min', '15-30 min','>30 min'],
            y=[(counts['min_low']*100)/total, (counts['min_mid']*100)/total, (counts['min_more']*100)/total]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')

In [163]:
# health facillty access
# How clean is your water
layout = go.Layout(
    title='<b>Health facility accessibility</b>',
    yaxis=dict(
        title='<i>(Percentage %)</i>'
    ),
    xaxis=dict(
        title='<i>(Distance)</i>'
    )
)
counts = df_basic_services['f_access/f09'].value_counts()
total = counts.sum()

data = [go.Bar(
            x=['15-30 min','>30 min', '<15 min', 'no health facility accessible'],
            y=[(counts['minh_mid']*100)/total, (counts['minh_more']*100)/total, 
               (counts['minh_low']*100)/total, (counts['minh_no']*100)/total]
    )]

fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig, filename='chart-bar')