# Income, School

In [2]:
# @author: Serena Chen
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
import matplotlib.pyplot as plt
import plotly
import chart_studio.plotly as py
import plotly.graph_objects as go
import scipy.stats as stats

In [36]:
# Baseline survey: a single CSV covering both cities, split on the `City` column.
# NOTE(review): unlike the midline/endline/control frames, missing values are NOT
# replaced with 0 here, so NaNs are skipped by later .mean() calls - confirm this
# difference is intended.
baseline = pd.read_csv('EWLI_baseline_data_cleaned.csv')
# .copy() makes each city frame independent of `baseline`, so adding the
# `income_ave` column to it later does not trigger SettingWithCopyWarning.
herat_base = baseline.loc[baseline['City'] == 'herat'].copy()
jal_base = baseline.loc[baseline['City'] == 'jalalabad'].copy()

In [45]:
# Midline survey, one workbook per city; missing cells are replaced with 0.
herat_mid = pd.read_excel('Herat Mid-term Data Set.xlsx').fillna(0)
jal_mid = pd.read_excel('JLBD Mid-term Data Set.xlsx').fillna(0)

In [57]:
# Endline survey workbooks (treatment group), one per city; NaN cells become 0.
herat_end = pd.read_excel(
    'EWLI_HRT_Endline_Treat_Group - latest version - labels - 2019-04-07-11-30-56.xlsx'
).fillna(value=0)
jal_end = pd.read_excel(
    'EWLI_Jala Abad_Endline_Treat_Group - latest version - labels - 2019-04-07-11-27-54.xlsx'
).fillna(value=0)

In [78]:
# Control group: surveyed after the program ended, among people who did not
# take part in it. One workbook per city; NaN cells become 0.
herat_ctrl = pd.read_excel(
    'EWLI_HRT_Endline_CTRL_Group - latest version - labels - 2019-03-27-11-29-40.xlsx'
).fillna(value=0)
jal_ctrl = pd.read_excel(
    'EWLI_Jalal Abad_Endline_CTRL_Group - latest version - labels - 2019-04-07-11-25-09.xlsx'
).fillna(value=0)

# Function

In [62]:
# define the function that calculates average income of each person - base
def cal_ave_income(df, income_col='i_adaptive/i04', workers_col='i_adaptive/i01'):
    """Return a copy of ``df`` with an ``income_ave`` column added.

    ``income_ave`` is ``income_col`` divided by ``workers_col``, row by row,
    with two special cases:

    * income is 0            -> ``income_ave`` is 0
    * income != 0, divisor 0 -> ``income_ave`` is the income itself

    The defaults are the baseline survey columns (presumably household income
    and number of earners - confirm against the baseline codebook).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns. It is not modified.
    income_col : str
        Name of the numerator column.
    workers_col : str
        Name of the divisor column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``income_ave`` is present even when ``df`` has no rows.
    """
    income = df[income_col]
    workers = df[workers_col]
    # A divisor of 0 is swapped for 1 so the income is carried over unchanged
    # instead of producing inf.
    divisor = workers.where(workers != 0, 1)
    result = df.copy()
    # Zero income always gives 0, even if the divisor is missing.
    result['income_ave'] = (income / divisor).where(income != 0, 0)
    return result

In [67]:
# define the function that calculates average income of each person - mid, end
def cal_ave_income_me(
    df,
    income_col='I03. What is your average monthly income (entire HH, in AFA)?',
    workers_col='I01. How many people are working in the family to earn money?',
):
    """Return a copy of ``df`` with an ``income_ave`` column added (mid/end/control surveys).

    ``income_ave`` is the household monthly income (``income_col``) divided by
    the number of people working in the family (``workers_col``), row by row,
    with two special cases:

    * income is 0                  -> ``income_ave`` is 0
    * income != 0, workers is 0    -> ``income_ave`` is the household income

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns. It is not modified.
    income_col : str
        Name of the household-income column.
    workers_col : str
        Name of the number-of-earners column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``income_ave`` is present even when ``df`` has no rows.
    """
    income = df[income_col]
    workers = df[workers_col]
    # Zero earners: divide by 1 so the household income is carried over
    # unchanged instead of producing inf.
    divisor = workers.where(workers != 0, 1)
    result = df.copy()
    # Zero income always gives 0, even if the earner count is missing.
    result['income_ave'] = (income / divisor).where(income != 0, 0)
    return result

In [126]:
# define the function that calculates percentage of each value for the column
def cal_pct(df):
    """Return the percentage share of each distinct value, rounded to 2 decimals.

    ``df`` is whatever object is passed in, as long as it offers
    ``value_counts(normalize=True)``; the cells in this notebook pass a single
    column (a Series). The result is indexed by the distinct values.
    """
    shares = df.value_counts(normalize=True)
    return (100 * shares).round(2)

# Analysis

## 1. Income

### 1.1 Average Income Per Person

In [63]:
# Herat, baseline: add the `income_ave` column via cal_ave_income.
herat_base = herat_base.pipe(cal_ave_income)

In [86]:
# Mean of `income_ave` over the Herat baseline rows, rounded to 2 decimals.
herat_bi = round(herat_base.loc[:, 'income_ave'].mean(), 2)
print(herat_bi)

6234.19


In [65]:
# Jalalabad, baseline: add the `income_ave` column via cal_ave_income.
jal_base = jal_base.pipe(cal_ave_income)

In [87]:
# Mean of `income_ave` over the Jalalabad baseline rows, rounded to 2 decimals.
jal_bi = round(jal_base.loc[:, 'income_ave'].mean(), 2)
print(jal_bi)

9471.38


In [68]:
# Herat, midline: add the `income_ave` column via cal_ave_income_me.
herat_mid = herat_mid.pipe(cal_ave_income_me)

In [88]:
# Mean of `income_ave` over the Herat midline rows, rounded to 2 decimals.
herat_mi = round(herat_mid.loc[:, 'income_ave'].mean(), 2)
print(herat_mi)

5661.11


In [70]:
# Jalalabad, midline: add the `income_ave` column via cal_ave_income_me.
jal_mid = jal_mid.pipe(cal_ave_income_me)

In [89]:
# Mean of `income_ave` over the Jalalabad midline rows, rounded to 2 decimals.
jal_mi = round(jal_mid.loc[:, 'income_ave'].mean(), 2)
print(jal_mi)

7289.6


In [72]:
# Herat, endline: add the `income_ave` column via cal_ave_income_me.
herat_end = herat_end.pipe(cal_ave_income_me)

In [90]:
# Mean of `income_ave` over the Herat endline rows, rounded to 2 decimals.
herat_ei = round(herat_end.loc[:, 'income_ave'].mean(), 2)
print(herat_ei)

5988.45


In [76]:
# Jalalabad, endline: add the `income_ave` column via cal_ave_income_me.
jal_end = jal_end.pipe(cal_ave_income_me)

In [91]:
# Mean of `income_ave` over the Jalalabad endline rows, rounded to 2 decimals.
jal_ei = round(jal_end.loc[:, 'income_ave'].mean(), 2)
print(jal_ei)

7890.81


In [79]:
# Herat, control group: add the `income_ave` column via cal_ave_income_me.
herat_ctrl = herat_ctrl.pipe(cal_ave_income_me)

In [92]:
# Mean of `income_ave` over the Herat control-group rows, rounded to 2 decimals.
herat_ctrl_ei = round(herat_ctrl.loc[:, 'income_ave'].mean(), 2)
print(herat_ctrl_ei)

5679.62


In [84]:
# Jalalabad, control group: add the `income_ave` column via cal_ave_income_me.
jal_ctrl = jal_ctrl.pipe(cal_ave_income_me)

In [93]:
# Mean of `income_ave` over the Jalalabad control-group rows, rounded to 2 decimals.
jal_ctrl_ei = round(jal_ctrl.loc[:, 'income_ave'].mean(), 2)
print(jal_ctrl_ei)

6044.57


In [135]:
# Grouped bar chart: midline vs. endline mean `income_ave` for each city.
Midline = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_mi, jal_mi],
    name='Midline_Income'
)

Endline = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_ei, jal_ei],
    name='Endline_Income'
)
data = [Midline, Endline]

layout = go.Layout(
    title='Comparison of Average Income Per Person in Each Area',
    # The x categories are the two cities; the survey rounds are the bar
    # groups shown in the legend, so the axis is labelled by city.
    xaxis=dict(title='<i>City</i>'),
    yaxis=dict(title='<i>Afghani</i>'),
    barmode='group'
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [102]:
# Grouped bar chart: endline participants vs. control group, mean `income_ave` per city.
Participant = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_ei, jal_ei],
    name='Participant'
)

Control_Group = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_ctrl_ei, jal_ctrl_ei],
    name='Control Group'
)
data = [Participant, Control_Group]

layout = go.Layout(
    title='Comparison of Average Income Per Person',
    # The x categories are the two cities; participant/control are the bar
    # groups shown in the legend, so the axis is labelled by city.
    xaxis=dict(title='<i>City</i>'),
    yaxis=dict(title='<i>Afghani</i>'),
    barmode='group'
)

fig = go.Figure(data=data, layout=layout)
fig.show()

From the above analysis, we can see that the average income per person has increased after the program ended (the baseline income is not from the same group as midline and endline). Comparing between midline and endline stages, in Herat, the average income per person has increased by 5.78%. In Jalalabad, the average income per person has increased by 8.25%.  
Compared with the control group, the endline average income per person in Herat is 5.44% higher than that of the control group. In Jalalabad, the endline average income per person is 30.54% higher than that of the control group.

## 2. School

### 2.1 Percentage of Children Attending School

In [108]:
# Herat, baseline: percentage of yes (1) / no (0) answers in column i_adaptive/i05.
herat_base_school = herat_base['i_adaptive/i05'].map({'yes': 1, 'no': 0}).pipe(cal_pct)
print(herat_base_school)

1    83.89
0    16.11
Name: i_adaptive/i05, dtype: float64


In [109]:
# Jalalabad, baseline: percentage of yes (1) / no (0) answers in column i_adaptive/i05.
jal_base_school = jal_base['i_adaptive/i05'].map({'yes': 1, 'no': 0}).pipe(cal_pct)
print(jal_base_school)

1    75.69
0    24.31
Name: i_adaptive/i05, dtype: float64


In [110]:
# Herat, midline: percentage of yes (1) / no (0) answers to question I05.
# Answers other than 'yes'/'no' map to NaN and are left out of the shares.
herat_mid_school = (
    herat_mid['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'yes': 1, 'no': 0})
    .pipe(cal_pct)
)
print(herat_mid_school)

1.0    74.43
0.0    25.57
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [118]:
# Jalalabad, midline: percentage of yes (1) / no (0) answers to question I05.
# Answers other than 'yes'/'no' map to NaN and are left out of the shares.
jal_mid_school = (
    jal_mid['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'yes': 1, 'no': 0})
    .pipe(cal_pct)
)
print(jal_mid_school)

1    75.37
0    24.63
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [130]:
# Herat, endline: percentage of Yes (1) / No (0) answers to question I05.
# Answers other than 'Yes'/'No' map to NaN and are left out of the shares.
herat_end_school = (
    herat_end['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'Yes': 1, 'No': 0})
    .pipe(cal_pct)
)
print(herat_end_school)

1    85.26
0    14.74
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [131]:
# Jalalabad, endline: percentage of Yes (1) / No (0) answers to question I05.
# Answers other than 'Yes'/'No' map to NaN and are left out of the shares.
jal_end_school = (
    jal_end['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'Yes': 1, 'No': 0})
    .pipe(cal_pct)
)
print(jal_end_school)

1    97.87
0     2.13
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [132]:
# Herat, control group: percentage of Yes (1) / No (0) answers to question I05.
# Answers other than 'Yes'/'No' map to NaN and are left out of the shares.
herat_ctrl_school = (
    herat_ctrl['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'Yes': 1, 'No': 0})
    .pipe(cal_pct)
)
print(herat_ctrl_school)

1    79.59
0    20.41
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [134]:
# Jalalabad, control group: percentage of Yes (1) / No (0) answers to question I05.
# Answers other than 'Yes'/'No' map to NaN and are left out of the shares.
jal_ctrl_school = (
    jal_ctrl['I05.Are all school-aged children (6-18)  attending school regularly?']
    .map({'Yes': 1, 'No': 0})
    .pipe(cal_pct)
)
print(jal_ctrl_school)

1    51.11
0    48.89
Name: I05.Are all school-aged children (6-18)  attending school regularly?, dtype: float64


In [166]:
# Line chart of the "yes" share (label 1) at baseline, midline and endline, one line per city.
fig = go.Figure(
    data=[
        go.Scatter(
            x=['Baseline', 'Midline', 'Endline'],
            y=[herat_base_school[1], herat_mid_school[1], herat_end_school[1]],
            mode='lines+markers',
            name='Herat',
        ),
        go.Scatter(
            x=['Baseline', 'Midline', 'Endline'],
            y=[jal_base_school[1], jal_mid_school[1], jal_end_school[1]],
            mode='lines+markers',
            name='Jalalabad',
        ),
    ]
)

# Title and both axes are set in one call; the y axis is fixed to 70-100 with a tick every 2.
fig.update_layout(
    title='Changes of the Percentage of School-Aged Children that Attends School in Both Area',
    yaxis=dict(title="% of Children Attending School", range=[70, 100], tick0=70, dtick=2),
    xaxis=dict(title="Stages"),
)
fig.show()

In [146]:
# Grouped bar chart: endline participants vs. control group, "yes" share (label 1) per city.
Participant = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_end_school[1], jal_end_school[1]],
    name='Participant'
)

Control_Group = go.Bar(
    x=['Herat', 'Jalalabad'],
    y=[herat_ctrl_school[1], jal_ctrl_school[1]],
    name='Control Group'
)
data = [Participant, Control_Group]

layout = go.Layout(
    title='Comparison of Percentages of Children Attending School',
    # The x categories are the two cities; participant/control are the bar
    # groups shown in the legend, so the axis is labelled by city.
    xaxis=dict(title='<i>City</i>'),
    yaxis=dict(title='<i>Percentage %</i>'),
    barmode='group'
)

fig = go.Figure(data=data, layout=layout)
fig.show()

From the analysis above, we can see that the percentages of children attending school increased thanks to the program.

Originally, the percentage of children attending school in Jalalabad was lower than in Herat (75.69% vs. 83.89% at baseline). After the program, it is about 12.6 percentage points higher than in Herat (97.87% vs. 85.26%).

In both areas, the percentage increased compared with its baseline.

Compared with the control group, the percentage of children attending school in Herat after the program is 5.67 percentage points higher than that of the control group. In Jalalabad, the percentage of children attending school after the program is 46.76 percentage points higher than that of the control group.

We can see that people in Jalalabad seem to have benefited more from the program than people in Herat on this indicator.