In [1]:
### GiveDirectly (GD) -- Data Viz Example
## jondlesko (ltdqty)
# Last Updated: 7/4/2025

# Source: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/M2GAZN
# Title: Replication Data for The Short-Term Impact Of Unconditional Cash Transfers To The Poor: Experimental Evidence From Kenya
# Authors: Haushofer, Johannes and Shapiro, Jeremy

# Citation: Haushofer, Johannes; Shapiro, Jeremy, 2017, "Replication Data for: The Short-Term Impact Of Unconditional Cash Transfers To The Poor: 
# Experimental Evidence From Kenya", https://doi.org/10.7910/DVN/M2GAZN, Harvard Dataverse, V2, UNF:6:eB/A7wOcFTso1/ckVxjV3w== [fileUNF]

In [None]:
## libraries

import pandas as pd

import numpy as np

from IPython.display import display, HTML

import plotly.express as px

import difflib

from scipy.stats import norm

import kaleido

In [None]:
# Render floats in fixed-point form with four decimals (no scientific notation)
pd.options.display.float_format = '{:.4f}'.format

In [4]:
## Load the tab-delimited replication file into `df`

# File: UCT_FINAL_CLEAN.tab (read_table splits on tabs by default)
df = pd.read_table('UCT_FINAL_CLEAN.tab')

# Display (rows, columns) of the raw frame
df.shape


(2880, 981)

In [5]:
## Show full cell contents instead of truncating long values

# None removes the column-width limit
pd.options.display.max_colwidth = None

# To go back to the default width (typically 50 characters), run:
# pd.reset_option('display.max_colwidth')


In [6]:
## Fuzzy-search the column names for a term, top 5 hits ('difflib' library)

column_name = 'psy_index'
matches = difflib.get_close_matches(column_name, df.columns, n=5, cutoff=0.4)

# Nothing cleared the similarity cutoff
if not matches:
    print("No close matches found.")

# Report each hit with its integer position in df.columns (ranks start at 1)
for rank, candidate in enumerate(matches, start=1):
    position = df.columns.get_loc(candidate)
    print(f"{rank}. '{candidate}' is at position {position}")

1. 'psy_index_z1' is at position 114
2. 'psy_index_z0' is at position 111
3. 'psy_index_z_miss0' is at position 113
4. 'psy_index_z_full0' is at position 112
5. 'ed_index1' is at position 110


In [7]:
## slim dataset

# Select columns by name rather than by integer position. Positional slices
# silently pick up different variables if the column order of the source file
# ever changes, whereas a missing name raises a KeyError right here.
# The names below are the ones the former positional selection resolved to.
keep_cols = [
    # formerly positions 0-7
    'surveyid', 'femaleres', 'maleres', 'village',
    'treat', 'spillover', 'purecontrol', 'control_village',
    # formerly positions 13-16
    'treatXlump', 'treatXmonthly', 'treatXlarge', 'treatXsmall',
    # formerly positions 111, 113 and 114
    'psy_index_z0', 'psy_index_z_miss0', 'psy_index_z1',
]

# .copy() gives an independent frame so later column assignments on `dfs`
# never write back into (or warn about) the full `df`.
dfs = df.loc[:, keep_cols].copy()


In [8]:
# List the columns retained in the slimmed frame `dfs`
dfs.columns

Index(['surveyid', 'femaleres', 'maleres', 'village', 'treat', 'spillover',
       'purecontrol', 'control_village', 'treatXlump', 'treatXmonthly',
       'treatXlarge', 'treatXsmall', 'psy_index_z0', 'psy_index_z_miss0',
       'psy_index_z1'],
      dtype='object')

In [9]:
## Cast every retained column to an explicit dtype

# Columns stored as nullable integers ('Int64' tolerates missing values)
nullable_int_cols = [
    'femaleres', 'maleres', 'village',
    'treat', 'spillover', 'purecontrol', 'control_village',
    'treatXlump', 'treatXmonthly', 'treatXlarge', 'treatXsmall',
    'psy_index_z_miss0',
]

# Columns stored as floats
float_cols = ['psy_index_z0', 'psy_index_z1']

# Column name -> target dtype
dtype_map = {
    'surveyid': 'object',
    **dict.fromkeys(nullable_int_cols, 'Int64'),
    **dict.fromkeys(float_cols, 'float'),
}

# Apply all casts in a single call
dfs = dfs.astype(dtype_map)


In [10]:
## Sample-size check: 'purecontrol' has no baseline 'psy_index_z0', so it has no delta

# Boolean row masks per treatment group (groups are non-exclusive)
group_defs = {
    'Pure Control': dfs['purecontrol'].eq(1),
    'Spillover Control': dfs['treat'].eq(0) & dfs['purecontrol'].eq(0),
    'Small Transfer': dfs['treatXsmall'].eq(1),
    'Lump Sum': dfs['treatXlump'].eq(1),
    'Monthly': dfs['treatXmonthly'].eq(1),
    'Large Transfer': dfs['treatXlarge'].eq(1),
}

# Change in psychological wellbeing: psy_index_z1 minus psy_index_z0
dfs['delta_psy_index'] = dfs['psy_index_z1'].sub(dfs['psy_index_z0'])

# For each group x gender: rows selected, and rows with a non-missing delta
gender_flags = (('Female', 'femaleres'), ('Male', 'maleres'))
for group_name, in_group in group_defs.items():
    for sex, flag_col in gender_flags:
        selected = in_group & dfs[flag_col].eq(1)
        n_total = selected.sum()
        n_nonnull = dfs.loc[selected, 'delta_psy_index'].count()
        print(f"{group_name} | {sex}: total={n_total}, with Δ={n_nonnull}")


Pure Control | Female: total=432, with Δ=0
Pure Control | Male: total=432, with Δ=0
Spillover Control | Female: total=505, with Δ=384
Spillover Control | Male: total=505, with Δ=272
Small Transfer | Female: total=366, with Δ=278
Small Transfer | Male: total=366, with Δ=190
Lump Sum | Female: total=245, with Δ=190
Lump Sum | Male: total=245, with Δ=134
Monthly | Female: total=258, with Δ=193
Monthly | Male: total=258, with Δ=140
Large Transfer | Female: total=137, with Δ=105
Large Transfer | Male: total=137, with Δ=84


In [11]:
## Build 'summary_delta', the input table for the graphic

# Change in psychological wellbeing: psy_index_z1 minus psy_index_z0
dfs['delta_psy_index'] = dfs['psy_index_z1'].sub(dfs['psy_index_z0'])

# Mean delta overall and per gender flag, each rounded to 4 decimals
avg_delta = dfs['delta_psy_index'].mean().round(4)
avg_delta_fem = dfs.loc[dfs['femaleres'].eq(1), 'delta_psy_index'].mean().round(4)
avg_delta_male = dfs.loc[dfs['maleres'].eq(1), 'delta_psy_index'].mean().round(4)

# Treatment-group masks (non-exclusive); 'Pure Control' is not included here
group_defs = {
    'Spillover Control': dfs['treat'].eq(0) & dfs['purecontrol'].eq(0),
    'Small Transfer': dfs['treatXsmall'].eq(1),
    'Lump Sum': dfs['treatXlump'].eq(1),
    'Monthly': dfs['treatXmonthly'].eq(1),
    'Large Transfer': dfs['treatXlarge'].eq(1),
}

# One record per group x gender holding the mean delta (missing values are skipped)
gender_flags = (('Female', 'femaleres'), ('Male', 'maleres'))
records = [
    {
        'Group': group_name,
        'Gender': sex,
        'Delta': dfs.loc[in_group & dfs[flag_col].eq(1), 'delta_psy_index'].mean(),
    }
    for group_name, in_group in group_defs.items()
    for sex, flag_col in gender_flags
]

summary_delta = pd.DataFrame(records)
summary_delta['Delta'] = summary_delta['Delta'].round(4)

# Group display order: ascending by the Female mean delta
group_order = (
    summary_delta
    .loc[summary_delta['Gender'].eq('Female')]
    .sort_values('Delta')['Group']
    .tolist()
)
summary_delta


Unnamed: 0,Group,Gender,Delta
0,Spillover Control,Female,-0.0359
1,Spillover Control,Male,0.0021
2,Small Transfer,Female,0.1425
3,Small Transfer,Male,0.3394
4,Lump Sum,Female,0.1665
5,Lump Sum,Male,0.427
6,Monthly,Female,0.1961
7,Monthly,Male,0.2816
8,Large Transfer,Female,0.2845
9,Large Transfer,Male,0.3827


In [12]:
## Cross-check the spillover control means by gender, using the 'spillover' flag directly

# Prints the Female value first, then the Male value
for flag_col in ('femaleres', 'maleres'):
    rows = (dfs['spillover'] == 1) & (dfs[flag_col] == 1)
    change = dfs.loc[rows, 'psy_index_z1'] - dfs.loc[rows, 'psy_index_z0']
    print(change.mean().round(4))


-0.0359
0.0021


In [13]:
## Express each z-score change as a percentile shift (from scipy.stats import norm)

# Percentile points gained by moving from z=0 (50th percentile) to z=Delta,
# rounded to one decimal
z_change = summary_delta['Delta']
pct_shift = pd.Series((norm.cdf(z_change) - 0.5) * 100, index=z_change.index).round(1)

# Display strings: signed percentile points and a 4-decimal delta
summary_delta['Percentile_Gain'] = pct_shift.map(lambda p: f"{p:+.1f} pp")
summary_delta['Delta_Display'] = z_change.map("{:.4f}".format)

summary_delta

Unnamed: 0,Group,Gender,Delta,Percentile_Gain,Delta_Display
0,Spillover Control,Female,-0.0359,-1.4 pp,-0.0359
1,Spillover Control,Male,0.0021,+0.1 pp,0.0021
2,Small Transfer,Female,0.1425,+5.7 pp,0.1425
3,Small Transfer,Male,0.3394,+13.3 pp,0.3394
4,Lump Sum,Female,0.1665,+6.6 pp,0.1665
5,Lump Sum,Male,0.427,+16.5 pp,0.427
6,Monthly,Female,0.1961,+7.8 pp,0.1961
7,Monthly,Male,0.2816,+11.1 pp,0.2816
8,Large Transfer,Female,0.2845,+11.2 pp,0.2845
9,Large Transfer,Male,0.3827,+14.9 pp,0.3827


In [14]:
# Write summary_delta to a UTF-8 CSV file, without the row index
out_csv = "summary_delta.csv"
summary_delta.to_csv(out_csv, encoding='utf-8', index=False)


In [15]:
## HTML interactive graphic using plotly 
# libraries: import pandas as pd; import plotly.express as px

# Plot the summary_delta computed earlier in the notebook instead of a pasted
# copy of its printed values. The figure also reads the live `group_order` and
# `avg_delta`, so a hard-coded table here could silently drift out of sync
# with them whenever the data or group definitions change.
# Selecting the columns explicitly fixes their order and raises a KeyError
# if the display columns have not been created yet.
plot_cols = ['Group', 'Gender', 'Delta', 'Delta_Display', 'Percentile_Gain']
summary_delta = summary_delta[plot_cols].copy()


# Grouped horizontal bar chart: one bar per gender within each treatment group
gender_palette = {'Female': '#D4AF37', 'Male': '#2E5E4E'}

# Extra columns exposed to the hover template as customdata[0..2]
hover_fields = ['Gender', 'Delta_Display', 'Percentile_Gain']

fig = px.bar(
    data_frame=summary_delta,
    y='Group',
    x='Delta',
    orientation='h',
    color='Gender',
    color_discrete_map=gender_palette,
    barmode='group',
    category_orders={'Group': group_order},
    custom_data=hover_fields,
)

# Layout and styling

# Fonts shared by several layout elements
body_font = 'Source Sans Pro'
axis_title_font = dict(family=body_font, size=15)
legend_font = dict(family=body_font, size=13, color='#000000')

# Title is assembled from three HTML spans: headline, sub-headline, takeaway
headline = "<span style='font-size:20px; color:#2E5E4E; font-family:Georgia; font-weight:bold;'>Cash That Heals</span>"
subhead = "<span style='font-size:16px; color:#2E5E4E; font-family:Georgia; font-style:italic;'>&nbsp;&nbsp;&nbsp;How unconditional cash shaped mental health, by gender and treatment group</span><br>"
takeaway = "<span style='font-size:15px; color:#444444; font-family:Source Sans Pro;'>Across all transfer types, women experienced meaningful psychological gains—underscoring cash's potential to support female mental health, even when not explicitly targeted.</span>"

fig.update_layout(
    # Axis titles
    xaxis_title=dict(
        text='<b>Change in Psychological Wellbeing Index (Δ z-score)</b>',
        font=axis_title_font,
    ),
    yaxis_title=dict(text='<b>Treatment Group</b>', font=axis_title_font),
    # Axes: light vertical grid; y axis reversed
    xaxis=dict(showgrid=True, gridcolor='#E0E0E0', zerolinecolor='#8C8C8C'),
    yaxis=dict(autorange='reversed'),
    # Text and legend
    font=dict(color='#000000'),
    legend_title=dict(text='<b>Gender</b>', font=legend_font),
    legend=dict(font=legend_font),
    # Canvas: white backgrounds, fixed size, margins sized for title and notes
    plot_bgcolor='white',
    paper_bgcolor='white',
    bargap=0.3,
    margin=dict(t=100, b=140, l=80, r=100),
    width=1200,
    height=525,
    # Left-aligned title
    title=dict(text=headline + subhead + takeaway, x=0, xanchor='left'),
)


# Hover pop-up: one line per field; customdata indices follow the custom_data
# order passed to px.bar, and the empty <extra> tag hides the secondary box
hover_lines = [
    "<b>Group:</b> %{y}",
    "<b>Gender:</b> %{customdata[0]}",
    "<b>Δ z-score:</b> %{customdata[1]}",
    "<b>Approx. Percentile Gain:</b> %{customdata[2]}",
]
fig.update_traces(hovertemplate="<br>".join(hover_lines) + "<extra></extra>")


# Dashed vertical reference line at the overall mean delta, labelled with its value
fig.add_vline(
    x=avg_delta,
    line_width=1.5,
    line_color="#8C8C8C",
    line_dash="dash",
    annotation_text=f"Avg Δ = {avg_delta}",
    annotation_position="top",
    annotation_font=dict(size=11, color="#8C8C8C"),
)

# Both notes are positioned in paper coordinates and drawn without an arrow
paper_anchor = dict(xref="paper", yref="paper", showarrow=False)

# Interpretive footnote, centred below the plotting area
footnote = "Note: A z-score change of 0.25 corresponds to a shift from the 50th to roughly the 60th percentile in psychological wellbeing."
fig.add_annotation(
    text=footnote,
    x=0.5, y=-0.245,
    xanchor="center",
    align="center",
    font=dict(family='Source Sans Pro', size=14, color="#000000"),
    **paper_anchor,
)

# Source credit with a link, placed just right of the plotting area
source_credit = (
    "<b>Source:</b> Haushofer <br>& Shapiro (2017),<br>"
    + "<a href='https://doi.org/10.7910/DVN/M2GAZN' target='_blank'>Harvard Dataverse</a>"
)
fig.add_annotation(
    text=source_credit,
    x=1.0125, y=0.5,
    xanchor="left",
    align="left",
    font=dict(family='Source Sans Pro', size=12, color="#000000"),
    bordercolor="rgba(0,0,0,0.1)",
    borderwidth=0,
    bgcolor="rgba(255,255,255,0)",
    **paper_anchor,
)

fig.show()

In [None]:
## Export the figure to a standalone HTML file

# Figure.write_html is the method form of plotly.io.write_html, so this cell
# needs no import of its own (imports belong in the import cell at the top).
# full_html=True writes a complete HTML document; include_plotlyjs="cdn" makes
# the page load plotly.js from the CDN instead of embedding it, which keeps
# the file small but requires an internet connection to render.
fig.write_html("GD_DataViz_Ex.html", full_html=True, include_plotlyjs="cdn")