In [2]:
import pandas as pd
import numpy as np
# import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio
from ipywidgets import widgets
# Make 'browser' the default plotly renderer for this session.
pio.renderers.default = 'browser'
# Bare expression: displays the renderer configuration (current default and
# the available renderer names) as the cell output.
pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'browser'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery']

In [3]:
# Read the retention workbook and, in the same chained expression, turn every
# missing value (NaN) into an empty string.
data = pd.read_excel('all_data_retention.xlsx').fillna('')

In [158]:
# Assign short names to selected columns.
# Keys are 0-based column positions in the loaded frame, so this mapping
# relies on the column order of the input staying the same.
short_names = {
    0: 'branch',
    8: 'loan_amount',
    9: 'state',
    19: 'housing',
    20: 'housing_cost',
    26: 'checking',
    27: 'checking_bal',
    28: 'savings',
    29: 'savings_bal',
    35: 'business_cat',
    36: 'loan_usage',
    38: 'before_grameen',
    39: 'hours_grameen',
    40: 'month_income_g',
    41: 'location',
    44: 'hispanic',
    45: 'race',
    46: 'other_job',
    52: 'retention',
    53: 'retention2',
    54: 'retention3',
}

# Work on an array copy of the labels, patch the chosen positions, then
# install the patched labels back on the frame.
dc = np.array(data.columns)
for position, short_name in short_names.items():
    dc[position] = short_name

data.columns = dc

In [199]:
# Categorical answer columns: mark empty answers as 'Blank', and for every
# other answer keep only the text before the first '/'.
# NOTE(review): presumably the answers are 'English / Spanish' pairs (e.g. the
# 'Yes / Si' value seen in other_job); if so the kept part retains the space
# before the slash ('Yes ') - confirm whether that is intended.
for col in ('housing', 'checking', 'hispanic', 'race', 'location'):
    data.loc[data[col] == '', col] = 'Blank'
    answered = data[col] != 'Blank'
    data.loc[answered, col] = data.loc[answered, col].str.split('/', n=1, expand=True)[0]

# Replace the numeric retention3 codes with the labels used by the plots.
for code, label in ((0, 'X Retention'), (1, 'Retention'), (2, 'Super Retention')):
    data.loc[data['retention3'] == code, 'retention3'] = label

In [238]:
# Percent-normalised histogram of one categorical column, one trace per
# retention label.
# cols_plot[0] is the column holding the retention label; cols_plot[1] is the
# categorical column placed on the x axis - edit it to chart another question.
cols_plot = ['retention3', 'branch']
group_col, category_col = cols_plot

# Drop rows whose category is an empty string; when there are none, work on a
# copy of the full frame instead.
has_blank = (data[category_col] == '').any()
data_plot = data[data[category_col] != ''] if has_blank else data.copy()

# One histogram trace per retention label, each with its own colour.
fig = go.Figure()
for label, colour in (
    ('X Retention', 'lightcoral'),
    ('Retention', 'cornflowerblue'),
    ('Super Retention', 'slateblue'),
):
    in_group = data_plot[group_col] == label
    fig.add_trace(go.Histogram(
        x=data_plot.loc[in_group, category_col],
        name=label,
        opacity=0.75,
        marker_color=colour,
        histnorm='percent',
    ))

fig.update_layout(
    title_text=category_col + ' by ' + group_col,  # plot title
    bargap=0.2,  # gap between bars of adjacent location coordinates
    bargroupgap=0.1,  # gap between bars of the same location coordinates
    yaxis_title_text='Normalized',  # y-axis label
)

fig.show()

# Write the figure to an HTML file named after the plotted columns. The call
# is the last expression so the written path is shown as the cell output.
# (Swap the 'plots/histnorm_' prefix for 'plots/hist_' when saving a
# non-normalised variant.)
histnorm_path = 'plots/histnorm_' + '-'.join(cols_plot) + '.html'
plot(fig, filename=histnorm_path, auto_open=False)

'plots/histnorm_retention3-branch.html'

In [239]:
# Write the current figure to an HTML file whose name is built from the
# plotted column names. The call stays the last expression so the written
# path is shown as the cell output.
# (Swap the 'plots/histnorm_' prefix for 'plots/hist_' when saving a
# non-normalised variant.)
histnorm_path = 'plots/histnorm_' + '-'.join(cols_plot) + '.html'
plot(fig, filename=histnorm_path, auto_open=False)

'plots/histnorm_retention3-branch.html'

In [33]:
# Grouped bar chart: number of members in each category of cols_plot[1],
# with one bar series per retention label found in cols_plot[0].
# Change the second element in the list to chart another categorical question.
cols_plot = ['retention', 'checking']
# NOTE(review): the labels below must exist as values of cols_plot[0]. The
# cleaning cell visible in this notebook only writes them into 'retention3';
# confirm that 'retention' carries them too, otherwise every bar will be 0.

# Drop rows whose category is an empty string (a no-op when there are none).
data_plot = data[data[cols_plot[1]] != ''].copy()

# Shared x axis: every category present after filtering, in order of appearance.
categories = data_plot[cols_plot[1]].unique()

fig = go.Figure()
for label, colour in (
    ('X Retention', 'lightcoral'),
    ('Retention', 'cornflowerblue'),
):
    # Count the members of this retention group per category. Reindexing onto
    # the shared category list keeps x and y aligned and yields 0 for any
    # category the group never appears in.
    counts = (
        data_plot.loc[data_plot[cols_plot[0]] == label, cols_plot[1]]
        .value_counts()
        .reindex(categories, fill_value=0)
    )
    fig.add_trace(go.Bar(
        x=categories,
        y=counts.to_numpy(),
        name=label,
        opacity=0.75,
        marker_color=colour,
    ))

fig.update_layout(
    barmode='group',
    title_text=cols_plot[1] + ' by ' + cols_plot[0],  # plot title
    bargap=0.2,  # gap between bars of adjacent location coordinates
    bargroupgap=0.1,  # gap between bars of the same location coordinates
    yaxis_title_text='Number of Members',  # y-axis label
)

fig.show()
# Last expression: writes the HTML file and shows the written path as output.
plot(fig, filename='plots/bar_' + '-'.join(cols_plot) + '.html', auto_open=False)

'plots/bar_retention-checking.html'

In [36]:
# Inspect the rows whose first plotted column carries the 'X Retention' label.
is_x_retention = data_plot[cols_plot[0]] == 'X Retention'
data_plot.loc[is_x_retention]

Unnamed: 0,branch,Account Holder ID,Account Holder ID2,Match?,Account ID,Completed Loan Cycles (Member),Days In Arrears,Activation Date (Loan),loan_amount,state,...,race,other_job,Which of the following best describes your wage-earning job (NOT your Grameen activity),How many hours per WEEK do you work at this wage-earning job,What is your total MONTHLY income from this wage earning job / Cual es tu ingreso total MENSUAL de este trabajo asalariado?,"In the last week, how many times did you use your debit/ATM card to pay for some item (such as groceries, business purchases, phone bills, electricity bills, etc.)?",Phone Provider,retention,retention_2,retention_3
204,08 - Jamaica/LIC - Queens 2,8-6812.3,8-6812.3,1,IXQL429,1,0,2019-01-31 00:00:00,1800,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,AT&T,X Retention,0,0
205,01 - Jackson Heights - Queens 1,1-2584.3,1-2584.3,1,REJM836,9,0,2019-01-09 00:00:00,5500,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,AT&T,X Retention,1,1
206,01 - Jackson Heights - Queens 1,1-9932.3,1-9932.3,1,BUAA235,6,0,2019-01-09 00:00:00,2500,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,MetroPCS,X Retention,1,1
207,01 - Jackson Heights - Queens 1,1-4312.5,1-4312.5,1,MGST377,2,0,2019-01-07 00:00:00,2500,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,Boost Mobile,X Retention,0,0
208,01 - Jackson Heights - Queens 1,1-9846,1-9846,1,EAFF150,6,0,2019-01-07 00:00:00,4000,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,T-Mobile,X Retention,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98096,01 - Jackson Heights - Queens 1,1-5677,1-5677,1,GCKK267,3,0,2019-02-27 00:00:00,3000,Closed,...,,Yes / Si,"Cleaning (House Cleaning, Janitor, Laundry/Dry...",1-10 Hours,$601-800,1-3 times in the last week,AT&T,X Retention,1,1
98097,01 - Jackson Heights - Queens 1,1-5674.1,1-5674.1,1,BQPM880,3,0,2019-02-27 00:00:00,3000,Closed,...,,No,,0 Hours (N/A),$0 (N/A),1-3 times in the last week,T-Mobile,X Retention,1,1
98339,04 - Inwood - Manhattan 1,4-5564.2,4-5564.2,1,OEZO697,7,0,2019-01-30 00:00:00,3500,Closed,...,,No,,0 Hours (N/A),$0 (N/A),4-10 times in the last week,MetroPCS,X Retention,1,1
98340,14 - Austin - Texas,14-4931,14-4931,1,KYRP079,4,0,2019-02-05 00:00:00,2500,Closed,...,,No,,0 Hours (N/A),$0 (N/A),4-10 times in the last week,Cricket Mobile,X Retention,1,1


In [None]:
# Write the current figure to an HTML file whose name is built from the
# plotted column names. The call stays the last expression so the written
# path is shown as the cell output.
bar_path = 'plots/bar_' + '-'.join(cols_plot) + '.html'
plot(fig, filename=bar_path, auto_open=False)