In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

import math
import numpy as np
import pandas as pd
import missingno as msn

# NOTE(review): importing set_matplotlib_formats from IPython.display is
# reported as deprecated by recent IPython releases (the function moved to
# matplotlib_inline.backend_inline) — confirm against the installed version.
from IPython.display import set_matplotlib_formats
# Global seaborn theme for every matplotlib/seaborn figure in the notebook.
# NOTE(review): font='ubuntu' presumably requires that font to be installed
# on the host; matplotlib would otherwise fall back to a default — verify.
sns.set(context='notebook',style='white',
        font='ubuntu', font_scale=.9, palette='viridis_r')
# Ask the inline backend for high-DPI ("retina") figure output.
set_matplotlib_formats('retina')


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
# Blanket filter: with no category/module argument this suppresses every
# warning for the rest of the session, including deprecation notices from
# pandas / plotly / IPython that may point at real problems in later cells.
warnings.filterwarnings('ignore')

## Data Load
-----

In [None]:
# Read the raw Kiva loans table, then draw a missingno matrix to show which
# columns contain missing values.
loans_csv = '../input/data-science-for-good-kiva-crowdfunding/kiva_loans.csv'
kiva_basics = pd.read_csv(loans_csv)

msn.matrix(kiva_basics, figsize=(9, 6), fontsize=10)
plt.show()

In [None]:
# Read the MPI region table and reduce it to one row per
# (ISO, country, world_region) holding the mean MPI of that group.
regions_csv = '../input/data-science-for-good-kiva-crowdfunding/kiva_mpi_region_locations.csv'
kiva_regions = pd.read_csv(regions_csv)

country_keys = ['ISO', 'country', 'world_region']
kiva_regions = (
    kiva_regions
    .groupby(country_keys)['MPI']
    .mean()
    # A group with no MPI values has a NaN mean; it is stored as 0 here.
    .fillna(0)
    .reset_index()
)

msn.matrix(kiva_regions, figsize=(9, 2), fontsize=10)
plt.show()

In [None]:
# Attach ISO / world_region / MPI to every loan. A left join keeps all loans;
# loans whose country has no match in kiva_regions get NaN in those columns.
# NOTE(review): assumes 'country' is unique in kiva_regions — a country listed
# under several ISO/world_region combinations would duplicate its loans.
kiva_combined = pd.merge(kiva_basics, kiva_regions, on='country', how='left')
kiva_combined.head(1)

## Exploratory Data Analysis
-------------

### Gender structure of loans by country

In [None]:
def gender_up(x):
    """Collapse borrower-gender labels into 'male', 'female' or 'group'.

    Parameters
    ----------
    x : iterable
        Raw gender labels, one per loan.

    Returns
    -------
    list
        One label per input element. Values equal to 'male' or 'female' are
        kept as they are; every other value (multi-borrower strings, missing
        values, anything else) is mapped to 'group'.
    """
    single_borrower = ('male', 'female')
    return [label if label in single_borrower else 'group' for label in x]
        
# Overwrite the raw gender column with the three-way label produced by
# gender_up, then show how many loans fall into each label.
raw_genders = kiva_combined['borrower_genders'].to_list()
kiva_combined['borrower_genders'] = gender_up(raw_genders)
kiva_combined['borrower_genders'].value_counts().reset_index()

In [None]:
# Keep only countries that account for more than 0.5% of all loans.
# `countries` stays a Series of loan counts indexed by country name.
country_counts = kiva_combined['country'].value_counts()
countries = country_counts[country_counts / country_counts.sum() > 0.005]

# Number of loans per (country, gender label) for the retained countries.
gender_counts = (
    kiva_combined[kiva_combined['country'].isin(countries.index.values)]
    .groupby(['country', 'borrower_genders'])['borrower_genders']
    .count()
)

# Per-country total, broadcast back onto the (country, gender) index.
# transform('sum') is used instead of groupby(...).apply(lambda ...): on
# pandas >= 2.0 apply prepends the group key even for like-indexed results,
# which duplicates the 'country' index level and makes the later
# .stack().reset_index() calls fail.
country_totals = gender_counts.groupby(level=0).transform('sum')

# Percentage share of each gender label within a country, one row per
# country and one column per label, sorted ascending by female share
# (ties broken by male share) and rounded to one decimal.
gender_proportions = (
    (100 * gender_counts / country_totals)
    .unstack('borrower_genders')
    .fillna(0)
    .sort_values(by=['female', 'male'])
    .round(1)
)

gender_proportions.head(2)

In [None]:
# Fix the column order so the stacked bars and the legend read
# male, female, group.
gender_proportions = gender_proportions[['male','female','group']]

# Not referenced by any cell visible here; kept in case a later cell uses it.
colors = ['#F7DC6F','#5D6D7E','#F08080']

# gender_proportions is sorted ascending by female share where it is built,
# so its last 20 rows are the countries with the highest female share.
# stack() turns the wide table into long (country, borrower_genders, share).
female = (
    gender_proportions
    .tail(20)
    .stack()
    .reset_index()
    .rename(columns={0: 'share'})
)

fig = px.bar(female, x='share', y='country',
             color='borrower_genders')

fig.update_traces(opacity=.7,
    marker_line_color='darkblue')

fig.update_layout(
    # The title previously said "male and group together" (copied from the
    # next chart); this chart shows the female-dominated countries.
    title='Countries with most borrowers from female: loans structure by gender',
    height=430,width=1000,
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=90, r=30, t=70, b=70),
    showlegend=True
)
fig.show()

In [None]:
# Countries where male and group loans together outnumber female loans.
# Of those, keep the 20 with the largest male share (ties broken by group
# share) and reshape to long form for plotting.
male_and_group = gender_proportions['male'] + gender_proportions['group']
female_minority = gender_proportions[male_and_group > gender_proportions['female']]

non_female = (
    female_minority
    .sort_values(by=['male', 'group'], ascending=True)
    .tail(20)
    .stack()
    .reset_index()
    .rename(columns={0: 'share'})
)

fig = px.bar(non_female, x='share', y='country', color='borrower_genders')
fig.update_traces(opacity=.7, marker_line_color='darkblue')
fig.update_layout(
    title='Countries with most borrowers from male and group together: loans structure by gender',
    height=430,
    width=1000,
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=90, r=30, t=70, b=70),
    showlegend=True,
)
fig.show()

In [None]:
# Countries where group loans alone outnumber female and male loans
# combined, sorted ascending by group share and reshaped to long form
# (country, borrower_genders, share) for plotting.
female_and_male = gender_proportions['female'] + gender_proportions['male']

mostly_group = (
    gender_proportions[gender_proportions['group'] > female_and_male]
    .sort_values(by='group')
    .stack()
    .reset_index()
    .rename(columns={0: 'share'})
)

fig = px.bar(mostly_group, x='share', y='country',
             color='borrower_genders')

fig.update_traces(opacity=.7,
    marker_line_color='darkblue')

fig.update_layout(
    # The title previously said "from male" (copied from the male chart);
    # the filter above selects group-dominated countries.
    title='Countries with most borrowers from group: loans structure by gender',
    height=260,width=1020,
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=90, r=30, t=70, b=70),
    showlegend=True
)
fig.show()

In [None]:
# Countries where male loans alone outnumber female and group loans
# combined, sorted ascending by male share and reshaped to long form.
female_and_group = gender_proportions['female'] + gender_proportions['group']
male_majority = gender_proportions[gender_proportions['male'] > female_and_group]

mostly_male = (
    male_majority
    .sort_values(by='male')
    .stack()
    .reset_index()
    .rename(columns={0: 'share'})
)

fig = px.bar(mostly_male, x='share', y='country', color='borrower_genders')
fig.update_traces(opacity=.7, marker_line_color='darkblue')
fig.update_layout(
    title='Countries with most borrowers from male: loans structure by gender',
    height=230,
    width=1000,
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=100, r=30, t=70, b=70),
    showlegend=True,
)
fig.show()

In [None]:
# Countries with the largest loans: loans of at least 50000 USD that have a
# stated use, ordered from the largest loan amount down.
is_large_loan = kiva_combined['loan_amount'] >= 50000
has_stated_use = kiva_combined['use'].notna()

sample = (
    kiva_combined[is_large_loan & has_stated_use]
    .sort_values('loan_amount', ascending=False)
)

# Five countries with the most such loans.
sample['country'].value_counts().head(5).reset_index()

In [None]:
# Example purposes among the large loans. `sample` is sorted by loan_amount
# in descending order, so the last five rows are the five smallest loans
# within the >= 50000 USD subset.
sample['use'].iloc[-5:]

In [None]:
# Three-sigma outlier filter: drop loans whose amount is more than three
# standard deviations away from the mean loan amount.
kiva_dev = (kiva_combined['loan_amount'] - kiva_combined['loan_amount'].mean()).abs()
kiva_std = kiva_combined['loan_amount'].std()
kiva_sigma = kiva_combined[~(kiva_dev > 3 * kiva_std)]

# Median and mean loan amount per gender label, largest median first.
gender_mean_median = (
    kiva_sigma
    .groupby(['borrower_genders'])['loan_amount']
    .agg(median='median', mean='mean')
    .sort_values(by='median', ascending=False)
)

# Gender labels ordered by descending median loan amount.
order = gender_mean_median.index.to_list()

# `order` was computed but never passed to the plot; category_orders applies
# it so the boxes follow the median ranking.
fig = px.box(kiva_sigma, x='loan_amount', y='borrower_genders',
             color='borrower_genders',
             category_orders={'borrower_genders': order})

fig.update_traces(
    opacity=.85,
    marker_line_color='black'
)

fig.update_layout(
    height=340, width=1000,
    # The title previously read "Sum of loans by gender"; a box plot shows
    # the distribution of individual loan amounts, not a sum.
    title='Distribution of loan amounts by gender',
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=100, r=20, t=70, b=70),
    showlegend=False,
)
fig.show()

In [None]:
# Median loan amount for every (world_region, country, gender label)
# combination in the outlier-filtered loans.
kiva_median = (
    kiva_sigma
    .groupby(['world_region', 'country', 'borrower_genders'])
    .agg({'loan_amount': 'median'})
    .reset_index()
    .sort_values('borrower_genders')
)

# One frame per gender label, each ordered by region and then by the
# country-level median loan amount.
region_then_amount = ['world_region', 'loan_amount']
by_male, by_female, by_group = (
    kiva_median[kiva_median['borrower_genders'] == gender_label]
    .sort_values(by=region_then_amount)
    for gender_label in ('male', 'female', 'group')
)

# Three side-by-side horizontal box plots (females, males, groups) of the
# country-level median loan amounts, grouped by world region.
fig = make_subplots(rows=1, cols=3, specs=[[{}, {}, {}]],
                    shared_xaxes=True, shared_yaxes=True, vertical_spacing=0.5)

# (source frame, marker colour, legend label) for subplot columns 1..3.
gender_panels = [
    (by_female, 'black', 'Range of loans for females'),
    (by_male, 'red', 'Range of loans for males'),
    (by_group, 'blue', 'Range of loans for groups'),
]

# add_trace(row=, col=) replaces Figure.append_trace, which plotly marks as
# deprecated; the three traces differ only in data, colour and label.
for panel_col, (panel_frame, panel_color, panel_label) in enumerate(gender_panels, start=1):
    fig.add_trace(
        go.Box(
            x=panel_frame.loan_amount,
            y=panel_frame.world_region,
            marker=dict(
                opacity=.95,
                color=panel_color,
                line=dict(
                    color='black',
                    width=.25),
            ),
            name=panel_label,
            orientation='h',
        ),
        row=1, col=panel_col,
    )

fig.update_traces(
    opacity=.7)

fig.update_layout(
    # Title typo fixed: "Loands" -> "Loans".
    title='Range of Loans Divided by Gender and World Region',
    # All three y axes use the lower 80% of the figure height, leaving room
    # for the legend placed above the plots.
    yaxis=dict(
        showgrid=False,
        showline=True,
        showticklabels=True,
        linewidth=3,
        domain=[0, .8],
    ),
    yaxis2=dict(
        showgrid=False,
        linewidth=3,
        domain=[0, .8],
    ),
    yaxis3=dict(
        showgrid=False,
        linewidth=3,
        domain=[0, .8],
    ),
    # Explicit horizontal domains for the three panels.
    xaxis=dict(
        zeroline=True,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, .3],
        side='bottom',
    ),
    xaxis2=dict(
        zeroline=True,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[.35, .65],
        side='bottom',
    ),
    xaxis3=dict(
        zeroline=True,
        showline=True,
        showticklabels=True,
        showgrid=True,
        domain=[.7, 1],
        side='bottom',
    ),
    legend=dict(x=0.029, y=1.075, font_size=10),
    margin=dict(l=100, r=20, t=70, b=70),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    height=400,
)

fig.show()

In [None]:
# Loan counts per world region (value_counts leaves out missing regions).
regions = kiva_combined['world_region'].value_counts()

# Number of loans per (world_region, gender label).
region_gender_counts = (
    kiva_combined[kiva_combined['world_region'].isin(regions.index.values)]
    .groupby(['world_region', 'borrower_genders'])['borrower_genders']
    .count()
)

# Per-region total, broadcast back onto the (world_region, gender) index.
# transform('sum') is used instead of groupby(...).apply(lambda ...): on
# pandas >= 2.0 apply prepends the group key even for like-indexed results,
# which duplicates the 'world_region' index level and makes the
# .stack().reset_index() below fail.
region_totals = region_gender_counts.groupby(level=0).transform('sum')

# Percentage share of each gender label within a region, wide format.
gender_by_macroregion = (
    (100 * region_gender_counts / region_totals)
    .unstack('borrower_genders')
    .fillna(0)
    .sort_values(by=['female', 'male'])
)

# Fixed column order, regions sorted ascending by male share, rounded to two
# decimals, then reshaped to long form (world_region, borrower_genders, share).
gender_by_macroregion = (
    gender_by_macroregion[['male', 'female', 'group']]
    .sort_values(by='male', ascending=True)
    .round(2)
    .stack()
    .reset_index()
    .rename(columns={0: 'share'})
)

fig = px.bar(gender_by_macroregion, x='share', y='world_region',
             color='borrower_genders')

fig.update_traces(opacity=.7,
    marker_line_color='darkblue')

fig.update_layout(
    title='Macroregions with most borrowers from males: loans structure by gender',
    height=340,width=1000,
    plot_bgcolor='rgb(243, 243, 243)',
    margin=dict(l=100, r=30, t=70, b=70),
    showlegend=True
)
fig.show()