# Women and loan requests

*Using `loans.csv` from http://s3.kiva.org/snapshots/kiva_ds_csv.zip (found at http://build.kiva.org)*

Goal: Visualize how much women request in loans compared with men and other borrower groups.

In [1]:
import pandas as pd
from collections import Counter

In [2]:
# Read the raw loans snapshot, parsing `posted_time` into datetimes on load.
LOANS = pd.read_csv(
    '../loans.csv',
    parse_dates=['posted_time'],
)

## Data Preprocessing
Only keep loans posted in 2010 or later.

**Features**

- `loan_amount` 
- `status`: only loans that are `funded` or `expired` are kept
- `sector`
- `country_name`
- `borrower_genders`

In [3]:
# Loans posted in 2010 or later, in chronological order.
# `sort_values` returns a new frame, so the raw `LOANS` frame is left untouched.
recent = LOANS[LOANS['posted_time'].dt.year >= 2010].sort_values('posted_time')
print('first loan: {}'.format(recent['posted_time'].min()))

# Build the working frame as one non-mutating chain. Calling in-place methods
# on a frame that came out of a boolean-mask selection is what triggers
# pandas' SettingWithCopyWarning, so every step here returns a new object.
loans = (
    recent
    .loc[
        recent['status'].isin(['funded', 'expired']),  # rows
        ['borrower_genders', 'loan_amount', 'sector', 'country_name', 'status'],  # cols
    ]
    .dropna(subset=['borrower_genders'])  # not interested in rows where gender is missing
    .reset_index(drop=True)
)

first loan: 2010-01-01 00:26:40


In [4]:
def genders_groups(x):
    """Collapse a borrower-gender listing into a single category label.

    Parameters
    ----------
    x : str
        Comma-separated genders, one entry per borrower
        (e.g. ``'female'`` or ``'female, male, female'``).

    Returns
    -------
    str
        For a single entry, that entry itself (whitespace-stripped).
        For several entries: ``'female_group'`` when 'female' is present and
        'male' is not, ``'mixed_group'`` when both are present, and
        ``'male_group'`` otherwise.
    """
    # Split on the bare comma and strip each token so irregular spacing
    # ('female,male', 'female,  male') cannot create bogus tokens such as
    # ' male' that would be misclassified.
    genders = [token.strip() for token in x.split(',')]
    if len(genders) == 1:
        return genders[0]

    present = set(genders)
    if 'female' in present:
        return 'mixed_group' if 'male' in present else 'female_group'
    # No 'female' entry at all: everything else is labelled 'male_group'.
    return 'male_group'

In [5]:
# Derive the gender category and discard the raw listing it came from.
# The guard keeps this cell re-runnable: once 'borrower_genders' has been
# dropped, running the cell again would otherwise raise a KeyError.
if 'borrower_genders' in loans.columns:
    loans['gender_group'] = loans['borrower_genders'].map(genders_groups)
    loans = loans.drop('borrower_genders', axis=1)

# Persist the reduced table (without the index column).
loans.to_csv('../loans_mini.csv', index=False)

In [6]:
# Preview the first five rows of the reduced table.
loans.head(n=5)

Unnamed: 0,loan_amount,sector,country_name,status,gender_group
0,1825.0,Retail,Peru,funded,mixed_group
1,2075.0,Agriculture,Peru,funded,mixed_group
2,2575.0,Retail,Peru,funded,mixed_group
3,1500.0,Services,Palestine,funded,male
4,1000.0,Agriculture,Palestine,funded,female


In [7]:
# Group-by handles reused by the cells below:
#   f - by gender group only
#   g - by sector, then gender group
#   h - by gender group, then sector
f = loans.groupby(by='gender_group')
g = loans.groupby(by=['sector', 'gender_group'])
h = loans.groupby(by=['gender_group', 'sector'])

In [8]:
# Average loan amount within each gender group.
mean_by_gender = f['loan_amount'].mean()
mean_by_gender

gender_group
female           600.715775
female_group    1668.887230
male             907.032674
male_group      1540.738268
mixed_group     2092.552552
Name: loan_amount, dtype: float64

In [9]:
# Total loan amount within each gender group.
total_by_gender = f['loan_amount'].sum()
total_by_gender

gender_group
female          422017850.0
female_group    171549925.0
male            229467475.0
male_group        8076550.0
mixed_group     125230900.0
Name: loan_amount, dtype: float64

In [10]:
# Number of loans in each gender group, most frequent first.
group_counts = loans['gender_group'].value_counts()
group_counts

female          702525
male            252987
female_group    102793
mixed_group      59846
male_group        5242
Name: gender_group, dtype: int64