In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns;sns.set()
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file found under the Kaggle input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load datasets: read the four Kiva CSV files in a fixed order and bind each
# resulting DataFrame to its own name.
loans, locations, themes, themes_by_region = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/data-science-for-good-kiva-crowdfunding/kiva_loans.csv',
        '/kaggle/input/data-science-for-good-kiva-crowdfunding/kiva_mpi_region_locations.csv',
        '/kaggle/input/data-science-for-good-kiva-crowdfunding/loan_theme_ids.csv',
        '/kaggle/input/data-science-for-good-kiva-crowdfunding/loan_themes_by_region.csv',
    )
)

In [None]:
# Preview the first rows of the loans table (head() shows five rows by default).
loans.head()

In [None]:
# Preview the first rows of the MPI region locations table.
locations.head()


In [None]:
# Preview the first rows of the loan theme ids table.
themes.head()

In [None]:
# Preview the first ten rows of the loan-themes-by-region table.
themes_by_region.head(10)

In [None]:
# Show the column labels available in the loans table.
loans.columns


**Removing missing values**

Inspect the shape and structure of the loans data frame.

In [None]:
# Print the (rows, columns) shape, then the per-column dtype and non-null summary.
print(loans.shape)
loans.info()

Separate the categorical and numerical columns of the data frame and count the missing values in each.

In [None]:
# How many columns there are of each dtype.
print(loans.dtypes.value_counts())

# Split the column labels into non-object ("numerical") and object ("categorical") groups.
object_mask = loans.dtypes == 'object'
loans_num = loans.columns[~object_mask]
loans_cat = loans.columns[object_mask]

# Missing-value count per column, largest first, reported for each group in turn.
missing_per_column = loans.isnull().sum()
print(missing_per_column[loans_num].sort_values(ascending=False))
print("-----------------------------")
print(missing_per_column[loans_cat].sort_values(ascending=False))

In [None]:
# Fill missing partner_id values with the column median.
# The result is assigned back to the column: calling fillna(inplace=True) on the
# loans["partner_id"] selection is chained assignment, which pandas warns about
# and which stops updating `loans` once copy-on-write is active.
# NOTE(review): partner_id looks like an identifier rather than a measurement, so a
# median fill may not be meaningful - confirm this is the intended imputation.
median = loans["partner_id"].median()
loans["partner_id"] = loans["partner_id"].fillna(median)

# tags contain a lot of missing values, so the column is dropped.
# DataFrame.drop returns a new frame, so the result must be rebound to `loans`;
# errors="ignore" keeps the cell safe to re-run after the column is already gone.
loans = loans.drop(columns="tags", errors="ignore")


In [None]:
#This function will rework the gender column. As it is a simplification it has to be taken with a grain of salt
def genalloc(x):
    """Collapse a comma-separated borrower gender string into one group label.

    The value is converted with ``str`` and split on commas, and each piece is
    stripped of surrounding whitespace.

    Returns:
        'male' or 'female' for a single-entry value;
        'mixed' when a multi-entry value contains both 'male' and 'female';
        'men' or 'women' when a multi-entry value starts with 'male' or
        'female' respectively;
        None for anything else (for example a missing value, which ``str``
        turns into 'nan').
    """
    genders = [piece.strip() for piece in str(x).split(',')]
    first = genders[0]

    # Single borrower: keep the gender as it is, anything unrecognised maps to None.
    if len(genders) == 1:
        return {'male': 'male', 'female': 'female'}.get(first)

    # Several borrowers: both genders present means a mixed group.
    if 'male' in genders and 'female' in genders:
        return 'mixed'

    # Otherwise the label is decided by the first entry only.
    return {'male': 'men', 'female': 'women'}.get(first)

In [None]:
# Recode every borrower_genders entry with genalloc (the function is passed directly).
# NOTE: this overwrites the raw column, so running the cell a second time would
# feed already-recoded labels such as 'mixed' back into genalloc and yield None.
loans['borrower_genders'] = loans['borrower_genders'].apply(genalloc)

In [None]:
# Count the loans in each borrower_genders group (value_counts skips missing values by default).
loans['borrower_genders'].value_counts()

In [None]:
# Bar chart of the number of distinct loan ids in each sector.
loans_per_sector = loans.groupby('sector')['id'].nunique()
loans_per_sector.plot.bar(figsize=[12, 6])


In [None]:
#Loan funded amount by sector

# Categorical plot with funded_amount on the x axis and sector on the y axis;
# no `kind` is passed, so catplot's default plot kind is used.
sns.catplot(data=loans,y='sector',x='funded_amount', height=8,aspect=2)
# TODO: rank the sectors by maximum count (not done yet)

In [None]:
# Agriculture loan funded amount by country
# This cell only selects the Agriculture rows; no per-country aggregation happens here.
is_agriculture = loans['sector'] == 'Agriculture'
loan_agri = loans.loc[is_agriculture]


In [None]:
# Preview the first rows of the Agriculture-only subset.
loan_agri.head()

In [None]:
# Create map
# The figure size is specified in pixels (2600 x 1800) and converted to inches via the DPI.
my_dpi=96
plt.figure(figsize=(2600/my_dpi,1800/my_dpi),dpi=my_dpi)
# Map window: longitudes -180..180, latitudes -65..80; no projection is passed,
# so Basemap's default projection is used.
m=Basemap(llcrnrlon=-180, llcrnrlat=-65,urcrnrlon=180,urcrnrlat=80)
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
m.fillcontinents(color='grey', alpha=0.3)
m.drawcoastlines(linewidth=0.1, color="white")

# Encode each distinct world_region as an integer code so it can drive the colormap.
# NOTE: this adds a labels_enc column to the shared `locations` frame.
locations['labels_enc'] = pd.factorize(locations['world_region'])[0]

# One point per row of `locations`, coloured by its region code.
# NOTE(review): lon/lat are passed without latlon=True - presumably the default
# projection accepts degrees directly; confirm.
m.scatter(locations['lon'], locations['lat'], alpha=0.4, c=locations['labels_enc'], cmap="Set1")
# NOTE(review): the caption mentions loan funded amounts, but the plotted points come
# from `locations` (lon/lat coloured by world_region), not from `loans`.
plt.text( -170, -58, 'Loan funded amount worldwide via Kiva\n\nMap done in Python by Uyen Nguyen', ha='left', va='bottom', size=9, color='#555555' )
# NOTE(review): the output file name starts with '#' and spells "WordWide"; it is left
# unchanged here because it is the name of the saved artefact.
plt.savefig('#Agri_Loan_WordWide_map1.png', bbox_inches='tight')


#Change color by continent (find continent data, join)

In [None]:
#Most popular activity for taking loan
# Count plot restricted to the ten most frequent activities, most frequent first.
# Series.value_counts() is used instead of the deprecated top-level pd.value_counts();
# it returns counts in descending order, so the first ten index labels are the top ten.
top_activities = loans['activity'].value_counts().iloc[:10].index
sns.catplot(data=loans,kind='count',y='activity',x=None,height=8, aspect=2,
            order=top_activities,alpha=.8,palette='husl')

In [None]:
#Most popular use of loans
# value_counts() already skips missing values, so no separate dropna step is needed
# (the original `loans['use'].dropna` was never called and had no effect).
# The explicit descending sort is kept so the ordering of the top ten is unchanged.
a = loans['use'].value_counts().sort_values(ascending=False).iloc[:10]

# Label both axes explicitly on the returned Axes: how pandas maps xlabel/ylabel
# onto a horizontal bar plot has differed between versions.
ax = a.plot(kind='barh')
ax.set_xlabel('Number of loans')
ax.set_ylabel('Use of Loan')

#Try to use seaborn - not done
#sns.countplot(data=a,y=use,height=8, aspect=2)


In [None]:
#Distribution of the Funded loan amount - histogram of funded amount

# 30-bin histogram; density=True normalises the bar heights so the total area is 1.
plt.hist(x=loans['funded_amount'],density=True,bins=30)
# TODO: change the x scale (log 10?) - not done

In [None]:
# Distribution of loan amount by country
# Total loan_amount per country; the ten largest totals are drawn as horizontal bars.
loan_total_by_country = loans.groupby('country')['loan_amount'].sum()
top_ten_totals = loan_total_by_country.sort_values(ascending=False).iloc[:10]
top_ten_totals.plot.barh()

#Treemap



In [None]:
# Distribution of loan amount by sector
#Histogram
# NOTE(review): the headings above describe a per-sector histogram, but the code below
# only shows summary statistics of funded_amount across all loans.
loans['funded_amount'].describe()

In [None]:
# NOTE(review): this repeats the borrower_genders tally already shown earlier in the
# notebook; candidate for removal.
loans['borrower_genders'].value_counts()

In [None]:
# NOTE(review): this repeats the borrower_genders tally already shown earlier in the
# notebook; candidate for removal.
loans['borrower_genders'].value_counts()

In [None]:
#Distribution of loan amount by Gender
#sns.countplot(data=loans, y='borrower_genders')

# Horizontal bars showing how many loan ids fall in each borrower_genders group.
loans_per_gender = loans.groupby('borrower_genders')['id'].count()
loans_per_gender.plot.barh(figsize=[12, 6])

In [None]:
#Most common length of loan term in months
# value_counts() ranks the terms by how many loans use them (most frequent first);
# unique() only listed the distinct values without any frequency information.
loans['term_in_months'].value_counts().head(10)

In [None]:
#most common countries for loan

In [None]:
# Maps of loan

In [None]:
#Theme loan combined

In [None]:
#most popular theme

In [None]:
#Africa
#Distribution of loan in Africa by location
#Distribution of loan in Africa by sector

In [None]:
#Asia
#Distribution of loan in Asia by location
#Distribution of loan in Asia by sector
#Poorest Asian countries by Multidimensional Poverty measure
#Explore loan usage in the poorest Asian countries
# Distribution of the Funded loan amount in Asia
