#### CASE STUDY OF BRAZIL 

##### Study objectives

* To understand the effect of Kiva loans on the economy
* To unpack members' borrowing and repayment ability from Kiva loans
* To analyse insights for future deployment of Kiva loans in Brazil
* To evaluate which regions are over- or under-funded by the fund
* To understand consumer behaviour in terms of payment


###### Import all libraries that will be used through the analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
sns.set(color_codes = True)
import os
# List every file available under the Kaggle input directory, one full path per line.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

###### Import the Kiva database

In [None]:
# Load the Kiva loans CSV into a DataFrame.
kiva = pd.read_csv('/kaggle/input/data-science-for-good-kiva-crowdfunding/kiva_loans.csv')

###### Extract the country subset from the Kiva database - Brazil

In [None]:
# Keep only the rows whose country is Brazil, renumber them from 0,
# then preview the first two rows.
brazil = kiva.loc[kiva['country'].eq('Brazil')].reset_index(drop=True)
brazil.head(2)

###### Return data overview of Brazil

In [None]:
# Preview the first five rows of the Brazil subset.
brazil.head(5)

###### Return all columns to have an overview of the data

In [None]:
# Column labels of the Brazil subset.
brazil.columns

In [None]:
# (number of rows, number of columns) of the Brazil subset.
brazil.shape

In [None]:
# Per-column dtype and non-null count summary.
brazil.info()

###### Return nunique and unique columns

In [None]:
# Number of distinct non-null region values.
brazil['region'].nunique()

In [None]:
# The distinct region values themselves (missing values, if any, are included).
brazil['region'].unique()

###### Return measures of central tendency and dispersion

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
brazil.describe()

###### Check for duplicates and missing values that might affect the analysis

In [None]:
# Number of rows that are exact duplicates of an earlier row.
brazil.duplicated().sum()

In [None]:
# Number of missing values in each column.
brazil.isna().sum()

###### Return a frequency bar chart to visualise the number of missing values per column

In [None]:
# Two-column table (Column, Frequency) of missing-value counts,
# ordered from the fewest to the most missing values.
null = (
    brazil.isnull()
    .sum()
    .rename_axis('Column')
    .reset_index(name='Frequency')
    .sort_values('Frequency')
)

In [None]:
# Vertical bar chart: one bar per column, height = number of missing values.
colors = [' black ' for _ in null.Column]
fig = go.Figure(data=[go.Bar(x=null.Column, y=null.Frequency, marker_color=colors)])
fig.update_layout(
    title='Distribution of Null Values in Columns',
    title_x=0.5,
    xaxis_title='Columns',
    yaxis_title='No of missing Values',
)
fig.show()

###### Return value counts for every region in Brazil

In [None]:
# Number of loans recorded for each region, most frequent first.
brazil['region'].value_counts()

###### Return loan amount per region

In [None]:
# Total loan amount per region; only the nine largest totals are kept.
loans = (
    brazil.groupby('region')['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head(9)
)
loans

###### Return loan amount per sector 

In [None]:
# Total loan amount per sector; only the nine largest totals are kept.
loans = (
    brazil.groupby('sector')['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head(9)
)
loans

###### Return loan amount per activity

In [None]:
# Total loan amount per activity; only the nine largest totals are kept.
loans = (
    brazil.groupby('activity')['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head(9)
)
loans

###### Define gender and return contribution data per gender

In [None]:
def gender_lead(gender):
    """Classify a borrower-gender value by how its text begins.

    Parameters
    ----------
    gender : object
        A raw value from the ``borrower_genders`` column (presumably a
        comma-separated list such as ``"female, male"`` -- confirm against
        the data). May be missing (``None`` / ``NaN``).

    Returns
    -------
    str
        ``'female'`` if the text starts with "f" (case-insensitive, leading
        whitespace ignored), ``'unknown'`` if the value is missing or blank,
        and ``'male'`` otherwise.
    """
    # A missing value used to be stringified to 'nan' and counted as male,
    # which silently inflated the male share; report it separately instead.
    if gender is None or pd.isna(gender):
        return 'unknown'

    lead = str(gender).strip().lower()
    if not lead:
        return 'unknown'
    if lead.startswith('f'):
        return 'female'
    return 'male'

In [None]:
# Derive a per-loan gender label from the raw borrower_genders value,
# then report how many distinct labels were produced.
brazil['gender_lead'] = brazil['borrower_genders'].map(gender_lead)
brazil['gender_lead'].nunique()

In [None]:
# Print the female / male loan counts with their share of the two combined.
# Counts are looked up by label: positional access ([0], [1]) only gave the
# right numbers when 'female' happened to be the most frequent label.
lead_counts = brazil['gender_lead'].value_counts()
f = int(lead_counts.get('female', 0))
m = int(lead_counts.get('male', 0))
total = f + m
# Both percentages are rounded to two decimals; guard against an empty subset.
f_pct = round(f * 100 / total, 2) if total else 0.0
m_pct = round(m * 100 / total, 2) if total else 0.0
print('{} females ({}%) vs {} males ({}%) got loans'.format(f, f_pct, m, m_pct))

###### Return frequency table for gender and visualize the data on a piechart

In [None]:
gender_lead = brazil.gender_lead.value_counts().to_frame().head(20).reset_index()
gender_lead.columns=['gender_lead','Frequency']

In [None]:
gender_lead

In [None]:
labels = gender_lead.gender_lead
values = gender_lead.Frequency
fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_layout(
title='Represention of Gender Funded by Kiva Loans In Brazil ',
title_x = 0.2)
fig.show()

###### Visualize funded amount on a box plot; there were two outliers that received 20k and 30k, while the majority received 3k and below

In [None]:
# Box plot of the funded_amount column.
fig = go.Figure(data=[go.Box(y=brazil.funded_amount, name='funded amount')])
fig.update_layout(
    title='Boxplot Distribution of Funded amount in brazil',
    title_x=0.5,
    yaxis_title='Amount in dollars',
)
fig.show()

###### Visualize funded amount on a histogram; the majority of the loans were below 3k

In [None]:
# Histogram of funded_amount in 15 bins.
plt.figure(figsize = (10,5))
plt.title('Funded Amount', fontsize = 15)
plt.hist(brazil['funded_amount'], edgecolor = 'k', bins = 15)
# The axis labels were previously assigned to unused variables and never
# reached the plot; set them on the current axes.
plt.xlabel('Funded Amount')
plt.ylabel('Frequency')
plt.show()

###### Visualize payment terms in months on a histogram; the majority of the loans are repaid in under 10 months

In [None]:
# Histogram of term_in_months in 15 bins, drawn on a fresh 10x5 figure.
term_axes = plt.figure(figsize=(10, 5)).gca()
term_axes.set_title('Distribution of Term in Months', fontsize=15)
term_axes.hist(brazil['term_in_months'], bins=15, edgecolor='k')
plt.show()

###### Categorical variables; return a frequency table for activity

In [None]:
# Frequency table (Activity, Frequency) of the 20 most common activities.
activity = (
    brazil['activity']
    .value_counts()
    .head(20)
    .rename_axis('Activity')
    .reset_index(name='Frequency')
)

In [None]:
# Display the activity frequency table.
activity

###### Return a bar chart of activities in Brazil; clothing and beauty salon are the most funded, while phone accessories, personal products, sewing, electrical machines and catering are the least funded

In [None]:
# Horizontal bar chart of the activity frequency table; the y-axis is
# reversed so the first table row is drawn at the top.
colors = [' black ' for _ in activity.Activity]
fig = go.Figure(
    data=[
        go.Bar(
            x=activity.Frequency,
            y=activity.Activity,
            orientation='h',
            marker_color=colors,
        )
    ]
)
fig.update_layout(
    title='Top 20 Activities Funded By Kiva',
    title_x=0.5,
    xaxis_title='Frequency',
    yaxis_title='Activity',
)
fig.update_yaxes(autorange='reversed')
fig.show()

###### Return repayment intervals as a frequency table and pie chart; the majority of the payments were irregular

In [None]:
# Frequency table (Repayment_interval, Frequency) of the repayment_interval column.
repayment_interval = (
    brazil['repayment_interval']
    .value_counts()
    .rename_axis('Repayment_interval')
    .reset_index(name='Frequency')
)

In [None]:
# Display the repayment-interval frequency table.
repayment_interval

In [None]:
# Pie chart: one slice per repayment interval, sized by its frequency.
labels = repayment_interval.Repayment_interval
values = repayment_interval.Frequency
fig = go.Figure()
fig.add_trace(go.Pie(labels=labels, values=values))
fig.update_layout(
    title='Represention of Repayment Intervals In Brazil ',
    title_x=0.2,
)
fig.show()

###### Return funded amount and loan amount on a scatterplot; there seems to be a linear relationship between the two variables

In [None]:
# Scatter plot of loan_amount (y) against funded_amount (x); the trailing
# semicolon suppresses the notebook's text output for the returned object.
sns.scatterplot(x='funded_amount',y='loan_amount',data=brazil);

###### Return measures of variation; there is a positive perfect correlation between the two variables (funded amount and term in months)

In [None]:
# Correlation-coefficient matrix of funded_amount and term_in_months; the
# off-diagonal entries hold the coefficient between the two variables.
np.corrcoef(brazil.funded_amount,brazil.term_in_months)

In [None]:
# Scatter plot of lender_count (y) against funded_amount (x); the trailing
# semicolon suppresses the notebook's text output for the returned object.
sns.scatterplot(x='funded_amount',y='lender_count',data=brazil);

###### Return measures of variation; there is a positive perfect correlation between the two variables (funded amount and lender count)

In [None]:
# Correlation-coefficient matrix of funded_amount and lender_count; the
# off-diagonal entries hold the coefficient between the two variables.
np.corrcoef(brazil.funded_amount,brazil.lender_count)

###### Return total amount received by sector

In [None]:
# Total loan amount per sector (rounded to whole units, largest first),
# drawn as a horizontal bar chart with the largest total at the top.
count = (
    brazil.groupby(['sector'])['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .round()
)
fig = go.Figure(data=[go.Bar(x=count.values, y=count.index, orientation='h')])
fig.update_layout(
    title='Top Sectors By Total Loan Amount Recieved',
    title_x=0.5,
    xaxis_title='loan amount in Dollar',
    yaxis_title='Sector',
)
fig.update_yaxes(autorange='reversed')
fig.show()

###### Return total amount received by region

In [None]:
# Total loan amount per region (rounded to whole units), limited to the 20
# largest totals and drawn as a horizontal bar chart, largest at the top.
count = (
    brazil.groupby(['region'])['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .round()
    .head(20)
)
fig = go.Figure(data=[go.Bar(x=count.values, y=count.index, orientation='h')])
fig.update_layout(
    title='Top Region By Total Loan Amount Recieved',
    title_x=0.5,
    xaxis_title='loan amount in Dollar',
    yaxis_title='Region',
)
fig.update_yaxes(autorange='reversed')
fig.show()

###### Return loan trends over time (weekly)

In [None]:
# Weekly count of loans by funded_time, drawn as a line chart.
# The timestamps are parsed into a separate series instead of overwriting
# brazil.index, so the DataFrame keeps its 0..n-1 index for the other cells.
# Rows with no funded_time are dropped; they were never counted anyway.
funded_at = pd.to_datetime(brazil['funded_time']).dropna()
fund_time = (
    pd.Series(1, index=pd.DatetimeIndex(funded_at))
    .resample('W')  # canonical weekly alias
    .count()
    .to_frame(name='Frequency')
)
fig = go.Figure()
fig.add_trace(go.Scatter(x=fund_time.index, y=fund_time.Frequency,
                    mode='lines',
                    name='lines'))
fig.update_layout(
    title='Loan Trends of Over Time(weekly)',
    title_x=0.5,
    yaxis_title='No. of loans',
    xaxis_title='Timeline'

)
fig.show()

###### Return loan themes by region

In [None]:
# Load the loan-themes-by-region CSV, keep the rows whose country is Brazil,
# and preview the first 20 of them.
themes = pd.read_csv(
    '/kaggle/input/data-science-for-good-kiva-crowdfunding/loan_themes_by_region.csv'
)
themes_brazil = themes.loc[themes['country'].eq('Brazil')]
themes_brazil.head(20)


###### Map loan amount per region to visualize loan distribution across Brazil

In [None]:
# Read the Mapbox access token from the environment instead of hard-coding it
# in the notebook, so the credential is not published with the source.
# NOTE(review): the token that was previously committed here should be rotated.
mapbox_token = os.environ.get('MAPBOX_ACCESS_TOKEN')
if not mapbox_token:
    raise RuntimeError(
        'Set the MAPBOX_ACCESS_TOKEN environment variable before drawing the map.'
    )
px.set_mapbox_access_token(mapbox_token)

In [None]:
# Map of the Brazil loan themes: one point per row at (lat, lon), coloured by
# region and sized by amount (marker size capped by size_max=15).
px.scatter_mapbox(themes_brazil, lat = 'lat', lon = 'lon', color = 'region',size = 'amount', size_max = 15)

### Take Outs

* Need for clarity on the "bullet" category under repayment interval
* Kiva to clarify why loans are concentrated only in the central part of Brazil
* Deeper analysis to understand why the majority of loans issued are below 3k
* Deeper analysis to understand why the majority of loan repayment terms are below 10 months
* Deeper analysis to understand why loan uptake is high in January, year in and year out, and low in July

# Thank you