#### Import Libraries

In [None]:
import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:
# Data handling and numerics.
import pandas as pd 
import numpy as np
# Static plotting.
import matplotlib.pyplot as plt 
import warnings
# Silences every warning for the rest of the session, including deprecation
# warnings raised by pandas/seaborn calls further down.
warnings.filterwarnings('ignore')
import seaborn as sns
# Apply seaborn's default theme with color_codes enabled.
sns.set(color_codes = True)
# Interactive plotting.
import plotly.express as px
import plotly.graph_objects as go

#### Read in Kiva Loans File

In [None]:
# Load the full Kiva loans table; the relative path is resolved against the
# current working directory.
kiva = pd.read_csv('kiva_loans.csv')

### Exercise Objective

*Undertake analysis and generate useful insights into lending by Kiva.*

*Offer suggestions to Kiva to help improve their offerings or shed light on the performance of their project.*

**About Kiva**

Kiva is an international nonprofit, founded in 2005 in San Francisco, with a ***mission*** to expand financial access to help underserved communities thrive. We do this by crowdfunding loans and unlocking capital for the underserved, improving the quality and cost of financial services, and addressing the underlying barriers to financial access around the world.

**Approach:** analyse data and offer suggestions that support Kiva's mission in various ways:

How can more and better impact be generated?

1. Increase loan disbursement: Increase the amount of crowdfunding by either getting more people to join or by getting individuals to donate more on average
2. Improve targeting of the underserved (and not just lend more to people who already have access to credit)

#### Subset Data for Uganda

In [None]:
# Keep only the Uganda rows; reset_index() renumbers them from 0 and retains
# the original row labels in an 'index' column.
ug = kiva.loc[kiva['country'].eq('Uganda')].reset_index()

# 1. Data Overview & Cleaning

In [None]:
# Preview the first two Uganda rows.
ug.head(2)

In [None]:
# Column names, dtypes and non-null counts for the Uganda subset.
ug.info()

In [None]:
# (rows, columns) of the Uganda subset before cleaning.
ug.shape

### Dealing with Missing Values

In [None]:
ug.isna().sum() # Missing values as a proportion of total: 11.4% funded_time; 25.8% tags >> too high.

In [None]:
# Number of rows that are exact duplicates of an earlier row.
ug.duplicated().sum()

In [None]:
# Remove the sparsely populated 'tags' column from ug in place.
ug.drop(columns='tags', inplace=True)

In [None]:
# (rows, columns) again, to confirm the column count after dropping 'tags'.
ug.shape

In [None]:
# Summary statistics for the numeric columns.
ug.describe()

In [None]:
# Summary (count / unique / top / freq) of the object-dtype columns.
# Need to deal with multiple currencies.
ug.describe(include=[object])

In [None]:
# Distinct currency codes present in the Uganda loans.
ug['currency'].unique()

In [None]:
# How many distinct currencies there are.
ug['currency'].nunique()

In [None]:
# Number of loans per currency.
ug['currency'].value_counts()

In [None]:
# Per-currency totals of the numeric columns. numeric_only=True keeps text and
# date-like columns out of the sum; without it recent pandas versions either
# concatenate the strings or raise.
ug.groupby('currency').sum(numeric_only=True).reset_index() # significant amount disbursed in USD

# 2. Descriptive Statistics

#### Define Variables for Charts

In [None]:
sector = ug_sector['sector'] 
loan = ug_sector['loan_amount'] 
fund = ug_sector['funded_amount'] 
lender = ug_sector['lender_count']

In [None]:
ug_sector = ug.groupby('sector')['loan_amount', 'lender_count', 'funded_amount'].sum().sort_values(by = 'loan_amount', ascending = False).reset_index()
ug_sector

## Big Picture

In [None]:
# Distinct countries in the full Kiva dataset.
kiva['country'].unique()

In [None]:
# How many distinct countries there are.
kiva['country'].nunique()

In [None]:
# Average amount per lender on each loan. A loan with zero lenders would
# divide by zero and produce inf (or NaN for 0/0), which distorts any later
# aggregate, so a zero lender count is treated as missing instead.
kiva['avg_loan_by_lender'] = kiva['loan_amount'] / kiva['lender_count'].replace(0, np.nan)
kiva['avg_loan_by_lender']

#### Regional comparison of loans

In [None]:
# Total loan amount and lender count per country, top 20 by loan amount.
# Columns are selected with a list ([[...]]): the bare-tuple form is rejected
# by pandas >= 2.0.
(
    kiva.groupby('country')[['loan_amount', 'lender_count']]
    .sum()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
    .head(20)
)
# Rwanda has fewer lenders but a higher loan amount despite
# being a much smaller country than Uganda, while Kenya has over 2x more loans than Uganda

#### Trends over time

In [None]:
# Weekly number of funded loans in Uganda, drawn as a line chart.
# Indexing ug by the parsed funding timestamp lets resample() bucket rows by week.
ug.index = pd.to_datetime(ug['funded_time'])

# 'W' is the documented weekly alias; the lowercase spelling is deprecated in
# recent pandas releases. count() only counts rows whose funded_time is set.
fund_time = ug['funded_time'].resample('W').count().to_frame()
fund_time.columns = ['Frequency']

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=fund_time.index,
        y=fund_time['Frequency'],
        mode='lines',
        name='lines',
    )
)
fig.update_layout(
    title='Loans Issued Over Time in Uganda (weekly)',
    title_x=0.5,
    yaxis_title='No. of loans',
    xaxis_title='Timeline',
)
fig.show()

In [None]:
# Weekly number of funded loans in Rwanda, drawn as a line chart.
rw = kiva[kiva['country'] == 'Rwanda'].reset_index()

# Index by the parsed funding timestamp so resample() can bucket rows by week.
rw.index = pd.to_datetime(rw['funded_time'])

# 'W' is the documented weekly alias; the lowercase spelling is deprecated in
# recent pandas releases. count() only counts rows whose funded_time is set.
fund_time = rw['funded_time'].resample('W').count().to_frame()
fund_time.columns = ['Frequency']

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=fund_time.index,
        y=fund_time['Frequency'],
        mode='lines',
        name='lines',
    )
)
fig.update_layout(
    title='Loans Issued Over Time in Rwanda (weekly)',
    title_x=0.5,
    yaxis_title='No. of loans',
    xaxis_title='Timeline',
)
fig.show()

In [None]:
# Weekly number of funded loans in Kenya, drawn as a line chart.
ke = kiva[kiva['country'] == 'Kenya'].reset_index()

# Index by the parsed funding timestamp so resample() can bucket rows by week.
ke.index = pd.to_datetime(ke['funded_time'])

# 'W' is the documented weekly alias; the lowercase spelling is deprecated in
# recent pandas releases. count() only counts rows whose funded_time is set.
fund_time = ke['funded_time'].resample('W').count().to_frame()
fund_time.columns = ['Frequency']

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=fund_time.index,
        y=fund_time['Frequency'],
        mode='lines',
        name='lines',
    )
)
fig.update_layout(
    title='Loans Issued Over Time in Kenya (weekly)',
    title_x=0.5,
    yaxis_title='No. of loans',
    xaxis_title='Timeline',
)
fig.show()

## Uganda Analysis

##### Variable definitions


*Funded Amount:* The amount disbursed by Kiva to the field agent (USD)

*Loan Amount:* The amount disbursed by the field agent to the borrower (USD)

### Continuous Variable Analysis

#### Histograms

In [None]:
# Re-check columns and dtypes before plotting the continuous variables.
ug.info()

In [None]:
# Distribution of loan_amount; the x-axis is limited to 0-6000.
px.histogram(ug, x = 'loan_amount', range_x = [0,6000])

In [None]:
# Distribution of funded_amount; the x-axis is limited to 0-6000.
px.histogram(ug, x = 'funded_amount', range_x = [0,6000])

In [None]:
# Distribution of lender_count; the x-axis is limited to 0-100.
px.histogram(ug, x = 'lender_count', range_x = [0,100])

In [None]:
# Distribution of term_in_months; the x-axis is limited to 0-50.
px.histogram(ug, x = 'term_in_months', range_x = [0,50])

### Categorical variable analysis

#### By activity 

In [None]:
# Per-activity totals, largest loan volume first; show the top 10.
# Columns are selected with a list ([[...]]): the bare-tuple form is rejected
# by pandas >= 2.0.
ug_activity = (
    ug.groupby('activity')[['loan_amount', 'lender_count', 'funded_amount']]
    .sum()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
ug_activity.head(10)

#### By region

In [None]:
# Per-region totals, largest loan volume first; show the top 20.
# Columns are selected with a list ([[...]]): the bare-tuple form is rejected
# by pandas >= 2.0.
ug_region = (
    ug.groupby('region')[['loan_amount', 'lender_count', 'funded_amount']]
    .sum()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
ug_region.head(20)

#### By sector

In [None]:
# Pie chart of each sector's share of the total loan amount.
labels, sizes = sector, loan

fig1, ax1 = plt.subplots()
pie_style = dict(autopct='%1.1f%%', shadow=True, startangle=90)
ax1.pie(sizes, labels=labels, **pie_style)
# Equal axis scaling so the pie is drawn as a circle.
ax1.axis('equal')

plt.show()

In [None]:
# Three bar charts on a 2x2 grid: loan amount, funded amount and lender count
# by sector, all read from the pre-aggregated ug_sector table.
plt.figure(figsize=(15, 15))

# (panel title, y column, extra barplot keyword arguments)
panels = [
    ('Loan Amount by Sector', 'loan_amount', {'estimator': sum}),
    ('Funded Amount by Sector', 'funded_amount', {}),
    ('Lender Count by Sector', 'lender_count', {}),
]

for slot, (heading, column, extra) in enumerate(panels, start=1):
    plt.subplot(2, 2, slot)
    plt.title(heading)
    plt.xticks(rotation=90)
    sns.barplot(x='sector', y=column, data=ug_sector, ci=None, color='lightblue', **extra)

plt.show()

In [None]:
# Side-by-side bars comparing total loan amount and funded amount per sector.
plt.figure(figsize=(15, 10))

plt.title('Loan Amount and Funded Amount by Sector', fontsize=15)
plt.xlabel('Sector', fontsize=15)
plt.ylabel('Loan and Funded Amount', fontsize=15)

width = 0.3
x_indices = np.arange(len(sector))

plt.xticks(ticks=x_indices, labels=sector, rotation=90)

# The funded bars are drawn first (shifted right by one bar width), then the
# loan bars, so the legend keeps that order.
for offset, heights, caption in (
    (width, fund, 'Funded Amount'),
    (0, loan, 'Loan Amount'),
):
    plt.bar(x_indices + offset, heights, width=width, label=caption)

plt.legend()
plt.show()

In [None]:
# Gap between what was lent and what was crowdfunded, per sector.
# Sectors where the loan amount is much greater than the funded amount imply
# unmet demand for credit, namely Retail, Food, Agriculture, Housing.
unfunded_gap = ug_sector['loan_amount'].sub(ug_sector['funded_amount'])
ug_sector['Diff_Loan_Lender_Amounts'] = unfunded_gap
ug_sector.sort_values('Diff_Loan_Lender_Amounts', ascending=False)

#### What happens if a loan doesn’t fully fund on Kiva?

Usually, loans on Kiva have 30 days to successfully fundraise. But in most cases, if a loan doesn’t fully fund on Kiva the individual borrower is not directly affected. That’s because most of **Kiva’s Field Partners give borrowers access to credit before posting their loans on the Kiva website (what we call pre-disbursal), so the borrower can use the funds immediately.**

The crowdfunded money raised on Kiva is used to backfill the loan amount, and when the borrower makes repayments they're passed along to the specific Kiva lenders who supported the loan. There are 2 funding models on Kiva:

Fixed: the total loan amount must be raised in order for funds to be sent to the Field Partner. If the loan is not funded in full within the fundraising period, the loan will expire and any funds raised will be returned to lenders' Kiva accounts.

Flexible: any funds raised within 30 days will be passed along to the Field Partner facilitating the loan and they will come up with other sources of funding to cover the rest of the loan amount.

## Analysis of Means

In [None]:
# Per-sector medians, highest median loan first.
# Columns are selected with a list ([[...]]): the bare-tuple form is rejected
# by pandas >= 2.0.
ug_sector_med = (
    ug.groupby('sector')[['loan_amount', 'lender_count', 'funded_amount']]
    .median()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
ug_sector_med # Use median because of skewed distribution?

In [None]:
# Per-sector means, highest mean loan first.
# Columns are selected with a list ([[...]]): the bare-tuple form is rejected
# by pandas >= 2.0.
ug_sector_mean = (
    ug.groupby('sector')[['loan_amount', 'lender_count', 'funded_amount']]
    .mean()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
ug_sector_mean

##### Lender Count: 
The total number of lenders that contributed to a loan

In [None]:
# Average amount contributed per lender, by sector.
# The ratio is taken from ug_sector_mean's own columns so each value stays on
# its sector's row. Dividing ug_sector's columns instead would pair rows by
# position, and ug_sector is ordered by total (not mean) loan amount, so the
# ratios would land on the wrong sectors.
# NOTE(review): mean/mean equals sum/sum per sector only if neither column has
# missing values within a sector - confirm against the data.
ug_sector_mean['loan_per_lender'] = ug_sector_mean['loan_amount'] / ug_sector_mean['lender_count']
ug_sector_mean.sort_values(by='loan_per_lender', ascending=False).reset_index()

In [None]:
# Column shortcuts for the per-sector averages. All four keep ug_sector_mean's
# row order, so they line up with one another (and with
# ug_sector_mean['sector']) when plotted positionally. lender_mean is
# deliberately not re-sorted: sorting it on its own would detach the values
# from their sectors.
loan_mean = ug_sector_mean['loan_amount']
fund_mean = ug_sector_mean['funded_amount']
lender_mean = ug_sector_mean['lender_count']
amount_lender_mean = ug_sector_mean['loan_per_lender']

In [None]:
# Side-by-side bars: average lender count and average amount per lender, by sector.
# Labels and both value series are read from ug_sector_mean so every bar sits
# under its own sector. The module-level `sector` follows ug_sector's ordering
# (by total loan amount), which differs from ug_sector_mean's ordering (by
# mean loan amount).
mean_sectors = ug_sector_mean['sector']
avg_lender_count = ug_sector_mean['lender_count']
avg_amount_per_lender = ug_sector_mean['loan_amount'] / ug_sector_mean['lender_count']

plt.figure(figsize=(15, 10))

plt.title('Average - Lender Count & Lender Loan Amount by Sector', fontsize=15)
plt.xlabel('Sector', fontsize=15)

x_indices = np.arange(len(mean_sectors))
width = 0.3

plt.xticks(ticks=x_indices, labels=mean_sectors, rotation=90)

plt.bar(x_indices + width, avg_lender_count, width=width, label='Avg. Lender Count')
plt.bar(x_indices, avg_amount_per_lender, width=width, label='Avg. Lender Amount')

plt.legend()
plt.show()

## Insights and Recommendations

1. Rwanda has fewer lenders and a higher aggregate loan amount despite being a much smaller country than Uganda. Kenya has more than double the loan amount of Uganda. The frequency of loans issued in Uganda has been falling over time.

    ***Recommendation - investigate Rwanda and Kenya to potentially learn lessons***
    

2. Sectors where aggregate loan amount is much greater than funded amount implies unmet demand for credit, namely Retail, Food, Agriculture, Housing.

    ***Recommendation - direct crowdfunding to these key sectors where there is unmet demand***
    
    
3. Some sectors receive a lot more funding than others. There is much wider variation in the average number of lenders across sectors compared to the average amount lent by a lender.

    ***Recommendation - marketing efforts should focus on increasing the number of lenders for sectors that receive less total credit***