### Exploratory Data Analysis For Paraguay

In [None]:
import warnings # import libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
# Silence every warning for the rest of the notebook. NOTE(review): this also
# hides deprecation warnings from pandas/seaborn, so API removals only surface
# as hard errors after an upgrade.
warnings.filterwarnings('ignore')


#### 1. Read in Kiva loans CSV File 

#### Get an Overview of Overall Data

In [None]:
# Load the loans file from the notebook's working directory; `kiva` is the
# full data set that every later cell derives from.
kiva = pd.read_csv('kiva_loans.csv')

# Preview the first rows to check that the file parsed as expected.
kiva.head()

In [None]:
# List the column labels available in the loans frame.
kiva.columns

In [None]:
# (number of rows, number of columns) of the loans frame.
kiva.shape 

In [None]:
kiva.describe() # summary statistics of the numeric columns; the 'count' row is the number of non-null values per column

In [None]:
kiva.info() # print each column's dtype and non-null count, plus the frame's memory usage

### 2. Check for missing values

In [None]:
# Count the missing values in every column of the loans frame.
null = kiva.isnull().sum().to_frame().reset_index()
null.columns = ['column', 'Frequency']
# Re-assign instead of sorting in place so that re-running the cell is
# idempotent. Order is ascending: columns with the most gaps are listed last.
null = null.sort_values('Frequency')
# Show the table; previously the cell computed it but displayed nothing.
null

### 3. Subsetting of Paraguay Data to analyse it individually

In [None]:
# Keep only the loans whose country is Paraguay; the Paraguay-specific
# sections below all work on this subset.
Paraguay = kiva.loc[kiva['country'] == 'Paraguay']

# Preview the first rows of the subset.
Paraguay.head()

### 4. Ranking the Top 10 Nations by Total Loan Amount (Paraguay in Context)

In [None]:
# Total loan amount per country, largest first. Despite its name,
# `top_ten_nations` holds every country; only `top_ten_nations_df` is
# restricted to the first ten rows.
top_ten_nations = (
    kiva.groupby('country')['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

top_ten_nations_df = top_ten_nations.head(10)

In [None]:
# Apply seaborn's 'darkgrid' theme to the figures produced from here on;
# color_codes=True asks seaborn to remap matplotlib's shorthand colour codes
# ('b', 'g', 'r', ...) to its own palette.
sns.set(color_codes = True,style = 'darkgrid') #set the aesthetic of produced figures.

In [None]:
# Bar chart of the ten countries with the largest total loan amount.
plt.figure(figsize = (12,6))
sns.barplot(x='country',y='loan_amount',data=top_ten_nations.head(10))
plt.title('Top 10 Countries by Total Loan Amount')
# `plt.show` without parentheses only references the function (the cell then
# echoes its repr); call it so the figure is rendered explicitly.
plt.show()

In [None]:
# Total loan amount per country, largest first, as a two-column frame.
# Select 'loan_amount' BEFORE summing: summing the whole frame first
# aggregates every column (including text columns) only to throw the result
# away, which is slow and can fail on non-numeric data in recent pandas.
country_fund = (
    kiva.groupby('country')['loan_amount']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
    .reset_index()
)
country_fund.columns = ['Country', 'Total_amount']
country_fund.head(10)

In [None]:
# World map coloured by each country's total loan amount. `px` is
# plotly.express (imported in the first cell). Rows of `country_fund` are
# matched to map regions by country name, and hovering a country shows its name.
fig = px.choropleth(country_fund, 
                    locations="Country", 
                    locationmode = "country names",
                    color="Total_amount",
                    
                    hover_name="Country"
                   )
# Centre the title and hide the map frame and coastlines.
fig.update_layout(
    title_text = 'Top Countries By Total Amount Loaned',
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
    ))
    
fig.show()

### 5. Ranking of Loan per Sector in Paraguay

In [None]:
# Sum loan amount, funded amount and lender count for every
# (region, sector, borrower genders, repayment interval) combination in
# Paraguay, largest loan amount first.
# The value columns are selected with a LIST (double brackets): selecting
# several columns with a bare tuple was deprecated and then removed in
# pandas 2.0, where it raises an error.
# NOTE(review): groupby drops rows whose key columns are missing by default,
# so loans without a region or borrower gender are excluded - confirm that is
# intended.
Paraguay_region_sector = (
    Paraguay.groupby(['region', 'sector', 'borrower_genders', 'repayment_interval'])
    [['loan_amount', 'funded_amount', 'lender_count']]
    .sum()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
Paraguay_region_sector.head()

#### Plot By Sector

In [None]:
# Total loan amount per sector in Paraguay (bars are the sum over all
# region/gender/interval groups of that sector).
plt.figure(figsize=(15,5))

plt.title('Loan Amount by sector')

# The stray trailing comma after this call was removed: it turned the
# statement into a throw-away one-element tuple.
# NOTE(review): `ci=None` is the pre-0.12 seaborn spelling of
# `errorbar=None` - update if the installed seaborn is newer.
sns.barplot(x='sector', y='loan_amount', data=Paraguay_region_sector, ci=None,
            color='purple', estimator=np.sum)

# Rotate after plotting so the rotation is applied to the sector labels
# seaborn has just placed on the axis.
plt.xticks(rotation=90)

plt.show()

### Plot By Region

### 6. Determine whether the requested loan is fully or partially funded and, if partially, why

In [None]:
# Sum loan amount, funded amount and lender count for every
# (region, sector, repayment interval) combination in Paraguay.
# Two fixes compared with the previous version of this cell:
#  * the value columns are selected with a list, because tuple selection on a
#    GroupBy was removed in pandas 2.0;
#  * 'funded_amount' is aggregated instead of being used as a grouping key.
#    As a key, each group paired a single loan's funded amount with the SUM of
#    the loan amounts in that group, so fully funded loans looked underfunded
#    in the loan-vs-funded comparison. Both amounts are now totals over the
#    same loans.
# NOTE: this re-binds `Paraguay_region_sector` from the sector section; the
# plots below use this version.
Paraguay_region_sector = (
    Paraguay.groupby(['region', 'sector', 'repayment_interval'])
    [['loan_amount', 'funded_amount', 'lender_count']]
    .sum()
    .sort_values(by='loan_amount', ascending=False)
    .reset_index()
)
Paraguay_region_sector.head()

In [None]:
# Scatter of funded amount against requested loan amount, coloured by sector.
# Points on the diagonal are fully funded; points below it are only partially
# funded.
plt.figure(figsize=(15,5))

plt.title('Loan Amount Vs Funded Amount')

# `sizes` is (min, max) marker size. The original (150, 100) was reversed,
# which drew the largest loans with the smallest markers.
sns.scatterplot(y='funded_amount', x='loan_amount', data=Paraguay_region_sector,
                hue='sector', size='loan_amount', sizes=(100, 150), alpha=0.90)

# Rotate the x tick labels once the axis has been populated.
plt.xticks(rotation=90)

plt.show()

In [None]:
# Redundant re-import (pyplot is already imported in the first cell); kept so
# the cell still runs on its own.
import matplotlib.pyplot as plt

# Total loan amount per sector, split by repayment interval.
plt.figure(figsize=(15,5))

plt.title('Repayment Interval')

# The stray trailing comma after this call was removed: it turned the
# statement into a throw-away one-element tuple.
sns.barplot(x='sector', y='loan_amount', data=Paraguay_region_sector, ci=None,
            color='blue', estimator=np.sum, hue='repayment_interval')

# Rotate after plotting so the rotation is applied to the sector labels.
plt.xticks(rotation=90)

plt.show()

Food has the largest loan amount on an irregular repayment interval, followed by Retail and Clothing. These are also the most funded sectors, in the same order: the higher a sector's funding, the more of its repayment is irregular.


### Matrix plot (heatmap)

### 6. Find the correlation between the different variables

# Re-create the Paraguay subset so this section does not depend on earlier
# cells having run.
Paraguay = kiva[kiva['country']=='Paraguay']

# Pairwise correlation of the numeric columns only. Calling .corr() on the
# full frame raises in pandas >= 2.0 because the text columns cannot be
# converted to numbers; select_dtypes works on old and new pandas alike.
Paraguay.select_dtypes(include='number').corr()

In [None]:
# Heatmap of the pairwise correlations between Paraguay's numeric columns,
# with each coefficient printed in its cell.
plt.figure(figsize=(10,5))

# Restrict to numeric columns first: .corr() on a frame that still contains
# text columns raises in pandas >= 2.0.
sns.heatmap(Paraguay.select_dtypes(include='number').corr(),
            cmap='Blues', annot=True, linewidth=0.5)

plt.show() # the darker the colour, the higher the correlation

#### 6.1 There is a relationship between lender count, loan amount and funded amount

In [None]:
import sys

!conda install --yes plotly

### 7. Which gender gets more loans?

In [None]:
# Each 'borrower_genders' cell is a comma-separated list with one entry per
# borrower on the loan (e.g. "female, female, male"). Flatten all non-null
# cells into one list of individual, whitespace-stripped entries.
gender_list = [
    entry.strip()
    for cell in Paraguay['borrower_genders'].values
    if str(cell) != 'nan'  # skip null cells
    for entry in cell.split(',')
]

# Count how often each gender occurs (per borrower, not per loan) and shape
# the result into a two-column table.
temp_data = pd.Series(gender_list).value_counts()
gender = temp_data.head(20).to_frame().reset_index()
gender.columns = ['Gender', 'Frequency']

In [None]:
# Display the borrower counts per gender.
gender

In [None]:
# Pie chart of the share of borrowers per gender in Paraguay. `go` is
# plotly.graph_objects (imported in the first cell).
labels = gender.Gender
values = gender.Frequency
fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
# The title previously named Cambodia (left over from another country's
# notebook) and misspelled "Representation".
fig.update_layout(
    title='Representation of Genders Funded by Kiva Loans In Paraguay',
    title_x=0.2,
)
fig.show()

Female borrowers account for about 97% of loan recipients in Paraguay, compared to about 2% for male borrowers.



#### 8. What are the repayment intervals?

In [None]:
# Number of Paraguayan loans per repayment interval, as a two-column table.
interval_counts = Paraguay['repayment_interval'].value_counts()
repayment_interval = interval_counts.to_frame().reset_index()
repayment_interval.columns = ['Repayment_interval', 'Frequency']

#### 8.1 Frequency Table of Repayment Intervals

In [None]:
# Display the loan counts per repayment interval.
repayment_interval

In [None]:
# Pie chart of the share of Paraguayan loans per repayment interval. `go` is
# plotly.graph_objects (imported in the first cell).
labels = repayment_interval.Repayment_interval
values = repayment_interval.Frequency
fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
# Title typo ("Represention") and trailing space fixed.
fig.update_layout(
    title='Representation of Repayment Intervals In Paraguay',
    title_x=0.2,
)
fig.show()

Loans are mostly repaid on a monthly basis rather than irregularly, which may indicate commitment to repayment rather than default.

#### Recommendation

1. Paraguay receives its largest loan amounts in the Food sector, which may mean that food is being imported. The country should be encouraged to take more loans for farming, since exporting food would boost the economy, unlike importing it.

2. The sectors that receive the fewest loans are Construction and Manufacturing. Kiva should encourage more loans for infrastructure, construction and manufacturing.

3. More women than men take loans. More men should be encouraged to take loans, so that the loans give a long-term boost to the economy through manufacturing, construction and infrastructure.

4. About 55% of Paraguay's loans are repaid at monthly intervals; the rest are irregular. For accountability, more borrowers should be encouraged to repay at constant intervals.

5. Paraguay has no bullet repayments, so Kiva earns more interest, which is a good approach.