In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')

import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import FastMarkerCluster
from folium.plugins import MarkerCluster

import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
#print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

## About Kiva.org

**Kiva envisions a world where all people hold the power to create opportunity for themselves and others.**

Kiva is an international nonprofit, founded in 2005 and based in San Francisco, with a mission to connect people through lending to alleviate poverty. 

In Kaggle Datasets' inaugural Data Science for Good challenge, Kiva is inviting the Kaggle community to help them build more localized models to estimate the poverty levels of residents in the regions where Kiva has active loans. 

This notebook tries to explore the ways to achieve that.

Part 1: EDA

In [None]:
# Read the Kiva loans table from the competition input directory.
kiva_loans = pd.read_csv(
    "../input/data-science-for-good-kiva-crowdfunding/kiva_loans.csv"
)
# Show (rows, columns) as the cell output.
kiva_loans.shape

In [None]:
# Preview the first rows of the loans table.
kiva_loans.head()

In [None]:
# Number of distinct values in each column of the loans table.
kiva_loans.nunique()

In [None]:
# Read the table of MPI regions and their locations from the competition input directory.
kiva_mpi_region_locations = pd.read_csv(
    "../input/data-science-for-good-kiva-crowdfunding/kiva_mpi_region_locations.csv"
)
# Show (rows, columns) as the cell output.
kiva_mpi_region_locations.shape

In [None]:
# Preview the first rows of the MPI region locations table.
kiva_mpi_region_locations.head()

In [None]:
# Number of distinct values in each column of the MPI region locations table.
kiva_mpi_region_locations.nunique()

### World Regions with MPI 
OPHI (the Oxford Poverty and Human Development Initiative) calculates the Global Multidimensional Poverty Index (MPI), which has been published since 2010 in the United Nations Development Programme’s Human Development Report.

Let's plot Kiva’s estimates as to the geolocation of subnational MPI regions.

In [None]:
# Count the rows of the MPI locations table per world region.
# Computed once and reused for both axes (the counts come back sorted
# in descending order, so the largest region is drawn first).
world_region_counts = kiva_mpi_region_locations.world_region.value_counts()

plt.figure(figsize=(12, 8))
sns.barplot(x=world_region_counts.values, y=world_region_counts.index)
plt.title("World Regions")
plt.xlabel("Number of MPI regions")
# Trailing ';' suppresses the return-value repr in the cell output.
plt.savefig('world regions.png');

In [None]:
# Rows of the MPI locations table that belong to Sub-Saharan Africa.
african_countries = kiva_mpi_region_locations[
    kiva_mpi_region_locations['world_region'] == 'Sub-Saharan Africa'
]

# Count the rows per country once and reuse the result for both axes.
african_country_counts = african_countries.country.value_counts()

# Tall figure: one horizontal bar per country.
plt.figure(figsize=(12, 15))
sns.barplot(
    x=african_country_counts.values,
    y=african_country_counts.index,
    palette="viridis",
)
plt.title("African Countries")
plt.xlabel("Number of MPI regions")
# Trailing ';' suppresses the return-value repr in the cell output.
plt.savefig('african countries.png');

## Heatmap of the Multidimensional Poverty Index (MPI) for the world

In [None]:
# Keep only rows with no missing value in any column, then derive an
# integer "weight" bucket from the MPI score: weight = int(MPI * 15).
# The heatmap cells below use this bucket to group the locations.
kiva_mpi_region_locations = (
    kiva_mpi_region_locations
    .dropna(axis=0)
    .assign(weight=lambda frame: frame.MPI.multiply(15).astype(int))
)

In [None]:
# Base map centred on the mean coordinate of all MPI regions.
# NOTE(review): recent folium releases may no longer provide the
# "Stamen Terrain" tiles by name -- confirm against the installed version.
kiva_loactions_on_heatmap = folium.Map(
    location=[kiva_mpi_region_locations.lat.mean(), kiva_mpi_region_locations.lon.mean()],
    tiles="Stamen Terrain",
    zoom_start=2,
)

# One animation frame per integer weight bucket (weight = int(MPI * 15)).
# The notebook originally hard-coded buckets 0..10, which silently drops any
# region whose weight is 11 or more (MPI >= 11/15). Keep at least those
# 11 frames, and extend the range when the data contains higher buckets.
weight_buckets = kiva_mpi_region_locations['weight']
n_frames = max(11, int(weight_buckets.max()) + 1) if len(weight_buckets) else 11

# heat_data[i] is the list of [lat, lon] pairs whose weight equals i.
# Boolean-mask selection replaces a row-by-row iterrows() scan per bucket.
heat_data = [
    kiva_mpi_region_locations.loc[weight_buckets == bucket, ['lat', 'lon']].values.tolist()
    for bucket in range(n_frames)
]

# Plot it on the map
hm = plugins.HeatMapWithTime(heat_data, auto_play=True, max_opacity=0.8)
hm.add_to(kiva_loactions_on_heatmap)

# Save the whole map (base tiles + heatmap layer) as a standalone page.
kiva_loactions_on_heatmap.save('world MPI heatmap.html')

# Display the map
kiva_loactions_on_heatmap

The play and reverse-play buttons may not be visible; if so, click the legend squares at the bottom left to see the heatmap animate.

It looks like Africa has the highest number of MPI locations.

### Let's zoom on Africa

In [None]:
# Sub-Saharan Africa subset used for the zoomed-in heatmap.
# Built as one chain so heat_df is a fresh frame rather than a filtered
# slice that is later assigned to.
#   1. keep only Sub-Saharan Africa rows
#   2. drop rows with a missing value in any column
#   3. derive the integer weight bucket from the MPI score: int(MPI * 15)
# After step 2 no column contains NaN and the weight is an integer, so no
# further dropna on lat/lon/weight/LocationName is required.
heat_df = (
    kiva_mpi_region_locations
    .loc[kiva_mpi_region_locations['world_region'] == 'Sub-Saharan Africa']
    .dropna(axis=0)
    .assign(weight=lambda frame: frame.MPI.multiply(15).astype(int))
)

In [None]:
# Base map centred on the mean coordinate of the Sub-Saharan Africa rows.
# NOTE(review): recent folium releases may no longer provide the
# "Stamen Terrain" tiles by name -- confirm against the installed version.
kiva_loactions_on_heatmap_africa = folium.Map(
    location=[heat_df.lat.mean(), heat_df.lon.mean()],
    tiles="Stamen Terrain",
    zoom_start=3,
)

# One animation frame per integer weight bucket (weight = int(MPI * 15)).
# The notebook originally hard-coded buckets 0..10, which silently drops any
# region whose weight is 11 or more (MPI >= 11/15). Keep at least those
# 11 frames, and extend the range when the data contains higher buckets.
africa_weight_buckets = heat_df['weight']
n_frames_africa = (
    max(11, int(africa_weight_buckets.max()) + 1) if len(africa_weight_buckets) else 11
)

# heat_data[i] is the list of [lat, lon] pairs whose weight equals i.
# Boolean-mask selection replaces a row-by-row iterrows() scan per bucket.
heat_data = [
    heat_df.loc[africa_weight_buckets == bucket, ['lat', 'lon']].values.tolist()
    for bucket in range(n_frames_africa)
]

# Plot it on the map
hm = plugins.HeatMapWithTime(heat_data, auto_play=True, max_opacity=0.8)
hm.add_to(kiva_loactions_on_heatmap_africa)

# Save the whole map (base tiles + heatmap layer) as a standalone page.
kiva_loactions_on_heatmap_africa.save('africa MPI heatmap.html')

# Display the map
kiva_loactions_on_heatmap_africa

### Poverty locations for South Asia as per OPHI's MPI

Click on a cluster circle to see the clustered points.

In [None]:
# South Asia rows with no missing value in any column.
# Chained (no inplace=True) so a filtered slice is never mutated in place.
south_asia_locations = (
    kiva_mpi_region_locations
    .loc[kiva_mpi_region_locations['world_region'] == 'South Asia']
    .dropna(axis=0)
)
# Legacy alias: the notebook originally stored the South Asia rows under this
# (misleading) name; it is kept so any cell that still reads it keeps working.
kiva_mpi_region_locations_africa = south_asia_locations

# Base map centred on the mean coordinate of the South Asia rows.
m = folium.Map(
    location=[south_asia_locations.lat.mean(), south_asia_locations.lon.mean()],
    tiles='Cartodb Positron',
    zoom_start=4
)

# Cluster layer; control=False keeps it out of the layer-control widget.
marker_cluster = MarkerCluster(
    name='South Asia Locations',
    overlay=True,
    control=False,
    icon_create_function=None
)

# One green marker per region, with the region's LocationName as its popup.
for lat, lon, location_name in zip(
    south_asia_locations.lat.values,
    south_asia_locations.lon.values,
    south_asia_locations.LocationName.values,
):
    marker = folium.Marker(
        location=(lat, lon),
        icon=folium.Icon(color='green', icon='ok-sign'),
    )
    folium.Popup(location_name).add_to(marker)
    marker_cluster.add_child(marker)

marker_cluster.add_to(m)

folium.LayerControl().add_to(m)

m.save("marker cluster south asia.html")
m

### Clustering  locations in Africa

Click on a cluster circle to see the clustered points.

In [None]:
# Select the Sub-Saharan Africa rows explicitly. This cell previously read
# `kiva_mpi_region_locations_africa`, which (despite its name) holds the
# South Asia rows, so the "Africa" cluster map actually showed South Asia.
africa_locations = (
    kiva_mpi_region_locations
    .loc[kiva_mpi_region_locations['world_region'] == 'Sub-Saharan Africa']
    .dropna(axis=0)  # drop rows with any missing value, incl. lat/lon
)

# Base map centred on the mean coordinate of the Sub-Saharan Africa rows.
m = folium.Map(
    location=[africa_locations.lat.mean(), africa_locations.lon.mean()],
    tiles='Cartodb Positron',
    zoom_start=4
)

# FastMarkerCluster takes the (lat, lon) pairs in one call instead of
# building one Marker object per row in Python.
FastMarkerCluster(
    data=list(zip(africa_locations.lat.values, africa_locations.lon.values))
).add_to(m)

folium.LayerControl().add_to(m)
m.save('africa loc cluster.html')

m

to be continued...