In [88]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
country_geo = '../../datasets/geo/world-countries.json'
import folium
pd.set_option('display.max_row', 200)

# World Indicators Dataset exploration

#### In this data exploration I will be illustrating the change in endagered plant species over time on a map of the globe, my hope is that it will show where endangered species are most present

In [16]:
data = pd.read_csv('../../datasets/world-development-indicators/Indicators.csv')

In [17]:
data.shape

(5656458, 6)

In [4]:
data.head()

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
0,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1960,133.5609
1,Arab World,ARB,Age dependency ratio (% of working-age populat...,SP.POP.DPND,1960,87.7976
2,Arab World,ARB,"Age dependency ratio, old (% of working-age po...",SP.POP.DPND.OL,1960,6.634579
3,Arab World,ARB,"Age dependency ratio, young (% of working-age ...",SP.POP.DPND.YG,1960,81.02333
4,Arab World,ARB,Arms exports (SIPRI trend indicator values),MS.MIL.XPRT.KD,1960,3000000.0


In [54]:
# Let's look at endangered species of plants and animals categorically one at a time
indicators = pd.DataFrame(data[data['IndicatorName'].str.contains('threatened')])
indicators.head()

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
5641696,Arab World,ARB,"Bird species, threatened",EN.BIR.THRD.NO,2015,297.0
5641702,Arab World,ARB,"Fish species, threatened",EN.FSH.THRD.NO,2015,572.0
5641712,Arab World,ARB,"Mammal species, threatened",EN.MAM.THRD.NO,2015,217.0
5641723,Arab World,ARB,"Plant species (higher), threatened",EN.HPT.THRD.NO,2015,318.0
5641741,Caribbean small states,CSS,"Bird species, threatened",EN.BIR.THRD.NO,2015,66.0


In [55]:
indicators['Value'].describe()
print(indicators['IndicatorName'].unique())


['Bird species, threatened' 'Fish species, threatened'
 'Mammal species, threatened' 'Plant species (higher), threatened']


In [155]:
birds = pd.DataFrame(indicators[indicators['IndicatorName'].str.contains('Bird')])
fish = indicators[indicators['IndicatorName'].str.contains('Fish')]
mammals = indicators[indicators['IndicatorName'].str.contains('Mammal')]
plants = indicators[indicators['IndicatorName'].str.contains('Plant')]

In [156]:
# Scale the results so they map better to a color range
birds.head()
birds['square_value'] = np.log(birds['Value'])
birds[birds['square_value']<0]=0
data_to_plot = birds[['CountryCode','Value']]
data_to_plot.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,CountryCode,Value
5641696,ARB,297.0
5641741,CSS,66.0
5641792,CEB,130.0
5641837,EAS,1095.0
5641882,EAP,739.0


In [169]:
birds[birds['Value'] > 500] =0
mammals[mammals['Value']>500] =0
plants[plants['Value']>1000] =0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


### Lets visualize these 4 categories 

In [170]:
def plot_map(df,columns):    
    m = folium.Map(location=[45.5236, -122.6750],zoom_start=1.5)
    folium.Choropleth(
        geo_data=country_geo,
        name='choropleth',
        data=df,
        columns=columns,
        key_on='feature.id',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='log of number of endangered bird species'
    ).add_to(m)
    folium.LayerControl().add_to(m)
    return m

In [171]:
m = plot_map(birds, ['CountryCode', 'Value'])
m

In [172]:
m = plot_map(mammals, ['CountryCode', 'Value'])
m

In [173]:
m = plot_map(plants, ['CountryCode', 'Value'])
m