In [51]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import pickle
import numpy as np
from ipywidgets import interact
import json

In [505]:
# Load the pre-built DataFrame from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
pickle_file="./Data/uni_df.pickle"
with open(pickle_file,'rb') as pickle_handle:  # context manager closes the file
    df=pickle.load(pickle_handle)

In [68]:
def compute_score(df,weights=None):
    """Compute, per item, production / (production + import - export).

    Columns of `df` are classified by the (case-insensitive) keywords
    'production', 'import' and 'export'; columns containing '1000 Head' are
    ignored. Trailing words are stripped from the column names (two for
    production, three for import/export) and one more word is stripped from
    each production name to obtain the item name used for matching. An item
    is scored only when a column whose stripped name contains the item name
    exists in all three groups.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Area' and 'Year' columns plus the data columns.
    weights : optional
        Currently unused: aggregation of the per-item scores is not
        implemented yet, so the per-item scores are returned either way.

    Returns
    -------
    pandas.DataFrame
        Indexed by (Area, Year), one column per scored item. Rows where the
        denominator is zero yield inf/NaN (no special handling).
    """

    def drop_words(s, w=1, end=True):
        """Remove `w` space-separated words from the end (or start) of `s`."""
        if end:
            return s.rsplit(' ', w)[0]
        return s.split(' ', w)[-1]

    def select(keyword):
        """Return the `keyword` columns of df, indexed by (Area, Year)."""
        cols = [col for col in df.columns
                if ('1000 Head' not in col) and keyword in col.lower()]
        return df[cols + ['Area', 'Year']].set_index(['Area', 'Year'])

    def first_match(frame, key):
        """First column of `frame` containing `key` literally, else None."""
        return next((col for col in frame.columns if key in col), None)

    df_prod = select('production')
    df_exp = select('export')
    df_imp = select('import')

    # Removing useless words to facilitate matching (unit, export, import, production)
    df_prod.columns = [drop_words(col, 2) for col in df_prod.columns]
    df_imp.columns = [drop_words(col, 3) for col in df_imp.columns]
    df_exp.columns = [drop_words(col, 3) for col in df_exp.columns]

    # initialising new df with index
    score = pd.DataFrame(index=df_exp.index)

    # Item names, de-duplicated while keeping their order of appearance.
    items = dict.fromkeys(drop_words(col) for col in df_prod.columns)

    for item in items:
        prod = first_match(df_prod, item)
        imp = first_match(df_imp, item)
        exp = first_match(df_exp, item)
        # Resolve all three columns for the SAME item before scoring; items
        # missing from any group are skipped so columns are never mispaired.
        if prod is None or imp is None or exp is None:
            continue
        score[drop_words(prod)] = df_prod[prod] / (df_prod[prod] + df_imp[imp] - df_exp[exp])

    # `is None` (not `== None`) so array-like weights do not raise on comparison.
    if weights is None:
        return score

    # TODO: aggregate the per-item scores using `weights`.
    return score

In [506]:
import pandas as pd
import numpy as np
import seaborn as sns
import folium
from folium.plugins import TimestampedGeoJson
import json

In [568]:
def visualise_world_data(df,to_visualise,year,log=True):
    """Draw a folium choropleth of one column of `df` for a single year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Area', 'Year' and `to_visualise`.
    to_visualise : str
        Name of the column used to colour the map and shown in the tooltip.
    year :
        Rows are kept when ``df['Year'] == year``.
    log : bool, default True
        When true, the values are replaced by their base-10 logarithm.

    Returns
    -------
    folium.Map
        A map holding the choropleth layer with a name/value tooltip.
    """

    # importing geojson and transforming to pandas
    with open("Data/world-countries.json") as geo_file:  # closes the handle
        geo_data = json.load(geo_file)
    geo_df = pd.DataFrame(
        [{'Country': country['properties']['name'],
          'geometry': country['geometry']}
         for country in geo_data['features']]
    )

    # cleaning df to allow merge with geo data: maps names used in `df` to the
    # names used in the geojson; a list value maps one area to several countries.
    # NOTE(review): 'Montenegro', 'Republic of Serbia' and 'Timor-Leste' are
    # listed under 'USSR' -- confirm this is intended.
    name_correction = {'Czechia': "Czech Republic",
       'Russian Federation':'Russia',
       "Serbia":"Republic of Serbia",
       'The former Yugoslav Republic of Macedonia':'Macedonia',
       'China, mainland':'China',
       'Viet Nam':'Vietnam',
       'Venezuela (Bolivarian Republic of)':'Venezuela',
       'Iran (Islamic Republic of)':'Iran',
       'Syrian Arab Republic':"Syria",
       'Bolivia (Plurinational State of)': 'Bolivia',
       "Côte d'Ivoire": "Ivory Coast",
       'Congo':"Republic of the Congo",
       "Lao People's Democratic Republic":'Laos',
       "Democratic People's Republic of Korea":"North Korea",
       'Republic of Korea':"South Korea",
       'USSR':                            ['Armenia', 'Azerbaijan','Belarus', 'Estonia', 'Georgia',
                                               'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania',
                                               'Montenegro', 'Republic of Moldova', 'Russia',
                                               'Republic of Serbia', 'Timor-Leste', 'Turkmenistan', 'Ukraine',
                                               'Uzbekistan'],
       'Ethiopia PDR':                     ['Eritrea','Ethiopia'],
       'Yugoslav SFR':                     ['Kosovo', 'Slovenia', 'Croatia',
                                                'Macedonia', 'Bosnia and Herzegovina'],
       'Yemen Dem' :                       ['Yemen'],
       'Czechoslovakia':                   ["Czech Republic", 'Slovakia'],
       'Netherlands Antilles (former)':    ['Curaçao', 'Sint Maarten (Dutch Part)'],
       'Sudan (former)':                   ['South Sudan', 'Sudan']}

    # useful method to clean country names
    def correct_country_names(old_name, dic):
        """Return dic[old_name] when present, otherwise old_name unchanged."""
        return dic.get(old_name, old_name)

    # cropping df to the data of interest; .copy() so the assignments below
    # act on an independent frame rather than on a slice of `df`
    df_visu = df.loc[df['Year'] == year, ['Area', 'Year', to_visualise]].copy()

    # cleaning country names, then one row per country for list-valued names
    df_visu['Area'] = df_visu['Area'].apply(lambda x: correct_country_names(x, name_correction))
    df_visu = df_visu.explode('Area')

    # Merging with geo data (left join keeps every geojson country)
    df_visu = geo_df.merge(df_visu, how='left', left_on='Country', right_on='Area')

    if log:
        df_visu[to_visualise] = np.log10(df_visu[to_visualise])

    # creating the GeoJSON feature collection for folium
    features = [
        {
            'type': 'Feature',
            'properties': {'name': row['Country'],
                           'value': row[to_visualise]},
            'geometry': row['geometry'],
        }
        for _, row in df_visu.iterrows()
    ]

    # Colour by the requested column (previously hard-coded to the GDP column).
    c = folium.Choropleth({'type': 'FeatureCollection', 'features': features},
                          df_visu[['Country', to_visualise]],
                          columns=['Country', to_visualise],
                          key_on='feature.properties.name',
                          fill_color='YlGn',
                          fill_opacity=0.7,
                          line_opacity=0.2,
                          nan_fill_opacity=0.0)
    c.geojson.add_child(folium.features.GeoJsonTooltip(['name', 'value']))
    m = folium.Map()
    c.add_to(m)
    return m

In [569]:
# Log-scaled GDP map for 2002; the returned map is displayed as the cell output.
visualise_world_data(df, to_visualise='(GDP, million $)', year=2002, log=True)

In [507]:
# Read the world geojson; the context manager closes the file handle.
with open("Data/world-countries.json") as geo_file:
    geo_data=json.load(geo_file)

In [508]:
# One geojson feature dict per country.
dics = geo_data["features"]

In [509]:
# Keep only the country name and its geometry from each feature.
clean_dics = [
    {'Country': entry['properties']['name'], 'geometry': entry['geometry']}
    for entry in dics
]

In [510]:
# Tabular view of the geojson: columns 'Country' and 'geometry'.
geo_df = pd.DataFrame(data=clean_dics)

In [511]:
# Maps area names to the country names used for the geo merge.
# A string value is a plain rename; a list value maps one area to several countries.
name_correction = {
    # one-to-one renames
    'Czechia': "Czech Republic",
    'Russian Federation': 'Russia',
    "Serbia": "Republic of Serbia",
    'The former Yugoslav Republic of Macedonia': 'Macedonia',
    'China, mainland': 'China',
    'Viet Nam': 'Vietnam',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Iran (Islamic Republic of)': 'Iran',
    'Syrian Arab Republic': "Syria",
    'Bolivia (Plurinational State of)': 'Bolivia',
    "Côte d'Ivoire": "Ivory Coast",
    'Congo': "Republic of the Congo",
    "Lao People's Democratic Republic": 'Laos',
    "Democratic People's Republic of Korea": "North Korea",
    'Republic of Korea': "South Korea",
    # one-to-many mappings
    'USSR': [
        'Armenia', 'Azerbaijan', 'Belarus', 'Estonia', 'Georgia',
        'Kazakhstan', 'Kyrgyzstan', 'Latvia', 'Lithuania',
        'Montenegro', 'Republic of Moldova', 'Russia',
        'Republic of Serbia', 'Timor-Leste', 'Turkmenistan', 'Ukraine',
        'Uzbekistan',
    ],
    'Ethiopia PDR': ['Eritrea', 'Ethiopia'],
    'Yugoslav SFR': [
        'Kosovo', 'Slovenia', 'Croatia',
        'Macedonia', 'Bosnia and Herzegovina',
    ],
    'Yemen Dem': ['Yemen'],
    'Czechoslovakia': ["Czech Republic", 'Slovakia'],
    'Netherlands Antilles (former)': ['Curaçao', 'Sint Maarten (Dutch Part)'],
    'Sudan (former)': ['South Sudan', 'Sudan'],
}


In [512]:
def correct_country_names(old_name, dic):
    """Look `old_name` up in `dic`; return the mapped value, or `old_name` if absent."""
    return dic.get(old_name, old_name)

In [513]:
# Work on an independent copy so later column assignments do not target a
# slice of `df` (avoids pandas' SettingWithCopyWarning).
df_visu=df[['Area','Year','(GDP, million $)']].copy()
df_visu.describe()

Unnamed: 0,Year,"(GDP, million $)"
count,9196.0,9196.0
mean,1993.183449,157318.8
std,13.239335,832735.5
min,1970.0,2.585174
25%,1982.0,1169.111
50%,1994.0,6409.638
75%,2005.0,42529.63
max,2015.0,18036650.0


In [514]:
# Rename areas, then explode list-valued names into one row per country.
# .assign builds a new frame instead of writing into a possible slice of `df`.
df_visu = (
    df_visu
    .assign(Area=df_visu['Area'].apply(lambda x: correct_country_names(x, name_correction)))
    .explode('Area')
)
df_visu.describe()

Unnamed: 0,Year,"(GDP, million $)"
count,9717.0,9717.0
mean,1992.535865,174760.9
std,13.293055,818221.1
min,1970.0,2.585174
25%,1981.0,1283.885
50%,1993.0,7273.237
75%,2004.0,54008.06
max,2015.0,18036650.0


In [515]:
# Left join on the geo table: every geojson country is kept.
df_visu = pd.merge(geo_df, df_visu, how='left', left_on='Country', right_on='Area')

In [532]:
# Replace GDP by its base-10 logarithm.
# Caution: overwrites the column in place, so re-running this cell applies the log again.
df_visu['(GDP, million $)'] = np.log10(df_visu['(GDP, million $)'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [533]:
# Eight evenly spaced bin edges spanning the observed value range.
min_value = df_visu['(GDP, million $)'].min()
max_value = df_visu['(GDP, million $)'].max()
bins = np.linspace(start=min_value, stop=max_value, num=8)

In [534]:
# Bin index of each value, later used as an index into the colour scale.
df_visu['color'] = np.digitize(df_visu['(GDP, million $)'], bins)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [535]:
# Ten-step colour scale (hex strings), indexed by the 'color' bin number.
color_scale = np.array([
    '#053061', '#2166ac', '#4393c3', '#92c5de', '#d1e5f0',
    '#fddbc7', '#f4a582', '#d6604d', '#b2182b', '#67001f',
])

In [536]:
# Keep only the rows for 2005.
df_visu = df_visu.loc[df_visu['Year'] == 2005]

In [None]:
# Discard every row that has a missing value in any column.
df_visu = df_visu.dropna(axis='index', how='any')

In [539]:
# One GeoJSON feature per row, carrying name, value and fill colour.
features = [
    {
        'type': 'Feature',
        'properties': {
            'name': record['Country'],
            'value': record['(GDP, million $)'],
            'color': color_scale[record['color']],
        },
        'geometry': record['geometry'],
    }
    for _, record in df_visu.iterrows()
]

In [540]:
def style_function(feature):
    """Return the style dict for a feature, filled with its 'color' property."""
    fill = feature['properties']['color']
    style = dict(fillOpacity=0.8, weight=0.1)
    style['fillColor'] = fill
    return style

# GeoJSON layer styled per feature, with a name/value tooltip.
data = folium.GeoJson(
    {'type': 'FeatureCollection', 'features': features},
    style_function=style_function,
    tooltip=folium.features.GeoJsonTooltip(fields=['name', 'value']),
)

In [564]:
# Choropleth of the (log) GDP values on a fresh map, with a name/value tooltip.
n = folium.Map()
c = folium.Choropleth(
    geo_data={'type': 'FeatureCollection', 'features': features},
    data=df_visu[['Country', '(GDP, million $)']],
    columns=['Country', '(GDP, million $)'],
    key_on='feature.properties.name',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    nan_fill_opacity=0.0,
)
c.add_to(n)
c.geojson.add_child(folium.features.GeoJsonTooltip(fields=['name', 'value']))
n