In [51]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import pickle
import numpy as np
from ipywidgets import interact
import json

In [52]:
# Load the pre-built DataFrame from disk.
# NOTE(review): pickle can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
pickle_file="./Data/uni_df.pickle"
# The context manager closes the file handle even if unpickling fails.
with open(pickle_file,'rb') as pickle_handle:
    df=pickle.load(pickle_handle)

In [68]:
def compute_score(df,weights=None):
    """Compute, per item, production / (production + import - export).

    Columns of ``df`` are classified by the keywords 'production', 'import'
    and 'export' (case-insensitive); columns whose label contains
    '1000 Head' are ignored. An item gets a score column only when a
    production, an import and an export column are all found for it.

    NOTE(review): assumes column labels are space-separated strings with the
    item name first, followed by trailing element/unit words (two trailing
    words are stripped from production labels and then one more to obtain
    the item name; three are stripped from import/export labels) -- TODO
    confirm against the data that produced ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Area' and 'Year' plus the
        production/import/export columns described above.
    weights : optional
        Reserved for a future aggregation of the per-item scores. It is
        currently not used: the per-item scores are returned either way.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('Area', 'Year'), one column per matched item.
    """

    # Defining useful method
    def drop_words( s , w=1 , end=True):
        if end:
            return s.rsplit(' ',w)[0]
        else:
            return s.split(' ',w)[-1]

    def element_columns(keyword, n_trailing_words):
        # Pairs of (label with trailing words removed, original label) for
        # every column of df belonging to the given element.
        return [(drop_words(col, n_trailing_words), col)
                for col in df.columns
                if ('1000 Head' not in col) and keyword in col.lower()]

    def find_column(item, candidates):
        # Original label of the candidate matching `item`: an exact match on
        # the stripped label wins, otherwise the first candidate containing
        # the item name literally (no regex interpretation, so names with
        # parentheses or other metacharacters match themselves).
        first_partial = None
        for stripped, original in candidates:
            if stripped == item:
                return original
            if first_partial is None and item in stripped:
                first_partial = original
        return first_partial

    # Removing useless words to facilitate matching (unit, export, import, production)
    prod_cols = element_columns('production', 2)
    imp_cols = element_columns('import', 3)
    exp_cols = element_columns('export', 3)

    indexed = df.set_index(['Area','Year'])

    # initialising new df with index
    score = pd.DataFrame(index=indexed.index)

    # calculating score
    # The triple (production, import, export) is resolved per item, so the
    # three columns always refer to the same item; items lacking an import
    # or an export column are skipped instead of shifting the pairing.
    for stripped_prod, prod_col in prod_cols:
        item = drop_words(stripped_prod)
        imp_col = find_column(item, imp_cols)
        exp_col = find_column(item, exp_cols)
        if imp_col is None or exp_col is None:
            continue
        production = indexed[prod_col]
        score[item] = production/(production+indexed[imp_col]-indexed[exp_col])

    # `is None` (not `== None`) so that array-like weights do not trigger an
    # element-wise comparison with an ambiguous truth value.
    if weights is None:
        return score

    ## To implement:
    ##
    ## Aggregate score if weights is not None
    ##

    return score

In [69]:
# Display the per-item score table for the loaded frame (no weights passed,
# so the scores are returned without any aggregation).
compute_score(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,"Anise, badian, fennel, coriander",Apples,Apricots,Artichokes,Asparagus,Avocados,Bambara beans,Bananas,Barley,"Beans, dry",...,Buffaloes,"Camelids, other",Camels,Cattle,Goats,Horses,Mules,Pigs,Sheep,Sheep and Goats
Area,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,1970,,1.092743,1.000000,,,,,,1.000000,,...,,,1.0,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000
Afghanistan,1971,,1.041616,1.000000,,,,,,1.000000,,...,,,1.0,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000
Afghanistan,1972,,1.271024,1.016480,,,,,,1.000000,,...,,,1.0,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000
Afghanistan,1973,,1.171378,1.028350,,,,,,1.000000,,...,,,1.0,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000
Afghanistan,1974,,1.247532,1.045622,,,,,0.000000,1.000000,,...,,,1.0,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,2011,0.642424,0.322145,0.895833,1.000000,0.992453,1.083403,,1.017716,0.864938,0.713129,...,,,,0.996967,1.000000,0.996441,1.000000,0.999763,0.999990,0.999999
Zimbabwe,2012,0.579710,0.335224,0.754098,1.000000,0.990099,1.735648,,1.004954,0.774391,0.885165,...,,,,0.998851,0.999980,0.997400,1.000000,0.999743,0.999219,0.999934
Zimbabwe,2013,0.666667,0.357614,0.807018,0.996678,0.980392,2.063492,,0.999271,0.768101,0.922806,...,,,,0.998939,0.999961,0.999393,0.992707,0.999913,0.999869,0.999956
Zimbabwe,2014,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000,1.000000,...,,,,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
