In [1]:
import pandas as pd
import numpy as np

In [2]:
# Country-code lookup table; its 'COUNTRY' column is renamed to 'country' so it
# shares a column name with the price tables it is merged with later on.
countrycodes_file = "CountryCodesLookup.csv"
countrycodes = (
    pd.read_csv(countrycodes_file, encoding="ISO-8859-1")
    .rename(columns={'COUNTRY': 'country'})
)

In [3]:
# Raw WFP market food price records (the file is read as ISO-8859-1).
food_price_file = "global-food-prices/wfp_market_food_prices.csv"
food_price = pd.read_csv(filepath_or_buffer=food_price_file, encoding="ISO-8859-1")

# Preview the first five rows.
food_price.head(5)

Unnamed: 0,adm0_id,adm0_name,adm1_id,adm1_name,mkt_id,mkt_name,cm_id,cm_name,cur_id,cur_name,pt_id,pt_name,um_id,um_name,mp_month,mp_year,mp_price,mp_commoditysource
0,1,Afghanistan,272,Badakhshan,266,Fayzabad,55,Bread,87,AFN,15,Retail,5,KG,1,2014,50.0,WFP
1,1,Afghanistan,272,Badakhshan,266,Fayzabad,55,Bread,87,AFN,15,Retail,5,KG,2,2014,50.0,WFP
2,1,Afghanistan,272,Badakhshan,266,Fayzabad,55,Bread,87,AFN,15,Retail,5,KG,3,2014,50.0,WFP
3,1,Afghanistan,272,Badakhshan,266,Fayzabad,55,Bread,87,AFN,15,Retail,5,KG,4,2014,50.0,WFP
4,1,Afghanistan,272,Badakhshan,266,Fayzabad,55,Bread,87,AFN,15,Retail,5,KG,5,2014,50.0,WFP


In [4]:
# Keep only the 2016 records and the columns needed for the price comparison.
columns_2016 = [
    'adm0_name', 'cur_name', 'cm_name', 'pt_name',
    'um_name', 'mp_month', 'mp_year', 'mp_price',
]
is_2016 = food_price['mp_year'] == 2016
food_price2016 = food_price.loc[is_2016, columns_2016]
food_price2016.head()

Unnamed: 0,adm0_name,cur_name,cm_name,pt_name,um_name,mp_month,mp_year,mp_price
22,Afghanistan,AFN,Bread,Retail,KG,1,2016,55.5
23,Afghanistan,AFN,Bread,Retail,KG,2,2016,55.5
24,Afghanistan,AFN,Bread,Retail,KG,3,2016,55.5
25,Afghanistan,AFN,Bread,Retail,KG,4,2016,50.0
26,Afghanistan,AFN,Bread,Retail,KG,5,2016,50.0


In [5]:
# Average the 2016 prices for every (country, currency, commodity, unit) combination.
# The aggregation is given by name ("mean") instead of the np.mean callable:
# pandas computes the same group mean either way, but passing the NumPy function
# emits a FutureWarning on pandas >= 2.1.
table = pd.pivot_table(
    food_price2016,
    values=['mp_price'],
    index=['adm0_name', 'cur_name', 'cm_name', 'um_name'],
    aggfunc={"mp_price": "mean"},
)

# Show the first rows alongside summary statistics of the averaged prices.
display(table.head(), table.describe())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mp_price
adm0_name,cur_name,cm_name,um_name,Unnamed: 4_level_1
Afghanistan,AFN,Bread,KG,44.749375
Afghanistan,AFN,Exchange rate,USD/LCU,67.790319
Afghanistan,AFN,Fuel (diesel),L,36.969922
Afghanistan,AFN,Rice (low quality),KG,41.392708
Afghanistan,AFN,"Wage (non-qualified labour, non-agricultural)",Day,285.026042


Unnamed: 0,mp_price
count,889.0
mean,8314.943
std,96037.88
min,0.1655455
25%,16.92273
50%,139.5227
75%,1091.762
max,2499155.0


In [6]:
# Flatten the pivot table: move all four index levels back into ordinary columns
# (they come out in level order, leaving a fresh 0..n-1 row index), then give
# every column a descriptive name.
# Data source: https://www.kaggle.com/jboysen/global-food-prices/version/1
table = (
    table
    .reset_index(level=['adm0_name', 'cur_name', 'cm_name', 'um_name'])
    .rename(columns={
        'adm0_name': 'country',
        'cur_name': 'currency_ID',
        'cm_name': 'commodity_purchased',
        'um_name': 'unit_of_goods_measurement',
        'mp_price': 'monthly_average_price_paid',
    })
)

table.head()

Unnamed: 0,country,currency_ID,commodity_purchased,unit_of_goods_measurement,monthly_average_price_paid
0,Afghanistan,AFN,Bread,KG,44.749375
1,Afghanistan,AFN,Exchange rate,USD/LCU,67.790319
2,Afghanistan,AFN,Fuel (diesel),L,36.969922
3,Afghanistan,AFN,Rice (low quality),KG,41.392708
4,Afghanistan,AFN,"Wage (non-qualified labour, non-agricultural)",Day,285.026042


In [7]:
# Bread: pick, per country, the commodity whose name contains "bread"
# (case-insensitive) with the highest average price. Sorting ascending by price
# with NaNs first and keeping the last duplicate leaves exactly that row.
table_bread = (
    table.loc[table['commodity_purchased'].str.contains('Bread', case=False)]
    .sort_values(by=['country', 'monthly_average_price_paid'], ascending=True, na_position='first')
    .drop_duplicates(subset='country', keep='last')
)

# Left-join onto the country-code list so every listed country keeps a row
# (NaN where no bread price exists), then drop the code columns not needed here.
table_bread = (
    countrycodes
    .merge(table_bread, how="left", on='country')
    .drop(columns=['A2 (ISO)', 'NUM (UN)', 'DIALING CODE', 'spotifyFlag'])
)

table_bread.head()

Unnamed: 0,country,A3 (UN),currency_ID,commodity_purchased,unit_of_goods_measurement,monthly_average_price_paid
0,Afghanistan,AFG,AFN,Bread,KG,44.749375
1,Albania,ALB,,,,
2,Algeria,DZA,DZD,Bread,Unit,10.0
3,American Samoa,ASM,,,,
4,Andorra,AND,,,,


In [8]:
# Wheat: pick, per country, the commodity whose name contains "wheat"
# (case-insensitive) with the highest average price. Sorting ascending by price
# with NaNs first and keeping the last duplicate leaves exactly that row.
table_wheat = (
    table.loc[table['commodity_purchased'].str.contains('Wheat', case=False)]
    .sort_values(by=['country', 'monthly_average_price_paid'], ascending=True, na_position='first')
    .drop_duplicates(subset='country', keep='last')
)

# Left-join onto the country-code list so every listed country keeps a row
# (NaN where no wheat price exists), then drop the code columns not needed here.
table_wheat = (
    countrycodes
    .merge(table_wheat, how="left", on='country')
    .drop(columns=['A2 (ISO)', 'NUM (UN)', 'DIALING CODE', 'spotifyFlag'])
)

table_wheat.head()

Unnamed: 0,country,A3 (UN),currency_ID,commodity_purchased,unit_of_goods_measurement,monthly_average_price_paid
0,Afghanistan,AFG,AFN,Wheat,KG,24.075078
1,Albania,ALB,,,,
2,Algeria,DZA,,,,
3,American Samoa,ASM,,,,
4,Andorra,AND,,,,


In [9]:
# Rice: pick, per country, the commodity whose name contains "rice"
# (case-insensitive) with the highest average price. Sorting ascending by price
# with NaNs first and keeping the last duplicate leaves exactly that row.
# NOTE(review): rows are not restricted to unit "KG" (an earlier draft of this
# cell had such a filter disabled), so the "highest price" may be compared
# across different units of measurement - confirm this is intended.
table_rice = (
    table.loc[table['commodity_purchased'].str.contains('Rice', case=False)]
    .sort_values(by=['country', 'monthly_average_price_paid'], ascending=True, na_position='first')
    .drop_duplicates(subset='country', keep='last')
)

# Left-join onto the country-code list so every listed country keeps a row
# (NaN where no rice price exists), then drop the code columns not needed here.
table_rice = (
    countrycodes
    .merge(table_rice, how="left", on='country')
    .drop(columns=['A2 (ISO)', 'NUM (UN)', 'DIALING CODE', 'spotifyFlag'])
)

table_rice.head()

Unnamed: 0,country,A3 (UN),currency_ID,commodity_purchased,unit_of_goods_measurement,monthly_average_price_paid
0,Afghanistan,AFG,AFN,Rice (low quality),KG,41.392708
1,Albania,ALB,,,,
2,Algeria,DZA,DZD,Rice,KG,106.25
3,American Samoa,ASM,,,,
4,Andorra,AND,,,,


In [11]:
# Merging 3 tables into one: one row per country with the bread, rice and wheat
# figures side by side.
# table_bread, table_rice and table_wheat share the same column names, so the
# commodity-specific columns are tagged "(bread)" / "(rice)" / "(wheat)" before joining.
price_columns = ['commodity_purchased', 'unit_of_goods_measurement', 'monthly_average_price_paid']

table_food = table_bread.rename(columns={col: col + '(bread)' for col in price_columns})

for commodity, commodity_table in (('rice', table_rice), ('wheat', table_wheat)):
    tagged = (
        commodity_table
        .drop(columns=['A3 (UN)'])  # country code is already present from table_bread
        .rename(columns={col: col + '(' + commodity + ')' for col in price_columns})
        .rename(columns={'currency_ID': 'currency_ID_fallback'})
    )
    table_food = table_food.merge(tagged, how="left", on='country')

    # Previously the rice/wheat currency column was simply discarded, so a country
    # without a bread record showed rice/wheat prices next to a NaN currency.
    # Keep the bread currency when there is one, otherwise fall back to this table's.
    # NOTE(review): if a country's commodities are quoted in different currencies,
    # the single currency_ID column still reflects only the first one found -
    # confirm whether per-commodity currency columns are needed.
    fallback_currency = table_food.pop('currency_ID_fallback')
    table_food['currency_ID'] = table_food['currency_ID'].fillna(fallback_currency)

In [13]:
# Show the combined bread / rice / wheat table built in the previous cell.
table_food

Unnamed: 0,country,A3 (UN),currency_ID,commodity_purchased(bread),unit_of_goods_measurement(bread),monthly_average_price_paid(bread),commodity_purchased(rice),unit_of_goods_measurement(rice),monthly_average_price_paid(rice),commodity_purchased(wheat),unit_of_goods_measurement(wheat),monthly_average_price_paid(wheat)
0,Afghanistan,AFG,AFN,Bread,KG,44.749375,Rice (low quality),KG,41.392708,Wheat,KG,24.075078
1,Albania,ALB,,,,,,,,,,
2,Algeria,DZA,DZD,Bread,Unit,10.000000,Rice,KG,106.250000,,,
3,American Samoa,ASM,,,,,,,,,,
4,Andorra,AND,,,,,,,,,,
5,Angola,AGO,,,,,,,,,,
6,Anguilla,AIA,,,,,,,,,,
7,Antarctica,ATA,,,,,,,,,,
8,Antigua and Barbuda,ATG,,,,,,,,,,
9,Argentina,ARG,,,,,,,,,,
