# This script takes the World Food Program's Global Food Prices dataset and aggregates the data by country, food category, and optionally, region.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import geopy
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
import numpy as np

In [2]:
DATA_FILE = "./wfp_foodprices.csv"
WB_FILE = "./gdp.xls"
LEB_FILE = "./wfp_food_prices_lebanon.csv"

In [3]:
price_df = pd.read_csv(DATA_FILE)
code_df = pd.read_excel(WB_FILE, sheet_name=0, header=3)
region_df = pd.read_excel(WB_FILE, sheet_name=1)
region_df = pd.merge(code_df, region_df, how='inner', on='Country Code')
leb_df = pd.read_csv(LEB_FILE)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


This function gets percent change in prices across time for commodities in different markets

In [4]:
def get_percent_change(food_df, new_col, market_col, comm_col, date_col, price_col, date_limit=False):
    
    food_df.sort_values(by=date_col, inplace=True)
    df = food_df.copy()
    if date_limit:
        df = df.loc[df[date_col].apply(lambda date_: date_.year >= date_limit)]
    percent_change = pd.DataFrame()
    for market in df[market_col].unique():
        for commodity in df[comm_col].unique():
            market_change = df.loc[(df[market_col] == market) & (df[comm_col] == commodity)][price_col].pct_change()
            percent_change = pd.concat([percent_change, market_change])
    percent_change = percent_change.rename(columns={0:new_col}).fillna(0)
    final_df = food_df.merge(percent_change, how='left', left_index=True, right_index=True)
    
    return final_df

Clean the Lebanon data and get the monthly percent changes

In [95]:
leb_df = leb_df.groupby(['date', 'cmname', 'mktname']).mean().reset_index()
leb_df['date'] = leb_df['date'].apply(lambda date_: datetime.strptime(date_, '%m/%d/%Y').date())
leb_df.sort_values(by='date', inplace=True)

In [96]:
leb_df = get_percent_change(leb_df, 'pct_change', 'mktname', 'cmname', 'date', 'price')
leb_df = get_percent_change(leb_df, 'pct_change_5yr', 'mktname', 'cmname', 'date', 'price', 2016)
leb_df = get_percent_change(leb_df, 'pct_change_2yr', 'mktname', 'cmname', 'date', 'price', 2019)

Geocode the different markets (cities) for future map visualizations

In [97]:
leb_df = pd.read_csv('./lebanon_food_prices_cleaned.csv')

In [98]:
#clean city names so they match geocode:
rename_city = {"Minieh-Dannieh": "Minieh - Danniyeh", "Bechare":"Bsharri"}
leb_df['mktname'].replace(rename_city, inplace=True)

In [5]:
locator = geopy.Nominatim(user_agent="Lebanon", timeout=None)
geocode = locator.geocode

This function gets the latitude and longitude from the market name and returns the df as a GeoPandas df

In [6]:
def geocode_df(df, market_col, subset_of=False):
    markets = [mkt for mkt in df[market_col].unique() if mkt != "National Average"]
    geocoder = {}
    for market in markets:
        if subset_of:
            location = geocode(market + ", " + subset_of)
        else:
            location = geocode(market)
        geocoder[market] = geocoder.get(market, (location.latitude, location.longitude))
    df['lat'] = df[market_col].apply(lambda market: geocoder[market][0] if market in geocoder.keys() else np.nan)
    df['long'] = df[market_col].apply(lambda market: geocoder[market][1] if market in geocoder.keys() else np.nan)
    geometry = [Point(xy) for xy in zip(df['long'].astype(float), df['lat'].astype(float))]
    crs = {'init': 'epsg:4269'}
    df = GeoDataFrame(df, crs=crs, geometry=geometry)
    
    return df

In [101]:
leb_df = geocode_df(leb_df, "mktname", subset_of="Lebanon")
leb_df.to_csv('./lebanon_food_prices_cleaned.csv')

Clean Global Food Prices (72 countries) and get monthly percent changes

In [14]:
price_df = price_df.loc[price_df['mkt_name'] != 'National Average']
price_df['Country Name'] = price_df['adm0_name']
merge = pd.merge(region_df, price_df, how="inner", on="Country Name")
merge = merge.groupby(["Country Name", "Region", "mp_year", "mp_month", "cm_name", "cur_name"]).mean().reset_index()
country = merge[["Country Name", "Region", "cm_name", "mp_year", "mp_month", "mp_price", "cur_name"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_df['Country Name'] = price_df['adm0_name']


In [15]:
country['date'] = country['mp_year'].astype(str) + "-" + country['mp_month'].astype(str)
country = get_percent_change(country, 'pct_change', 'Country Name', 'cm_name', 'date', 'mp_price')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  country['date'] = country['mp_year'].astype(str) + "-" + country['mp_month'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_df.sort_values(by=date_col, inplace=True)


Get latitude and longitude and convert to GeoPandas df

In [16]:
country = geocode_df(country, "Country Name")
country

Unnamed: 0,Country Name,Region,cm_name,mp_year,mp_month,mp_price,cur_name,date,pct_change,lat,long,geometry
85628,Niger,Sub-Saharan Africa,Millet - Retail,1990,1,61.000000,XOF,1990-1,0.000000,17.735621,9.323843,POINT (9.32384 17.73562)
85629,Niger,Sub-Saharan Africa,Rice (imported) - Retail,1990,1,201.666667,XOF,1990-1,0.000000,17.735621,9.323843,POINT (9.32384 17.73562)
85630,Niger,Sub-Saharan Africa,Sorghum - Retail,1990,1,57.750000,XOF,1990-1,0.000000,17.735621,9.323843,POINT (9.32384 17.73562)
85657,Niger,Sub-Saharan Africa,Sorghum - Retail,1990,10,61.750000,XOF,1990-10,0.069264,17.735621,9.323843,POINT (9.32384 17.73562)
85656,Niger,Sub-Saharan Africa,Rice (imported) - Retail,1990,10,200.000000,XOF,1990-10,-0.008264,17.735621,9.323843,POINT (9.32384 17.73562)
...,...,...,...,...,...,...,...,...,...,...,...,...
57045,Iraq,Middle East & North Africa,Tea - Retail,2020,9,6263.888889,IQD,2020-9,-0.010965,33.095579,44.174977,POINT (44.17498 33.09558)
57046,Iraq,Middle East & North Africa,Tomatoes - Retail,2020,9,833.319444,IQD,2020-9,0.612599,33.095579,44.174977,POINT (44.17498 33.09558)
57047,Iraq,Middle East & North Africa,Toothbrush - Retail,2020,9,2489.583333,IQD,2020-9,-0.041444,33.095579,44.174977,POINT (44.17498 33.09558)
78550,Mozambique,Sub-Saharan Africa,"Sugar (brown, imported) - Retail",2020,9,72.916000,MZN,2020-9,-0.065179,-19.302233,34.914498,POINT (34.91450 -19.30223)


Aggregate countries by Region and get their percent change

In [17]:
region = country.loc[country['Country Name'] != 'Lebanon']
region = country.groupby(["Region", "date", 'cm_name']).mean().reset_index()
region = get_percent_change(region, 'pct_change', 'Region', 'cm_name', 'date', 'mp_price')

In [20]:
region

Unnamed: 0,Region,date,cm_name,mp_year,mp_month,mp_price,pct_change_x,lat,long,pct_change_y
54295,Sub-Saharan Africa,1990-1,Millet - Retail,1990,1,61.000000,0.000000,17.735621,9.323843,0.000000
54296,Sub-Saharan Africa,1990-1,Rice (imported) - Retail,1990,1,201.666667,0.000000,17.735621,9.323843,0.000000
54297,Sub-Saharan Africa,1990-1,Sorghum - Retail,1990,1,57.750000,0.000000,17.735621,9.323843,0.000000
54298,Sub-Saharan Africa,1990-10,Millet - Retail,1990,10,64.200000,0.052459,17.735621,9.323843,0.052459
54299,Sub-Saharan Africa,1990-10,Rice (imported) - Retail,1990,10,200.000000,-0.008264,17.735621,9.323843,-0.008264
...,...,...,...,...,...,...,...,...,...,...
54235,South Asia,2020-9,Ghee (artificial) - Retail,2020,9,252.328000,-0.002995,30.330840,71.247499,-0.002995
54236,South Asia,2020-9,Lentils (broken) - Retail,2020,9,138.208333,-0.026416,28.108393,84.091714,-0.026416
54237,South Asia,2020-9,Lentils (masur) - Retail,2020,9,110.886333,0.010566,27.403859,80.770371,0.014771
54239,South Asia,2020-9,Lentils - Retail,2020,9,148.750000,0.025862,7.555494,80.713785,0.025862


write dfs to separate excel tabs and close script

In [18]:
file = "./global_food_prices_cleaned.xlsx"
tabs = {"countries" : country, "regions":region}
writer = pd.ExcelWriter(file, engine='openpyxl')
for tab, df in tabs.items():
    df.to_excel(writer, tab, index=False)
writer.save()