# This script takes the World Food Program's Global Food Prices dataset and aggregates the data by country, food category, and optionally, region.

In [83]:
import pandas as pd
import numpy as np
from datetime import datetime
import geopy
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point
import numpy as np

In [84]:
DATA_FILE = "./wfp_foodprices.csv"
WB_FILE = "./gdp.xls"
LEB_FILE = "./wfp_food_prices_lebanon.csv"

In [85]:
price_df = pd.read_csv(DATA_FILE)
code_df = pd.read_excel(WB_FILE, sheet_name=0, header=3)
region_df = pd.read_excel(WB_FILE, sheet_name=1)
region_df = pd.merge(code_df, region_df, how='inner', on='Country Code')
leb_df = pd.read_csv(LEB_FILE)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


This function gets percent change in prices across time for commodities in different markets

In [86]:
def get_percent_change(food_df, new_col, market_col, comm_col, date_col, price_col, date_limit=False):
    
    food_df.sort_values(by=date_col, inplace=True)
    df = food_df.copy()
    if date_limit:
        df = df.loc[df[date_col].apply(lambda date_: date_.year >= date_limit)]
    percent_change = pd.DataFrame()
    for market in df[market_col].unique():
        for commodity in df[comm_col].unique():
            market_change = df.loc[(df[market_col] == market) & (df[comm_col] == commodity)][price_col].pct_change()
            percent_change = pd.concat([percent_change, market_change])
    percent_change = percent_change.rename(columns={0:new_col}).fillna(0)
    final_df = food_df.merge(percent_change, how='left', left_index=True, right_index=True)
    
    return final_df

Clean the Lebanon data and get the monthly percent changes

In [79]:
leb_df = leb_df.groupby(['date', 'cmname', 'mktname']).mean().reset_index()
leb_df['date'] = leb_df['date'].apply(lambda date_: datetime.strptime(date_, '%m/%d/%Y').date())
leb_df.sort_values(by='date', inplace=True)

In [81]:
leb_df = get_percent_change(leb_df, 'pct_change', 'mktname', 'cmname', 'date', 'price')
leb_df = get_percent_change(leb_df, 'pct_change_5yr', 'mktname', 'cmname', 'date', 'price', 2016)
leb_df = get_percent_change(leb_df, 'pct_change_2yr', 'mktname', 'cmname', 'date', 'price', 2019)

Geocode the different markets (cities) for future map visualizations

In [59]:
leb_df = pd.read_csv('./lebanon_food_prices_cleaned.csv')

In [24]:
#clean city names so they match geocode:
rename_city = {"Minieh-Dannieh": "Minieh - Danniyeh", "Bechare":"Bsharri"}
leb_df['mktname'].replace(rename_city, inplace=True)

In [87]:
locator = geopy.Nominatim(user_agent="Lebanon", timeout=None)
geocode = locator.geocode

This function gets the latitude and longitude from the market name and returns the df as a GeoPandas df

In [88]:
def geocode_df(df, market_col, subset_of=False):
    markets = [mkt for mkt in df[market_col].unique() if mkt != "National Average"]
    geocoder = {}
    for market in markets:
        if subset_of:
            location = geocode(market + ", " + subset_of)
        else:
            location = geocode(market)
        geocoder[market] = geocoder.get(market, (location.latitude, location.longitude))
    df['lat'] = df[market_col].apply(lambda market: geocoder[market][0] if market in geocoder.keys() else np.nan)
    df['long'] = df[market_col].apply(lambda market: geocoder[market][1] if market in geocoder.keys() else np.nan)
    geometry = [Point(xy) for xy in zip(df['long'].astype(float), df['lat'].astype(float))]
    crs = {'init': 'epsg:4269'}
    df = GeoDataFrame(df, crs=crs, geometry=geometry)
    
    return df

In [67]:
leb_df = geocode_df(leb_df, "mktname", subset_of="Lebanon")
leb_df.to_csv('./lebanon_food_prices_cleaned.csv')

Clean Global Food Prices (72 countries) and get monthly percent changes

In [89]:
price_df['Country Name'] = price_df['adm0_name']
merge = pd.merge(region_df, price_df, how="inner", on="Country Name")
merge = merge.groupby(["Country Name", "Region", "mp_year", "mp_month", "cm_name", "cur_name"]).mean().reset_index()
country = merge[["Country Name", "Region", "cm_name", "mp_year", "mp_month", "mp_price", "cur_name"]]

In [None]:
country['date'] = country['mp_year'].astype(str) + "-" + country['mp_month'].astype(str)
country = get_percent_change(country, 'pct_change', 'Country Name', 'cm_name', 'date', 'mp_price')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  country['date'] = country['mp_year'].astype(str) + "-" + country['mp_month'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_df.sort_values(by=date_col, inplace=True)


Get latitude and longitude and convert to GeoPandas df

In [None]:
country = geocode_df(country, "Country Name")
country

Aggregate countries by Region and get their percent change

In [None]:
region = country.loc[country['Country Name'] != 'Lebanon']
region = country.groupby(["Region", "date", 'cm_name']).mean().reset_index()
region = get_percent_change(region, 'pct_change', 'Region', 'cm_name', 'date', 'mp_price')

write dfs to separate excel tabs and close script

In [None]:
file = "./global_food_prices_cleaned.xlsx"
tabs = {"countries" : country, "regions":region}
writer = pd.ExcelWriter(file, engine='openpyxl')
for tab, df in tabs.items():
    df.to_excel(writer, tab, index=False)
writer.save()