In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import json
import requests

## 1 Read in data
### 1.1 Make API request for WFP Data

In [29]:
# datastore_search endpoint for the WFP price resource; shared by both requests
# below so the resource id is written only once.
WFP_DATASTORE_URL = (
    "https://data.humdata.org/api/action/datastore_search"
    "?resource_id=12d7c8e3-eff9-4db0-93b7-726825c4fe9a"
)
# (connect, read) timeouts in seconds, so a stalled connection raises instead of
# hanging the kernel. The read timeout is generous because the full download is large.
REQUEST_TIMEOUT = (10, 300)

# Probe with a single record to learn how many records the resource holds.
small_url = WFP_DATASTORE_URL + "&limit=1"
small_r = requests.get(small_url, timeout=REQUEST_TIMEOUT)
small_r.raise_for_status()  # surface HTTP errors here rather than as a KeyError below
num_records = small_r.json()['result']['total']

# Ask for every record in one call by setting the limit to the reported total.
# NOTE(review): assumes the server honours a limit this large -- confirm that
# len(wfp_df) == num_records after running.
url = WFP_DATASTORE_URL + "&limit=" + str(num_records)
r = requests.get(url, timeout=REQUEST_TIMEOUT)
r.raise_for_status()
wfp_df = pd.DataFrame(r.json()['result']['records'])

### 1.2. Limit data to countries of interest, remove and rename columns

In [75]:
# Countries of interest are the distinct ADMIN0 values found in the regions file.
gdf = gpd.read_file("regions.geojson")
countries = gdf['ADMIN0'].value_counts().index.tolist()
in_countries_of_interest = wfp_df['adm0_name'].isin(countries)
wfp_df_filtered = wfp_df[in_countries_of_interest]

# Keep every column whose name does not end in "_id" (case-insensitive).
cols = [c for c in wfp_df_filtered.columns if not c.lower().endswith('_id')]

# Source column name -> name used in the rest of the notebook.
renamed_columns = {
    "adm0_name": "ADMIN0",
    "adm1_name": "ADMIN1",
    "cm_name": "commodity",
    "cur_name": "currency",
    "pt_name": "mkt_type",
    "um_name": "unit_measure",
    "mp_month": "month",
    "mp_year": "year",
    "mp_price": "price",
}

wfp_df_clean = (
    wfp_df_filtered[cols]
    .drop(columns=['mp_commoditysource'])
    .rename(columns=renamed_columns)
)

## 2. Clean data
### 2.1 Commodity names, units of measurement
All commodity names state whether the price is retail or wholesale, but this information is already available in the `mkt_type` column. Clean the commodity names to remove this redundant information, then see which crops are most prominent.

In [167]:
# Keep only the text before the first " - " separator in each commodity name.
commodity_parts = wfp_df_clean["commodity"].str.split(" - ", expand=True)
wfp_df_clean["commodity"] = commodity_parts[0]

In [171]:
# Number of rows recorded under each unit of measurement, most frequent first.
wfp_df_clean["unit_measure"].value_counts()

KG           27205
100 KG        6182
90 KG         5842
L             5113
3.5 KG        5069
3 KG          3365
MT            2379
USD/LCU       1332
50 KG          836
Head           522
Unit           446
Day            181
500 ML         169
400 G          169
LCU/3.5kg       46
Name: unit_measure, dtype: int64

The most common unit of measurement is KG, so limit the dataset to crops that are measured in KG and convert prices to price per KG (instead of price per 100 KG or per 90 KG). Also keep the MT unit of measurement, since a metric ton is 1,000 KG.

Justification to drop other units of measurement:
* L - used for oil, fuel, milk, and ground nuts (in one case).
* USD/LCU - exchange rates for Somalia, South Sudan, and Ethiopia. Will reference an external dataset for conversion instead.
* Head and Unit -  used for livestock
* Day - used for wages
* "500 ML" and "400 G" - 169 instances each, used for milk and bread in Nairobi
* LCU/3.5kg - used for milling cost

In [199]:
# Remove rows that do not use kilograms (metric tons are kept and converted below).
# regex=False matches the literal text 'KG'; na=False counts a missing unit as
# "not KG" so the mask stays strictly boolean.
has_kg = wfp_df_clean.unit_measure.str.contains('KG', regex=False, na=False)
is_metric_ton = wfp_df_clean.unit_measure == 'MT'
wfp_df_clean = wfp_df_clean[has_kg | is_metric_ton]

# Convert to price per kilogram: multiplier applied to a price quoted per <unit>.
unit_convert = {
    "KG": 1,
    "100 KG": 1./100,
    "90 KG": 1./90,
    "3.5 KG": 1/3.5,
    "3 KG": 1./3,
    "MT": 1./1000,
    "50 KG": 1./50
}


def convert_to_kg(row):
    """Add a ``price_per_kg`` entry to one row and return that row.

    Row-wise form of the conversion, retained so any existing caller keeps
    working; the frame-level conversion in this cell is vectorized instead.

    Parameters
    ----------
    row : pandas.Series
        Must provide ``price`` and ``unit_measure``.

    Returns
    -------
    pandas.Series
        The same row with ``price_per_kg`` set to ``price`` times the
        ``unit_convert`` factor for its unit.

    Raises
    ------
    KeyError
        If ``row.unit_measure`` is not a key of ``unit_convert``.
    """
    new_price = row.price * unit_convert[row.unit_measure]
    row["price_per_kg"] = new_price
    return row


# Look up the factor for every row at once instead of looping over rows in Python.
kg_factor = wfp_df_clean.unit_measure.map(unit_convert)

# A unit can pass the 'KG' filter without having a factor (anything not listed in
# unit_convert); stop with the offending units named rather than emit NaN prices.
unknown_units = wfp_df_clean.unit_measure[kg_factor.isna()].unique().tolist()
if unknown_units:
    raise KeyError("No per-KG conversion factor for unit(s): {}".format(unknown_units))

wfp_df_clean = (
    wfp_df_clean
    .assign(price_per_kg=wfp_df_clean.price * kg_factor)
    .drop(columns=["unit_measure", "price"])
)
wfp_df_clean.head()

Unnamed: 0,ADMIN0,ADMIN1,mkt_name,commodity,currency,mkt_type,month,year,price_per_kg
55435,Uganda,Kampala,Owino,Plantains,UGX,Retail,7,2008,266.25
55436,Uganda,Kampala,Owino,Plantains,UGX,Retail,8,2008,310.0
55437,Uganda,Kampala,Owino,Plantains,UGX,Retail,9,2008,351.11
55438,Uganda,Kampala,Owino,Plantains,UGX,Retail,10,2008,425.0
55439,Uganda,Kampala,Owino,Plantains,UGX,Retail,11,2008,407.27


### 2.2 (INCOMPLETE) Convert prices to USD (or any standard currency)
The earliest hunger score data we have is from 2009, so get rid of any data from before then.

In [215]:
# Drop price records from before 2009.
from_2009_onward = wfp_df_clean["year"] >= 2009
wfp_df_clean = wfp_df_clean.loc[from_2009_onward]

I tried finding packages to convert currencies to USD based on monthly exchange rates, but the well-supported packages (e.g. forex-python) only seem to have data from Europe and other major participants in the foreign exchange market.

The African Development Bank Group (ADBG) publishes yearly Excel spreadsheets with exchange rates for our time period of interest, but I did not have enough time to download and clean all of the .xlsx files. Perhaps conversion to a single currency will not be necessary?
https://www.afdb.org/en/documents/financial-information/exchange-rates  
This is all I found after a (very rushed) first pass, so maybe there's a better resource out there.

Unnamed: 0,ADMIN0,ADMIN1,mkt_name,commodity,currency,mkt_type,month,year,price_per_kg,date
55441,Uganda,Kampala,Owino,Plantains,UGX,Retail,1,2009,395.000000,2009-01-01
55442,Uganda,Kampala,Owino,Plantains,UGX,Retail,2,2009,386.000000,2009-02-01
55443,Uganda,Kampala,Owino,Plantains,UGX,Retail,3,2009,619.230000,2009-03-01
55859,Uganda,Iganga,Iganga,Millet,UGX,Retail,2,2013,1500.000000,2013-02-01
55860,Uganda,Iganga,Iganga,Millet,UGX,Retail,3,2013,1566.600000,2013-03-01
...,...,...,...,...,...,...,...,...,...,...
1357664,Sudan,Red Sea,Port Sudan,Sorghum (white),SDG,Wholesale,11,2019,19.629667,2019-11-01
1357665,Sudan,Red Sea,Port Sudan,Sorghum (white),SDG,Wholesale,12,2019,24.444444,2019-12-01
1357666,Sudan,Red Sea,Port Sudan,Sorghum (white),SDG,Wholesale,1,2020,28.333333,2020-01-01
1357667,Sudan,Red Sea,Port Sudan,Sorghum (white),SDG,Wholesale,2,2020,31.666667,2020-02-01


### 2.3 Average prices across markets within an ADMIN1 zone

### 2.4 Examine differences between retail/wholesale pricing, split data if necessary

### 2.5 Determine approach to different commodities
* Is there enough data for a certain commodity within each of the admin zones to drop other commodities?
* What pricing differences are there between different commodities? Can commodities be grouped? 