In [79]:
import os
import re
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from dataprep.clean import clean_country, validate_country
# import warnings
# warnings.filterwarnings("ignore")

In [80]:
production_df = pd.read_csv(os.path.join('../data/palm_olein_data/Production_Crops_Livestock_E_All_Data.csv'), encoding='ISO-8859-1')
production_df['Code'] = production_df['Area'].str[:3]  # create the 3-letter country code needed for choropleth map
production_df['Code'] = production_df['Code'].str.upper()
code = production_df.pop('Code')  # pop the code column
production_df.insert(2, 'Code', code)  # move it to the position we want
production_df.head()
# deal with the warnings

Unnamed: 0,Area Code,Area,Code,Item Code,Item,Element Code,Element,Unit,Y1961,Y1961F,...,Y2016,Y2016F,Y2017,Y2017F,Y2018,Y2018F,Y2019,Y2019F,Y2020,Y2020F
0,2,Afghanistan,AFG,221,"Almonds, with shell",5312,Area harvested,ha,,,...,19481.0,,19793.0,,20053.0,,29203.0,,22134.0,
1,2,Afghanistan,AFG,221,"Almonds, with shell",5419,Yield,hg/ha,,,...,16859.0,Fc,13788.0,Fc,17161.0,Fc,13083.0,Fc,17759.0,Fc
2,2,Afghanistan,AFG,221,"Almonds, with shell",5510,Production,tonnes,,,...,32843.0,,27291.0,,34413.0,,38205.0,,39307.0,
3,2,Afghanistan,AFG,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,,M,...,24500.0,Im,26500.0,Im,25333.0,Im,25444.0,Im,25759.0,Im
4,2,Afghanistan,AFG,711,"Anise, badian, fennel, coriander",5419,Yield,hg/ha,,,...,7075.0,Fc,7149.0,Fc,7142.0,Fc,7123.0,Fc,7138.0,Fc


# Why?
My interest in palm oil pricing cam about from watching the price per mt of RBD Palm Olein FOB Malaysia increase from USD515 p/mt on 7th May 2020 to USD1064 on 1st March 2021.

The price had more than doubled in 10 months, and it continued to grow to a peak of US$2,000 per mt in 2022. I had questions:
- What had driven this rapid increase?
- If this was a bubble, when would it pop?
- If this was a structural change, what were the changes?
- In an uncertain business environment, how can buyers use publicly available data to make better purchasing decisions?

In [81]:
price = pd.read_csv('../palm_olein/data/palm oil prices 021020 - 290422.csv')
price.columns = ['date', 'price']
price['price'] = price['price'].ffill()
fig = px.line(price, x='date', y='price')
fig.update_layout(
    title_text="<b>Global Palm Oil Price<b>",  # Add figure title
    title_font_size=40,
    legend_font_size=20,
    width=1000,
    height=700
)

fig.update_xaxes(
    title_text="</b>Year</b>",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25)
)

fig.update_yaxes(
    title_text="<b>Price US$ p/mt</b>",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25))

fig.show()

# why am i missing vert & horizontal lines?how to reduce the number of dates shown on the x axis so its less noisy and more readable

So i began researching. I started with the macro aspects of not only the palm oil market, but also, because they are substitutes, the vegetable oil market in general.

# Set-up

In [82]:
production_elements = ['Area harvested', 'Yield', 'Production', 'Stocks', 'Laying', 'Producing Animals/Slaughtered', 'Yield/Carcass Weight', 'Milk Animals', 'Prod Popultn']

In [83]:
DATA = '../data/palm_olein_data/'

In [84]:
# area codes >1000 and in this list are regions and will cause double count unless removed
area_code_list = [261, 265, 266, 268, 269]

In [85]:
oil_crops = ['Oil, castor beans', 'Oil, citronella', 'Oil, coconut (copra)', 'Oil, cottonseed', 'Oil, essential nes', 'Oil, groundnut', 'Oil, linseed', 'Oil, maize', 'Oil, olive residues', 'Oil, olive, virgin', 'Oil, palm', 'Oil, palm kernel', 'Oil, rapeseed', 'Oil, sesame', 'Oil, soybean', 'Oil, sunflower']

###

In [86]:
# Wrangle production data 🗸
production_df = pd.read_csv(os.path.join(DATA, 'Production_Crops_Livestock_E_All_Data.csv'), encoding='ISO-8859-1')
# (77523, 127, 76.86mb)
production_df = production_df.groupby(['Item']).sum()
production_df = production_df.drop(['Item Code', 'Area Code', 'Element Code'], axis=1)
production_df.columns = production_df.columns.str.replace('Y', '')
production_df = production_df[production_df.index.isin(oil_crops)].transpose()
production_df.index.name = "Year"
production_df = production_df.reset_index(drop=False)
production_df.head()
# print(production_df.shape)

Item,Year,"Oil, coconut (copra)","Oil, cottonseed","Oil, groundnut","Oil, linseed","Oil, maize","Oil, olive, virgin","Oil, palm","Oil, palm kernel","Oil, rapeseed","Oil, sesame","Oil, soybean","Oil, sunflower"
0,1961,7515823.0,9500322.0,12746566.0,3688186.0,1470757.0,6626752.0,7161016.0,2357251.0,5403878.0,1985736.0,12792646.0,8150917.0
1,1962,8925274.0,9911752.0,13289585.0,4177110.0,1545697.0,4553847.0,7171496.0,2339709.0,5761541.0,2379515.0,14014734.0,9572099.0
2,1963,8668362.0,10803476.0,14653169.0,4001376.0,1589790.0,8871607.0,7429088.0,2165697.0,5568586.0,2380065.0,14958936.0,10076964.0
3,1964,8367046.0,11580974.0,15166158.0,4040149.0,1674965.0,4506758.0,7548695.0,2374090.0,5377562.0,2398931.0,14990326.0,9903156.0
4,1965,8649448.0,12268292.0,13742532.0,4507195.0,1837119.0,6092509.0,7458087.0,2452860.0,7559540.0,2418330.0,16236072.0,12540614.0


# How has demand for edible oil changed over time? How much has the market grown in the last 60 years?

In [87]:
# 🗸
veg_oil_prodn_fig = px.area(production_df, x='Year', y=production_df.columns[1:])
veg_oil_prodn_fig.update_traces(textfont_size=16, hovertemplate=None)
veg_oil_prodn_fig.update_layout(hovermode="x")

veg_oil_prodn_fig.update_layout(
    title_text="<b>Global Vegetable Oil Production<b>",
    title_font_size=40,
    legend_font_size=20,
    width=1000,
    height=700
)

veg_oil_prodn_fig.update_xaxes(
    title_text="</b>Year</b>",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25)
)

veg_oil_prodn_fig.update_yaxes(
    title_text="<b>Palm Oil Fruit (mt)</b>",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25))

veg_oil_prodn_fig.show()

# Conclusion 1:
- the oil market has grown by 10x in the 60 years since 1961
- palm oil has overtaken traditional crops like soybean and rapeseed as the largest contributor to global edible oil production

# Palm Oil Production

In [88]:
palm_oil_production = production_df[["Year", "Oil, palm"]]  # new df
palm_oil_prodn_fig = px.line(
    palm_oil_production,
    x="Year",
    y="Oil, palm"
)

palm_oil_prodn_fig.update_layout(
    title_text="<b>Global Oil Palm Production<b>",
    title_font_size=40,
    legend_font_size=20,
    width=1000,
    height=700
)

palm_oil_prodn_fig.update_xaxes(
    title_text="Year",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25)
)

palm_oil_prodn_fig.update_yaxes(
    title_text="<b>Palm Oil production (mt)</b>",
    title_font=dict(size=30, family='Verdana', color='white'),
    tickfont=dict(family='Calibri', color='white', size=25)
)

palm_oil_prodn_fig.show()

# Conclusion 2:
- palm oil production capacity has continued to grow very rapidly to keep up with demand.
- in 1961 total global production of palm oil was 7.6 million tons
- by 2019 that had increaseed to 304.3 million tons, an increase of 40x

# How has production of palm oil changed by location over time?
### Geo Plot of production

In [89]:
production_df = pd.read_csv(os.path.join(DATA, 'Production_Crops_Livestock_E_All_Data.csv'), encoding='ISO-8859-1')
production_df = production_df.drop(['Area Code', 'Item Code', 'Element Code', 'Element', 'Unit'], axis=1)
production_df.columns = production_df.columns.str.replace('Y', '')
production_df = production_df.loc[:,~production_df.columns.str.endswith('F')]
production_df = production_df[production_df['Item'].isin(oil_crops)].reset_index(drop=True)
production_df['Code'] = production_df['Area'].str[:3]
production_df['Code'] = production_df['Code'].str.upper()
code = production_df.pop('Code')  # pop the code column
production_df.insert(1, 'Code', code)  # move it to the position we want
prodction_df = veg_oil_by_country = production_df[~production_df['Code'].isnull()].copy()
production_df = production_df.rename(columns={'Area': 'Entity'})  # this isn't strictly necessary. just to match wilson's work.
# need a pivot to get 'Year' into a column and 'oil type' as the column header from [3:]
production_df

Unnamed: 0,Entity,Code,Item,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,AFG,"Oil, cottonseed",4997.0,7716.0,11742.0,7960.0,7926.0,6453.0,7574.0,...,3725.0,4098.0,4762.0,4832.0,5691.0,6626.0,4176.0,6520.0,8247.0,
1,Afghanistan,AFG,"Oil, linseed",3531.0,3701.0,2857.0,3377.0,4327.0,4984.0,4962.0,...,675.0,706.0,734.0,790.0,5375.0,18316.0,19698.0,25181.0,27186.0,
2,Afghanistan,AFG,"Oil, olive, virgin",82.0,90.0,82.0,90.0,82.0,90.0,82.0,...,539.0,539.0,546.0,658.0,711.0,741.0,763.0,812.0,837.0,
3,Afghanistan,AFG,"Oil, sesame",2253.0,1876.0,1831.0,2722.0,2821.0,3149.0,3351.0,...,1292.0,1261.0,1334.0,1250.0,861.0,1043.0,1108.0,1211.0,1438.0,
4,Afghanistan,AFG,"Oil, sunflower",2938.0,3138.0,3138.0,3138.0,3238.0,3438.0,3439.0,...,60.0,61.0,138.0,960.0,1035.0,1805.0,3167.0,3199.0,3066.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1322,Net Food Importing Developing Countries,NET,"Oil, palm kernel",68166.0,62263.0,54786.0,66932.0,67852.0,69725.0,80787.0,...,157233.0,178172.0,190218.0,185730.0,188782.0,210847.0,231380.0,240627.0,256424.0,
1323,Net Food Importing Developing Countries,NET,"Oil, rapeseed",88986.0,96308.0,89137.0,90688.0,91425.0,89796.0,106003.0,...,550885.0,546318.0,381267.0,547877.0,591258.0,767189.0,687116.0,578124.0,587142.0,
1324,Net Food Importing Developing Countries,NET,"Oil, sesame",85690.0,99780.0,101034.0,90865.0,110276.0,106373.0,121703.0,...,378501.0,448481.0,364667.0,296017.0,361178.0,370513.0,339777.0,344639.0,331792.0,
1325,Net Food Importing Developing Countries,NET,"Oil, soybean",6238.0,13598.0,10197.0,17269.0,11513.0,14388.0,12144.0,...,565174.0,594144.0,637230.0,621550.0,802289.0,822007.0,1106512.0,1552340.0,1631079.0,


## I would like 3 choropleth maps with animation: 1 for production, 1 for imports, 1 for exports.

In [90]:
# the first choropleth map, not animated
veg_oil_type = 'palm'
fig = go.Figure(
    data=go.Choropleth(
        locations=veg_oil_by_country['Code'],
        z=veg_oil_by_country[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(title_text = f'{veg_oil_type} Vegetable Oil')
fig.show()

KeyError: 'palm'

# Conclusion 3:
- the majority of palm oil is produced in Malaysia & Indonesia
- Earlier, Malaysia was the leading producer, but more recently Indonesia has become the dominant producer
- There are limits to further increases for both nations as the primary driver to production capacity is suitable land.
- Both countries have finite supply of suitable land

In [None]:
# pattern = r'(?<=Oil, ).+?(?= - \d)'
# cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in veg_oil_yearly_production]
# cols = [re.sub(' ', '_', c) for c in cols]
# cols = [re.sub('\W', '', c) for c in cols]

# veg_oil_yearly_production.columns = cols
# veg_oil_yearly_production.reset_index(inplace=True)
# veg_oil_yearly_production.info()

# cols_to_rename = production_df.columns[3:]
#
# cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in cols_to_rename]
# cols = [re.sub(' ', '_', c) for c in cols]
# cols = [re.sub('\W', '', c) for c in cols]
#
# veg_oil_by_country.columns = veg_oil_by_country.columns.tolist()[:3] + cols
# veg_oil_by_country

# Trade



In [None]:
trade_df = pd.read_csv(os.path.join(DATA, 'Trade_CropsLivestock_E_All_Data.csv'), encoding='ISO-8859-1')
trade_df.head()

# Export: Which countries export palm oil?
i expect exports to closely mirror production

In [None]:
exports_df = trade_df[trade_df['Element'].str.contains('Export Quantity') == True]
exports_df = exports_df[exports_df['Item'].isin(oil_crops)==True].drop(columns=['Area Code', 'Item Code', 'Element Code'])
exports_df = exports_df.groupby(by=['Item']).sum()
exports_df.columns = exports_df.columns.str.replace('Y', '')
print(exports_df)

In [None]:
# choropleth map
geo_fig = px.choropleth(
    production_df,  # the dataframe
    locations='Item',
    color=production_df['Oil, palm'],
    color_continuous_scale=px.colors.diverging.PiYG,
    locationmode='ISO-3',
    animation_frame='Year',
    projection='natural earth'
)
geo_fig.update_layout(title_text = f'Palm Oil Exports',)

geo_fig.show()

# Help! ⬇
### Imports: Which countries import Palm Oil?

In [None]:
imports_df = trade_df[trade_df['Element'].str.contains('Import Quantity') == True]
imports_df = imports_df.drop(imports_df[imports_df['Area Code'] > 1000].index)
imports_df = imports_df.drop(imports_df[imports_df['Area Code'].isin(area_code_list)].index)
imports_df = imports_df[imports_df['Item'].isin(oil_crops)==True].drop(columns=[ 'Item Code', 'Element Code'])
imports_df.columns = imports_df.columns.str.replace('Y', '')
imports_df = imports_df.loc[:,~imports_df.columns.str.endswith('F')]
# validate the country and generate unicode-3 for each country
imports_df['country_val'] = validate_country(imports_df["Area"])
# remove the few countries that don't pass validation
imports_df = imports_df.loc[imports_df['country_val'] != False]
# generate the utf-3 code for each country
imports_df = clean_country(df=imports_df, column="Area", output_format='alpha-3')
imports_df

In [None]:
# I'd like a choropleth map with animation for imports
imports_geo_fig = px.choropleth(
    imports_df,
    locations='Area',
    color=oil_crops,
    color_continuous_scale=px.colors.sequential.Oranges,
    locationmode='ISO-3',
    projection='natural earth'
)

imports_geo_fig.update_layout(
    title_text="<b>Global Palm Oil Imports<b>",
    title_font_size=40,
    legend_font_size=20,
    width=1100,
    height=750
)

imports_geo_fig.show()

# Conclusion 4:
the two dominant importers are India and China. This is not surprising: growth in imports over the time period driven by growing populations and per capita GDP.