In [8]:
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

DATA = 'data'
POP = 'to_populate_db/'

# Background
Over the last 60 years, Palm Olein has grown  the world's single largest vegetable oil crop. It makes up 35% of edible oil consumption globally, and is used in numerous non-food industrial applications. Production is centred in Malaysia and Indonesia.

This Notebook will provide background analysis of the market for palm oil and vegetable oils. A second notebook will analyse the current market and extraordinary price levels in the palm oil market

# Palm Oil's environmental reputation
There is a perception that palm oil is environmentally unfriendly. Public pressure appears to be having comercial effects. "palm oil-fee" is now a branding strategy, and some companies appear to be turning to alternatives. While it is correct that palm oil is grown in areas that were once rain forest, of the environmental impact of palm oil cultivation and refinement relative to other edible oils is more complex.

# Visualisation
- **production of palm oil**: total world production & by country and region over time. line plot
- **vegetable oils production**: production and by oil type, country and region over time. stacked area plot
- **imports**: which countries import the most oil? how has it changed over time?. global map/heatmap
- **production by country**: palm oil production by country over time. global map/heatmap
- **production by country**: horizontal bar chart with national output at end of bar.
- **exports**: which countries export palm oil? how has it changed over time? global map/heatmap.
- **land used for palm oil**: how much land is used for palm oil cultivation over time? line plot.
- **land used for vegetable oil**: how much land is used for the cultivation of oil crops, by crop, by country and region, over time? stacked area plot.
- **oil yield by crop**: a comparison of oil yield per hectare of land cultivated by crop. shows that palm oil is the most productive per hectare. horizontal bar plot showing top 10 crops
- **price**: palm olein and other edible oil prices and other edible oil prices. line chart

# 1) Global palm oil production
Palm oil production has increased rapidly over the past 50 years. In 1970, the world was producing only 2 million tonnes. This is now 35 times higher: in 2018 the world produced 71 million tonnes. The change in global production is shown in the chart.3

The rise of palm oil follows the rapid increase in demand for vegetable oils more broadly. The breakdown of global vegetable oil production by crop is shown in the stacked area chart. Global production increased ten-fold since the 1960s – from 17 to 170 million tonnes in 2014. **more recent data for 2018 comes to 218 million tonnes**.

The story of palm oil is less about it as an isolated commodity, but more about the story of the rising demand for vegetable oils. Palm oil is a very productive crop. It produces 36% of the world’s oil, but uses less than 9% of croplands devoted to oil production. It has favourable production costs and is among the cheapest edible oils. Palm Oil has therefore been a natural choice to meet this demand.

Production of palm oil has increased by **4800% over the last 60 years**. The growth has occurred to meet rising demands for vegetable oils in general. Palm oil's growth is a function of increased demand for edible oils, combined with palm oil's favourable cost of production.


- 1961 = 1,478,901mt
- 2018 = 71,453,193mt
- 48x increase in 57 years

need to include a drop down menu to select countries and regions

In [9]:
prodn = pd.read_csv(os.path.join(DATA, 'palm-oil-production.csv'))  # using the data from FAO
prodn.head()

Unnamed: 0,Entity,Code,Year,"Crops - Oil, palm - 257 - Production - 5510 - tonnes"
0,Africa,,1961,1131882
1,Africa,,1962,1111006
2,Africa,,1963,1145004
3,Africa,,1964,1160831
4,Africa,,1965,1138860


In [10]:
prodn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3468 entries, 0 to 3467
Data columns (total 4 columns):
 #   Column                                                Non-Null Count  Dtype 
---  ------                                                --------------  ----- 
 0   Entity                                                3468 non-null   object
 1   Code                                                  2486 non-null   object
 2   Year                                                  3468 non-null   int64 
 3   Crops - Oil, palm - 257 - Production - 5510 - tonnes  3468 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 108.5+ KB


In [12]:
prodn['Entity'].unique()

array(['Africa', 'Americas', 'Angola', 'Asia', 'Asia, Central', 'Belgium',
       'Benin', 'Brazil', 'Burundi', 'Cambodia', 'Cameroon', 'Caribbean',
       'Central African Republic', 'Central America', 'China', 'Colombia',
       'Congo', 'Costa Rica', "Cote d'Ivoire",
       'Democratic Republic of Congo', 'Dominican Republic',
       'Eastern Africa', 'Eastern Asia', 'Eastern Europe', 'Ecuador',
       'Equatorial Guinea', 'Europe', 'Europe, Western', 'European Union',
       'Gabon', 'Gambia', 'Ghana', 'Guatemala', 'Guinea', 'Guinea-Bissau',
       'Honduras', 'Indonesia', 'Kazakhstan',
       'Land Locked Developing Countries', 'Least Developed Countries',
       'Liberia', 'Low Income Food Deficit Countries', 'Madagascar',
       'Malaysia', 'Melanesia', 'Mexico', 'Middle Africa',
       'Net Food Importing Developing Countries', 'Nicaragua', 'Nigeria',
       'Northern Europe', 'Norway', 'Oceania', 'Panama',
       'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Russia',

In [13]:
prodn = prodn.groupby("Year")["Value"].sum().reset_index()  # sum the countries to get the world total for each year
prodn

KeyError: 'Column not found: Value'

In [None]:
palm_oil_prodn_fig = px.line(prodn,
                             x="Year",
                             y="Value")  # insert a new column for world

palm_oil_prodn_fig.update_layout(title_text="<b>Global Oil Palm Production<b>",
                                 title_font_size=40,
                                 legend_font_size=20,
                                 width=1400,
                                 height=1000)  # Add figure title

palm_oil_prodn_fig.update_xaxes(title_text="Year",
                                title_font=dict(size=30,
                                                family='Verdana',
                                                color='white'),
                                tickfont=dict(family='Calibri',
                                              color='white',
                                              size=25))  # format x-axis

palm_oil_prodn_fig.update_yaxes(title_text="<b>Palm Oil production (mt)</b>",
                                title_font=dict(size=30,
                                                family='Verdana',
                                                color='white'),
                                tickfont=dict(family='Calibri',
                                              color='white',
                                              size=25))  # Format y-axes

palm_oil_prodn_fig.show()
# To-do: format plot. button to add country or region. automation

# 2) Land used for Palm Oil Production

There should be a strong correlation between increased areas under cultivation for oil palm and increased production of palm oil.

Total production should effectively be the sum of total hectares under cultivation and yield per hectare. Production increases are driven by increases in land under cultivation and improving (or deteriorating) yields per hectare.

- plot by country over time (stacked line plot)
- plot by country over time (geo heat map)

In [None]:
land = pd.read_csv(os.path.join(DATA, 'land-use-palm-oil.csv'))
world_land = land.loc[land['Entity'] == 'World']
oil_palm_fruit = world_land["Crops - Oil palm fruit - 254 - Area harvested - 5312 - ha"]

In [None]:
land_fig = px.line(world_land, x="Year", y=oil_palm_fruit)

# Add figure title
land_fig.update_layout(title_text="<b>Land under Cultivation (Palm Oil)<b>",title_font_size=40, legend_font_size=20, width=1400, height=1000)

# format x-axis
land_fig.update_xaxes(title_text="</b>Year</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))

# Format y-axes
land_fig.update_yaxes(title_text="<b>Palm Oil Fruit (mt)</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))

land_fig.show()


In [None]:
v_oil_production = pd.read_csv(os.path.join(DATA, "vegetable_oil_production_01062022.csv"))
v_oil_production

In [None]:
v_oil_production["Item"].unique()

In [None]:
# need to clean, pivot & plot this to replace #3, which will become #2
# new_df columns: "Year", "Oil, cottonseed", "Oil, linseed", "Oil, olive, virgin", "Oil, sesame", "Oil, sunflower", "Oil, groundnut", "Oil, palm", "Oil, soybean", "Oil, rapeseed", "Oil, safflower", "Oil, palm kernel"

# ) Vegetable Oil Production
- plot 1: production over time by crop. currently a simple line chart. Need to turn it into a stacked area chart
- geo-map plot showing production by country or region over time

In [None]:
vegetable_oil_production = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
vegetable_oil_production = vegetable_oil_production[vegetable_oil_production['Entity'] != 'World']
year = vegetable_oil_production['Year'].drop_duplicates(keep='first', inplace=False)
vegetable_oil_production.info()

In [None]:
veg_oil_by_country = vegetable_oil_production[~vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_country

In [None]:
veg_oil_by_area = vegetable_oil_production[vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_area

In [None]:
veg_oil_yearly_production = veg_oil_by_country.groupby('Year').sum()
veg_oil_yearly_production.info()

In [None]:
import re
pattern = r'(?<=Oil, ).+?(?= - \d)'
cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in veg_oil_yearly_production]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

In [None]:
veg_oil_yearly_production.columns = cols
veg_oil_yearly_production.reset_index(inplace=True)
veg_oil_yearly_production.info()

In [None]:
veg_oil_prodn_fig = px.area(
    veg_oil_yearly_production,
    x='Year',
    y=veg_oil_yearly_production.columns[1:]
)
veg_oil_prodn_fig.update_traces(textfont_size=16, hovertemplate=None)
veg_oil_prodn_fig.update_layout(hovermode="x")
veg_oil_prodn_fig.update_layout(title_text="<b>Vegetable Oil Production<b>",title_font_size=40, legend_font_size=20, width=1800, height=1400)  # Add figure title
veg_oil_prodn_fig.update_xaxes(title_text="</b>Year</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # format x-axis
veg_oil_prodn_fig.update_yaxes(title_text="<b>Palm Oil Fruit (mt)</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # Format y-axes
veg_oil_prodn_fig.show()

demand has continued to grow for eddibeloils across the board. Palm oil is not the single larges source of edible oils, followed by soy and rapeseed (canola). The historical long run trends suggest no massive changes to supply or demand.

THere have however been a number of "shocks" that have affected the oil market.
- pandemic
- weather
- war in ukraine



Who uses palm oil and what is it used for?

Why has the market for palm oil – and vegetable oils more broadly – increased so rapidly? What is it used for?

Palm oil is a versatile product which is used in a range of products across the world:
- Foods: over two-thirds (68%) is used in foods ranging from margarine to chocolate, pizzas, breads and cooking oils;
- Industrial applications: 27% is used in industrial applications and consumer products such as soaps, detergents, cosmetics and cleaning agents;
- Bioenergy: 5% is used as biofuels for transport, electricity or heat.

While food products dominate globally, this breakdown varies from country-to-country. Some countries use much more palm oil for biofuels than others. In Germany, for example, bioenergy is the largest use, accounting for 41% (more than food at 40%). A push towards increased biofuel consumption in the transport sector has been driving this, despite it being worse for the environment than normal diesel.

In the next section we will look at what countries produce palm oil, but here we see a map of palm oil imports. Although production is focused in only a few countries across the tropical belt, we see that palm oil is an important product across the world.


# Which Countries Import Palm oil



# Where is palm oil grown?

Oil palm is a tropical plant species. It thrives on high rainfall, adequate sunlight and humid conditions – this means the best growing areas are along a narrow band around the equator.4 Palm oil is therefore grown in many countries across Africa, South America, and Southeast Asia. In the map we see the distribution of production across the world.

Small amounts of palm oil are grown in many countries, but the global market is dominated by only two: Indonesia and Malaysia. In 2018, the world produced 72 million tonnes of oil palm. Indonesia accounted for 57% of this (41 million tonnes), and Malaysia produced 27% (20 million tonnes).

84% of global palm oil production comes from Indonesia and Malaysia.

In the chart we see the production of the palm oil plant across a number of countries. Other producers include Thailand, Colombia, Nigeria, Guatemala, and Ecuador. As we’d expect, all of these countries lie along the zone of ‘optimal conditions’ around the equator.

In [None]:
veg_oil_yearly_production

In [None]:
vegetable_oil_production['Entity'].unique()

In [None]:
vegetable_oil_production['Code'].unique()

In [None]:
vegetable_oil_production[vegetable_oil_production['Code'].isnull()]

In [None]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'Australia']

In [None]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'New Zealand']

In [None]:
import plotly.graph_objects as go
from ipywidgets import widgets
help(go.Choropleth)

In [None]:
cols_to_rename = veg_oil_by_country.columns[3:]

cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in cols_to_rename]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

veg_oil_by_country.columns = veg_oil_by_country.columns.tolist()[:3] + cols
veg_oil_by_country

In [None]:
veg_oil_type = 'palm'
fig = go.Figure(
    data=go.Choropleth(
        locations=veg_oil_by_country['Code'],
        z=veg_oil_by_country[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil Production',
)

fig.show()

In [None]:
# this also looks like production. need to find the data for imports and plot that

geo_fig = px.choropleth(
    veg_oil_by_country,
    locations='Code',
    color=veg_oil_type,
    color_continuous_scale=px.colors.diverging.PiYG,
    locationmode='ISO-3',
    animation_frame='Year',
    projection='natural earth'
)
geo_fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil',
)

geo_fig.show()

In [None]:
veg_oil_by_country[veg_oil_by_country['palm'] > 40_000_000]

In [None]:
oil_yield = pd.read_csv(os.path.join(DATA, "oil-yield-by-crop.csv"))
oil_yield.head()

# 5) Export Volumes
are a very good way to look at total volumes. Most palm oi is exported as evidenced by comparing the importer country vs the

In [None]:
imports = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
# imports.head()
year = imports['Year']
imports = imports.groupby(year).sum()
imports

In [None]:
oil_production = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
oil_production.head()