In [111]:
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

DATA = 'data'

# Background
Over the last 60 years, Palm Olein has grown  the world's single largest vegetable oil crop. It makes up 35% of edible oil consumption globally, and is used in numerous non-food industrial applications. Production is centred in Malaysia and Indonesia.

This Notebook will provide background analysis of the market for palm oil and vegetable oils. A second notebook will analyse the current market and extraordinary price levels in the palm oil market

# Palm Oil's environmental reputation
There is a perception that palm oil is environmentally unfriendly. Public pressure appears to be having comercial effects. "palm oil-fee" is now a branding strategy, and some companies appear to be turning to alternatives. While it is correct that palm oil is grown in areas that were once rain forest, of the environmental impact of palm oil cultivation and refinement relative to other edible oils is more complex.

# Visualisation
- **production of palm oil**: total world production & by country and region over time. line plot
- **vegetable oils production**: production and by oil type, country and region over time. stacked area plot
- **imports**: which countries import the most oil? how has it changed over time?. global map/heatmap
- **production by country**: palm oil production by country over time. global map/heatmap
- **production by country**: horizontal bar chart with national output at end of bar.
- **exports**: which countries export palm oil? how has it changed over time? global map/heatmap.
- **land used for palm oil**: how much land is used for palm oil cultivation over time? line plot.
- **land used for vegetable oil**: how much land is used for the cultivation of oil crops, by crop, by country and region, over time? stacked area plot.
- **oil yield by crop**: a comparison of oil yield per hectare of land cultivated by crop. shows that palm oil is the most productive per hectare. horizontal bar plot showing top 10 crops
- **price**: palm olein and other edible oil prices and other edible oil prices. line chart

# 1) Global palm oil production
Palm oil production has increased rapidly over the past 50 years. In 1970, the world was producing only 2 million tonnes. This is now 35 times higher: in 2018 the world produced 71 million tonnes. The change in global production is shown in the chart.3

The rise of palm oil follows the rapid increase in demand for vegetable oils more broadly. The breakdown of global vegetable oil production by crop is shown in the stacked area chart. Global production increased ten-fold since the 1960s – from 17 to 170 million tonnes in 2014. **more recent data for 2018 comes to 218 million tonnes**.

The story of palm oil is less about it as an isolated commodity, but more about the story of the rising demand for vegetable oils. Palm oil is a very productive crop. It produces 36% of the world’s oil, but uses less than 9% of croplands devoted to oil production. It has favourable production costs and is among the cheapest edible oils. Palm Oil has therefore been a natural choice to meet this demand.

Production of palm oil has increased by **4800% over the last 60 years**. The growth has occurred to meet rising demands for vegetable oils in general. Palm oil's growth is a function of increased demand for edible oils, combined with palm oil's favourable cost of production.


- 1961 = 1,478,901mt
- 2018 = 71,453,193mt
- 48x increase in 57 years

need to include a drop down menu to select countries and regions

In [112]:
prodn = pd.read_csv(os.path.join(DATA, 'oil_palm_fruit_production_data_5-27-2022.csv'))  # using the data from FAO
prodn.head()

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code (FAO),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,QCL,Crops and livestock products,7,Angola,5510,Production,254,Oil palm fruit,1961,1961,tonnes,210000.0,F,FAO estimate
1,QCL,Crops and livestock products,7,Angola,5510,Production,254,Oil palm fruit,1962,1962,tonnes,210000.0,F,FAO estimate
2,QCL,Crops and livestock products,7,Angola,5510,Production,254,Oil palm fruit,1963,1963,tonnes,200000.0,F,FAO estimate
3,QCL,Crops and livestock products,7,Angola,5510,Production,254,Oil palm fruit,1964,1964,tonnes,210000.0,F,FAO estimate
4,QCL,Crops and livestock products,7,Angola,5510,Production,254,Oil palm fruit,1965,1965,tonnes,170000.0,F,FAO estimate


In [113]:
prodn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2582 entries, 0 to 2581
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Domain Code       2582 non-null   object 
 1   Domain            2582 non-null   object 
 2   Area Code (FAO)   2582 non-null   int64  
 3   Area              2582 non-null   object 
 4   Element Code      2582 non-null   int64  
 5   Element           2582 non-null   object 
 6   Item Code (FAO)   2582 non-null   int64  
 7   Item              2582 non-null   object 
 8   Year Code         2582 non-null   int64  
 9   Year              2582 non-null   int64  
 10  Unit              2582 non-null   object 
 11  Value             2514 non-null   float64
 12  Flag              1943 non-null   object 
 13  Flag Description  2582 non-null   object 
dtypes: float64(1), int64(5), object(8)
memory usage: 282.5+ KB


In [114]:
prodn = prodn[['Area', 'Year', 'Value']]  # let's just keep what we need for now
prodn.head()

Unnamed: 0,Area,Year,Value
0,Angola,1961,210000.0
1,Angola,1962,210000.0
2,Angola,1963,200000.0
3,Angola,1964,210000.0
4,Angola,1965,170000.0


In [115]:
prodn['Area'].unique()

array(['Angola', 'Benin', 'Brazil', 'Burundi', 'Cambodia', 'Cameroon',
       'Central African Republic', 'China', 'China, mainland', 'Colombia',
       'Congo', 'Costa Rica', "Côte d'Ivoire",
       'Democratic Republic of the Congo', 'Dominican Republic',
       'Ecuador', 'Equatorial Guinea', 'Gabon', 'Gambia', 'Ghana',
       'Guatemala', 'Guinea', 'Guinea-Bissau', 'Honduras', 'Indonesia',
       'Liberia', 'Madagascar', 'Malaysia', 'Mexico', 'Nicaragua',
       'Nigeria', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru',
       'Philippines', 'Sao Tome and Principe', 'Senegal', 'Sierra Leone',
       'Solomon Islands', 'Suriname', 'Thailand', 'Togo',
       'United Republic of Tanzania',
       'Venezuela (Bolivarian Republic of)'], dtype=object)

In [116]:
prodn = prodn.groupby("Year")["Value"].sum().reset_index()  # sum the countries to get the world total for each year
prodn

Unnamed: 0,Year,Value
0,1961,13756250.0
1,1962,13365466.0
2,1963,13699681.0
3,1964,13709323.0
4,1965,13911506.0
5,1966,13871499.0
6,1967,13361568.0
7,1968,14338539.0
8,1969,15070861.0
9,1970,15463942.0


In [117]:
palm_oil_prodn_fig = px.line(prodn,
                             x="Year",
                             y="Value")  # insert a new column for world

palm_oil_prodn_fig.update_layout(title_text="<b>Global Oil Palm Production<b>",
                                 title_font_size=40,
                                 legend_font_size=20,
                                 width=1400,
                                 height=1000)  # Add figure title

palm_oil_prodn_fig.update_xaxes(title_text="Year",
                                title_font=dict(size=30,
                                                family='Verdana',
                                                color='white'),
                                tickfont=dict(family='Calibri',
                                              color='white',
                                              size=25))  # format x-axis

palm_oil_prodn_fig.update_yaxes(title_text="<b>Palm Oil production (mt)</b>",
                                title_font=dict(size=30,
                                                family='Verdana',
                                                color='white'),
                                tickfont=dict(family='Calibri',
                                              color='white',
                                              size=25))  # Format y-axes

palm_oil_prodn_fig.show()
# To-do: format plot. button to add country or region. automation

# 2) Land used for Palm Oil Production

There should be a strong correlation between increased areas under cultivation for oil palm and increased production of palm oil.

Total production should effectively be the sum of total hectares under cultivation and yield per hectare. Production increases are driven by increases in land under cultivation and improving (or deteriorating) yields per hectare.

- plot by country over time (stacked line plot)
- plot by country over time (geo heat map)

In [118]:
land = pd.read_csv(os.path.join(DATA, 'land-use-palm-oil.csv'))
world_land = land.loc[land['Entity'] == 'World']
oil_palm_fruit = world_land["Crops - Oil palm fruit - 254 - Area harvested - 5312 - ha"]

In [119]:
land_fig = px.line(world_land, x="Year", y=oil_palm_fruit)

# Add figure title
land_fig.update_layout(title_text="<b>Land under Cultivation (Palm Oil)<b>",title_font_size=40, legend_font_size=20, width=1400, height=1000)

# format x-axis
land_fig.update_xaxes(title_text="</b>Year</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))

# Format y-axes
land_fig.update_yaxes(title_text="<b>Palm Oil Fruit (mt)</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))

land_fig.show()


In [120]:
v_oil_production = pd.read_csv(os.path.join(DATA, "vegetable_oil_production_01062022.csv"))
v_oil_production

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code (FAO),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,QCL,Crops and livestock products,2,Afghanistan,5510,Production,331,"Oil, cottonseed",1961,1961,tonnes,4997.0,Fc,Calculated data
1,QCL,Crops and livestock products,2,Afghanistan,5510,Production,331,"Oil, cottonseed",1962,1962,tonnes,7716.0,Fc,Calculated data
2,QCL,Crops and livestock products,2,Afghanistan,5510,Production,331,"Oil, cottonseed",1963,1963,tonnes,11742.0,Fc,Calculated data
3,QCL,Crops and livestock products,2,Afghanistan,5510,Production,331,"Oil, cottonseed",1964,1964,tonnes,7960.0,Fc,Calculated data
4,QCL,Crops and livestock products,2,Afghanistan,5510,Production,331,"Oil, cottonseed",1965,1965,tonnes,7926.0,Fc,Calculated data
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47858,QCL,Crops and livestock products,181,Zimbabwe,5510,Production,268,"Oil, sunflower",2015,2015,tonnes,2000.0,*,Unofficial figure
47859,QCL,Crops and livestock products,181,Zimbabwe,5510,Production,268,"Oil, sunflower",2016,2016,tonnes,1400.0,*,Unofficial figure
47860,QCL,Crops and livestock products,181,Zimbabwe,5510,Production,268,"Oil, sunflower",2017,2017,tonnes,1500.0,*,Unofficial figure
47861,QCL,Crops and livestock products,181,Zimbabwe,5510,Production,268,"Oil, sunflower",2018,2018,tonnes,1800.0,*,Unofficial figure


In [121]:
v_oil_production["Item"].unique()

array(['Oil, cottonseed', 'Oil, linseed', 'Oil, olive, virgin',
       'Oil, sesame', 'Oil, sunflower', 'Oil, groundnut', 'Oil, palm',
       'Oil, soybean', 'Oil, rapeseed', 'Oil, safflower',
       'Oil, palm kernel', 'Oil, maize', 'Oil, coconut (copra)'],
      dtype=object)

In [122]:
# need to clean, pivot & plot this to replace #3, which will become #2
# new_df columns: "Year", "Oil, cottonseed", "Oil, linseed", "Oil, olive, virgin", "Oil, sesame", "Oil, sunflower", "Oil, groundnut", "Oil, palm", "Oil, soybean", "Oil, rapeseed", "Oil, safflower", "Oil, palm kernel"

# ) Vegetable Oil Production
- plot 1: production over time by crop. currently a simple line chart. Need to turn it into a stacked area chart
- geo-map plot showing production by country or region over time

In [159]:
vegetable_oil_production = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
vegetable_oil_production = vegetable_oil_production[vegetable_oil_production['Entity'] != 'World']
year = vegetable_oil_production['Year'].drop_duplicates(keep='first', inplace=False)
vegetable_oil_production.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11010 entries, 0 to 11063
Data columns (total 16 columns):
 #   Column                                                                     Non-Null Count  Dtype  
---  ------                                                                     --------------  -----  
 0   Entity                                                                     11010 non-null  object 
 1   Code                                                                       9269 non-null   object 
 2   Year                                                                       11010 non-null  int64  
 3   Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes          5685 non-null   float64
 4   Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes           4158 non-null   float64
 5   Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes          4333 non-null   float64
 6   Crops processed - Oil, palm - 257 - Production - 5510 

In [160]:
veg_oil_by_country = vegetable_oil_production[~vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_country

Unnamed: 0,Entity,Code,Year,"Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes","Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes","Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes","Crops processed - Oil, palm - 257 - Production - 5510 - tonnes","Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes","Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes","Crops processed - Oil, cottonseed - 331 - Production - 5510 - tonnes","Crops processed - Oil, coconut (copra) - 252 - Production - 5510 - tonnes","Crops processed - Oil, olive, virgin - 261 - Production - 5510 - tonnes","Crops processed - Oil, safflower - 281 - Production - 5510 - tonnes","Crops processed - Oil, sunflower - 268 - Production - 5510 - tonnes","Crops processed - Oil, maize - 60 - Production - 5510 - tonnes","Crops processed - Oil, palm kernel - 258 - Production - 5510 - tonnes"
0,Afghanistan,AFG,1961,,2253.0,3531.0,,,,4997.0,,82.0,,2938.0,,
1,Afghanistan,AFG,1962,,1876.0,3701.0,,,,7716.0,,90.0,,3138.0,,
2,Afghanistan,AFG,1963,,1831.0,2857.0,,,,11742.0,,82.0,,3138.0,,
3,Afghanistan,AFG,1964,,2722.0,3377.0,,,,7960.0,,90.0,,3138.0,,
4,Afghanistan,AFG,1965,,2821.0,4327.0,,,,7926.0,,82.0,,3238.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11059,Zimbabwe,ZWE,1969,,,,,,4242.0,3004.0,,,,333.0,1800.0,
11060,Zimbabwe,ZWE,1970,,,,,,6006.0,9180.0,,,,832.0,2250.0,
11061,Zimbabwe,ZWE,1971,,,,,,4284.0,10926.0,,,,666.0,3600.0,
11062,Zimbabwe,ZWE,1972,,,,,,7980.0,11844.0,,,,999.0,4500.0,


In [161]:
veg_oil_by_area = vegetable_oil_production[vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_area

Unnamed: 0,Entity,Code,Year,"Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes","Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes","Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes","Crops processed - Oil, palm - 257 - Production - 5510 - tonnes","Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes","Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes","Crops processed - Oil, cottonseed - 331 - Production - 5510 - tonnes","Crops processed - Oil, coconut (copra) - 252 - Production - 5510 - tonnes","Crops processed - Oil, olive, virgin - 261 - Production - 5510 - tonnes","Crops processed - Oil, safflower - 281 - Production - 5510 - tonnes","Crops processed - Oil, sunflower - 268 - Production - 5510 - tonnes","Crops processed - Oil, maize - 60 - Production - 5510 - tonnes","Crops processed - Oil, palm kernel - 258 - Production - 5510 - tonnes"
54,Africa,,1961,2337.0,23706.0,10917.0,1131882.0,31180.0,448152.0,166846.0,35923.0,77143.0,3810.0,33811.0,60192.0,77279.0
55,Africa,,1962,2893.0,30158.0,9627.0,1111006.0,22141.0,517235.0,163605.0,37781.0,87970.0,3752.0,30479.0,62217.0,72627.0
56,Africa,,1963,853.0,30177.0,7144.0,1145004.0,28708.0,577561.0,211456.0,44069.0,143734.0,3818.0,33889.0,64525.0,67875.0
57,Africa,,1964,2312.0,25252.0,10791.0,1160831.0,26614.0,552984.0,180852.0,42435.0,143327.0,3703.0,30011.0,62547.0,78443.0
58,Africa,,1965,1984.0,29208.0,14285.0,1138860.0,24215.0,646906.0,242830.0,42180.0,112610.0,3784.0,27358.0,65229.0,95883.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10812,Western Asia,,2010,528876.0,92769.0,2270.0,,478099.0,11589.0,286082.0,7.0,443892.0,0.0,744693.0,68699.0,
10813,Western Asia,,2011,455841.0,89268.0,12924.0,,446379.0,10886.0,332377.0,227.0,461490.0,0.0,838192.0,64937.0,
10814,Western Asia,,2012,395594.0,91205.0,34718.0,,378278.0,9998.0,308910.0,133.0,471316.0,37.0,795581.0,82235.0,
10815,Western Asia,,2013,391071.0,82693.0,16731.0,,369019.0,9577.0,280947.0,27.0,416862.0,42.0,837072.0,98291.0,


In [162]:
veg_oil_yearly_production = veg_oil_by_country.groupby('Year').sum()
veg_oil_yearly_production.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54 entries, 1961 to 2014
Data columns (total 13 columns):
 #   Column                                                                     Non-Null Count  Dtype  
---  ------                                                                     --------------  -----  
 0   Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes          54 non-null     float64
 1   Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes           54 non-null     float64
 2   Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes          54 non-null     float64
 3   Crops processed - Oil, palm - 257 - Production - 5510 - tonnes             54 non-null     float64
 4   Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes         54 non-null     float64
 5   Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes        54 non-null     float64
 6   Crops processed - Oil, cottonseed - 331 - Production - 

In [163]:
import re
pattern = r'(?<=Oil, ).+?(?= - \d)'
cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in veg_oil_yearly_production]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

In [164]:
veg_oil_yearly_production.columns = cols
veg_oil_yearly_production.reset_index(inplace=True)
veg_oil_yearly_production.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Year           54 non-null     int64  
 1   soybean        54 non-null     float64
 2   sesame         54 non-null     float64
 3   linseed        54 non-null     float64
 4   palm           54 non-null     float64
 5   rapeseed       54 non-null     float64
 6   groundnut      54 non-null     float64
 7   cottonseed     54 non-null     float64
 8   coconut_copra  54 non-null     float64
 9   olive_virgin   54 non-null     float64
 10  safflower      54 non-null     float64
 11  sunflower      54 non-null     float64
 12  maize          54 non-null     float64
 13  palm_kernel    54 non-null     float64
dtypes: float64(13), int64(1)
memory usage: 6.0 KB


In [165]:
veg_oil_prodn_fig = px.area(
    veg_oil_yearly_production,
    x='Year',
    y=veg_oil_yearly_production.columns[1:]
)
veg_oil_prodn_fig.update_traces(textfont_size=16, hovertemplate=None)
veg_oil_prodn_fig.update_layout(hovermode="x")
veg_oil_prodn_fig.update_layout(title_text="<b>Vegetable Oil Production<b>",title_font_size=40, legend_font_size=20, width=1800, height=1400)  # Add figure title
veg_oil_prodn_fig.update_xaxes(title_text="</b>Year</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # format x-axis
veg_oil_prodn_fig.update_yaxes(title_text="<b>Palm Oil Fruit (mt)</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # Format y-axes
veg_oil_prodn_fig.show()

demand has continued to grow for eddibeloils across the board. Palm oil is not the single larges source of edible oils, followed by soy and rapeseed (canola). The historical long run trends suggest no massive changes to supply or demand.

THere have however been a number of "shocks" that have affected the oil market.
- pandemic
- weather
- war in ukraine



Who uses palm oil and what is it used for?

Why has the market for palm oil – and vegetable oils more broadly – increased so rapidly? What is it used for?

Palm oil is a versatile product which is used in a range of products across the world:
- Foods: over two-thirds (68%) is used in foods ranging from margarine to chocolate, pizzas, breads and cooking oils;
- Industrial applications: 27% is used in industrial applications and consumer products such as soaps, detergents, cosmetics and cleaning agents;
- Bioenergy: 5% is used as biofuels for transport, electricity or heat.

While food products dominate globally, this breakdown varies from country-to-country. Some countries use much more palm oil for biofuels than others. In Germany, for example, bioenergy is the largest use, accounting for 41% (more than food at 40%). A push towards increased biofuel consumption in the transport sector has been driving this, despite it being worse for the environment than normal diesel.

In the next section we will look at what countries produce palm oil, but here we see a map of palm oil imports. Although production is focused in only a few countries across the tropical belt, we see that palm oil is an important product across the world.


# Which Countries Import Palm oil



# Where is palm oil grown?

Oil palm is a tropical plant species. It thrives on high rainfall, adequate sunlight and humid conditions – this means the best growing areas are along a narrow band around the equator.4 Palm oil is therefore grown in many countries across Africa, South America, and Southeast Asia. In the map we see the distribution of production across the world.

Small amounts of palm oil are grown in many countries, but the global market is dominated by only two: Indonesia and Malaysia. In 2018, the world produced 72 million tonnes of oil palm. Indonesia accounted for 57% of this (41 million tonnes), and Malaysia produced 27% (20 million tonnes).

84% of global palm oil production comes from Indonesia and Malaysia.

In the chart we see the production of the palm oil plant across a number of countries. Other producers include Thailand, Colombia, Nigeria, Guatemala, and Ecuador. As we’d expect, all of these countries lie along the zone of ‘optimal conditions’ around the equator.

In [166]:
veg_oil_yearly_production

Unnamed: 0,Year,soybean,sesame,linseed,palm,rapeseed,groundnut,cottonseed,coconut_copra,olive_virgin,safflower,sunflower,maize,palm_kernel
0,1961,3037151.0,376688.0,833972.0,1479101.0,1094042.0,2482640.0,2182397.0,1656633.0,1359340.0,91183.0,1945145.0,346546.0,476229.0
1,1962,3314136.0,446366.0,956169.0,1476141.0,1158704.0,2592954.0,2267912.0,2014666.0,931298.0,154331.0,2291303.0,364721.0,472833.0
2,1963,3558007.0,444734.0,920996.0,1535270.0,1117953.0,2809981.0,2462267.0,1952232.0,1807877.0,152774.0,2408407.0,374168.0,440997.0
3,1964,3538521.0,464334.0,927744.0,1570232.0,1076421.0,2894609.0,2638865.0,1866058.0,936277.0,123028.0,2362334.0,393323.0,478057.0
4,1965,3825235.0,455541.0,1028719.0,1576413.0,1533002.0,2649761.0,2763883.0,1923358.0,1244506.0,124893.0,3022958.0,430068.0,497071.0
5,1966,4316415.0,422708.0,911148.0,1666405.0,1525107.0,2722542.0,2793722.0,2120232.0,1286487.0,180332.0,3032815.0,435392.0,497999.0
6,1967,4615535.0,453525.0,800576.0,1669417.0,1634314.0,2948028.0,2408307.0,1989787.0,1358816.0,182249.0,3507762.0,441913.0,426873.0
7,1968,4626857.0,484295.0,789436.0,1823924.0,1805909.0,2694486.0,2376942.0,1910839.0,1502536.0,123658.0,3674740.0,445339.0,439208.0
8,1969,5049043.0,475000.0,803834.0,1942871.0,1704012.0,2801919.0,2586107.0,1885552.0,1288796.0,137742.0,3653089.0,468093.0,491381.0
9,1970,6268195.0,512208.0,1016002.0,1937739.0,1866874.0,3181101.0,2501339.0,1977428.0,1405284.0,154979.0,3651199.0,486680.0,498467.0


In [167]:
vegetable_oil_production['Entity'].unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'American Samoa',
       'Americas', 'Angola', 'Antigua and Barbuda', 'Argentina',
       'Armenia', 'Asia', 'Asia, Central', 'Australia',
       'Australia & New Zealand', 'Austria', 'Azerbaijan', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belgium-Luxembourg', 'Belize',
       'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Caribbean', 'Central African Republic',
       'Central America', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros',
       'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia',
       'Cuba', 'Cyprus', 'Czechia', 'Czechoslovakia',
       'Democratic Republic of Congo', 'Denmark', 'Dominica',
       'Dominican Republic', 'Eastern Africa', 'Eastern Asia',
       'Eastern Europe', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopi

In [168]:
vegetable_oil_production['Code'].unique()

array(['AFG', nan, 'ALB', 'DZA', 'ASM', 'AGO', 'ATG', 'ARG', 'ARM', 'AUS',
       'AUT', 'AZE', 'BGD', 'BRB', 'BLR', 'BEL', 'BLZ', 'BEN', 'BOL',
       'BIH', 'BWA', 'BRA', 'BGR', 'BFA', 'BDI', 'KHM', 'CMR', 'CAN',
       'CAF', 'TCD', 'CHL', 'CHN', 'COL', 'COM', 'COG', 'COK', 'CRI',
       'CIV', 'HRV', 'CUB', 'CYP', 'CZE', 'OWID_CZS', 'COD', 'DNK', 'DMA',
       'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'ERI', 'EST', 'SWZ', 'ETH',
       'FJI', 'FIN', 'FRA', 'PYF', 'GAB', 'GMB', 'GEO', 'DEU', 'GHA',
       'GRC', 'GRD', 'GLP', 'GUM', 'GTM', 'GIN', 'GNB', 'GUY', 'HTI',
       'HND', 'HKG', 'HUN', 'ISL', 'IND', 'IDN', 'IRN', 'IRQ', 'IRL',
       'ISR', 'ITA', 'JAM', 'JPN', 'JOR', 'KAZ', 'KEN', 'KIR', 'KWT',
       'KGZ', 'LAO', 'LVA', 'LBN', 'LBR', 'LBY', 'LTU', 'LUX', 'MDG',
       'MWI', 'MYS', 'MDV', 'MLI', 'MLT', 'MRT', 'MUS', 'OWID_MNS', 'MEX',
       'FSM', 'MDA', 'MNE', 'MSR', 'MAR', 'MOZ', 'MMR', 'NAM', 'NPL',
       'NLD', 'NCL', 'NZL', 'NIC', 'NER', 'NGA', 'PRK', 'MKD', 'NOR',
     

In [169]:
vegetable_oil_production[vegetable_oil_production['Code'].isnull()]

Unnamed: 0,Entity,Code,Year,"Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes","Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes","Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes","Crops processed - Oil, palm - 257 - Production - 5510 - tonnes","Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes","Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes","Crops processed - Oil, cottonseed - 331 - Production - 5510 - tonnes","Crops processed - Oil, coconut (copra) - 252 - Production - 5510 - tonnes","Crops processed - Oil, olive, virgin - 261 - Production - 5510 - tonnes","Crops processed - Oil, safflower - 281 - Production - 5510 - tonnes","Crops processed - Oil, sunflower - 268 - Production - 5510 - tonnes","Crops processed - Oil, maize - 60 - Production - 5510 - tonnes","Crops processed - Oil, palm kernel - 258 - Production - 5510 - tonnes"
54,Africa,,1961,2337.0,23706.0,10917.0,1131882.0,31180.0,448152.0,166846.0,35923.0,77143.0,3810.0,33811.0,60192.0,77279.0
55,Africa,,1962,2893.0,30158.0,9627.0,1111006.0,22141.0,517235.0,163605.0,37781.0,87970.0,3752.0,30479.0,62217.0,72627.0
56,Africa,,1963,853.0,30177.0,7144.0,1145004.0,28708.0,577561.0,211456.0,44069.0,143734.0,3818.0,33889.0,64525.0,67875.0
57,Africa,,1964,2312.0,25252.0,10791.0,1160831.0,26614.0,552984.0,180852.0,42435.0,143327.0,3703.0,30011.0,62547.0,78443.0
58,Africa,,1965,1984.0,29208.0,14285.0,1138860.0,24215.0,646906.0,242830.0,42180.0,112610.0,3784.0,27358.0,65229.0,95883.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10812,Western Asia,,2010,528876.0,92769.0,2270.0,,478099.0,11589.0,286082.0,7.0,443892.0,0.0,744693.0,68699.0,
10813,Western Asia,,2011,455841.0,89268.0,12924.0,,446379.0,10886.0,332377.0,227.0,461490.0,0.0,838192.0,64937.0,
10814,Western Asia,,2012,395594.0,91205.0,34718.0,,378278.0,9998.0,308910.0,133.0,471316.0,37.0,795581.0,82235.0,
10815,Western Asia,,2013,391071.0,82693.0,16731.0,,369019.0,9577.0,280947.0,27.0,416862.0,42.0,837072.0,98291.0,


In [170]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'Australia']

Unnamed: 0,Entity,Code,Year,"Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes","Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes","Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes","Crops processed - Oil, palm - 257 - Production - 5510 - tonnes","Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes","Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes","Crops processed - Oil, cottonseed - 331 - Production - 5510 - tonnes","Crops processed - Oil, coconut (copra) - 252 - Production - 5510 - tonnes","Crops processed - Oil, olive, virgin - 261 - Production - 5510 - tonnes","Crops processed - Oil, safflower - 281 - Production - 5510 - tonnes","Crops processed - Oil, sunflower - 268 - Production - 5510 - tonnes","Crops processed - Oil, maize - 60 - Production - 5510 - tonnes","Crops processed - Oil, palm kernel - 258 - Production - 5510 - tonnes"
574,Australia,AUS,1961,22.0,,3556.0,,,668.0,716.0,18526.0,97.0,470.0,901.0,194.0,
575,Australia,AUS,1962,50.0,,8030.0,,,479.0,575.0,18904.0,86.0,422.0,531.0,243.0,
576,Australia,AUS,1963,180.0,,9226.0,,,521.0,537.0,19653.0,95.0,1513.0,1188.0,413.0,
577,Australia,AUS,1964,313.0,,16338.0,,,727.0,693.0,19452.0,146.0,3734.0,978.0,425.0,
578,Australia,AUS,1965,925.0,,1828.0,,,607.0,2887.0,19470.0,95.0,2702.0,881.0,462.0,
579,Australia,AUS,1966,233.0,70.0,4371.0,,,1018.0,5908.0,19681.0,159.0,7405.0,1522.0,547.0,
580,Australia,AUS,1967,129.0,74.0,3036.0,,,1231.0,4259.0,19721.0,118.0,4856.0,850.0,401.0,
581,Australia,AUS,1968,136.0,85.0,9611.0,,,835.0,8160.0,17533.0,197.0,3170.0,1394.0,462.0,
582,Australia,AUS,1969,233.0,136.0,12359.0,,2000.0,480.0,8638.0,18668.0,202.0,981.0,2730.0,462.0,
583,Australia,AUS,1970,638.0,172.0,12273.0,,13000.0,1416.0,7092.0,16564.0,175.0,2560.0,5469.0,462.0,


In [171]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'New Zealand']

Unnamed: 0,Entity,Code,Year,"Crops processed - Oil, soybean - 237 - Production - 5510 - tonnes","Crops processed - Oil, sesame - 290 - Production - 5510 - tonnes","Crops processed - Oil, linseed - 334 - Production - 5510 - tonnes","Crops processed - Oil, palm - 257 - Production - 5510 - tonnes","Crops processed - Oil, rapeseed - 271 - Production - 5510 - tonnes","Crops processed - Oil, groundnut - 244 - Production - 5510 - tonnes","Crops processed - Oil, cottonseed - 331 - Production - 5510 - tonnes","Crops processed - Oil, coconut (copra) - 252 - Production - 5510 - tonnes","Crops processed - Oil, olive, virgin - 261 - Production - 5510 - tonnes","Crops processed - Oil, safflower - 281 - Production - 5510 - tonnes","Crops processed - Oil, sunflower - 268 - Production - 5510 - tonnes","Crops processed - Oil, maize - 60 - Production - 5510 - tonnes","Crops processed - Oil, palm kernel - 258 - Production - 5510 - tonnes"
6658,New Zealand,NZL,1961,,,1762.0,,,,,2505.0,,,,22.0,
6659,New Zealand,NZL,1962,,,2526.0,,,,,2555.0,,,,43.0,
6660,New Zealand,NZL,1963,,,3071.0,,,,,2067.0,,,,47.0,
6661,New Zealand,NZL,1964,,,3080.0,,,,,1939.0,,,,65.0,
6662,New Zealand,NZL,1965,,,1694.0,,,,,2200.0,,,,76.0,
6663,New Zealand,NZL,1966,,,1759.0,,,,,1721.0,,,,83.0,
6664,New Zealand,NZL,1967,,,2216.0,,,,,2695.0,,,,77.0,
6665,New Zealand,NZL,1968,,,1472.0,,,,,3260.0,,,,97.0,
6666,New Zealand,NZL,1969,,,3288.0,,,,,3133.0,,,,124.0,
6667,New Zealand,NZL,1970,,,3754.0,,,,,2923.0,,,,69.0,


In [172]:
import plotly.graph_objects as go
from ipywidgets import widgets
help(go.Choropleth)

Help on class Choropleth in module plotly.graph_objs._choropleth:

class Choropleth(plotly.basedatatypes.BaseTraceType)
 |  Choropleth(arg=None, autocolorscale=None, coloraxis=None, colorbar=None, colorscale=None, customdata=None, customdatasrc=None, featureidkey=None, geo=None, geojson=None, hoverinfo=None, hoverinfosrc=None, hoverlabel=None, hovertemplate=None, hovertemplatesrc=None, hovertext=None, hovertextsrc=None, ids=None, idssrc=None, legendgroup=None, legendgrouptitle=None, legendrank=None, locationmode=None, locations=None, locationssrc=None, marker=None, meta=None, metasrc=None, name=None, reversescale=None, selected=None, selectedpoints=None, showlegend=None, showscale=None, stream=None, text=None, textsrc=None, uid=None, uirevision=None, unselected=None, visible=None, z=None, zauto=None, zmax=None, zmid=None, zmin=None, zsrc=None, **kwargs)
 |  
 |  Method resolution order:
 |      Choropleth
 |      plotly.basedatatypes.BaseTraceType
 |      plotly.basedatatypes.BaseTrace

In [173]:
cols_to_rename = veg_oil_by_country.columns[3:]

cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in cols_to_rename]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

veg_oil_by_country.columns = veg_oil_by_country.columns.tolist()[:3] + cols
veg_oil_by_country

Unnamed: 0,Entity,Code,Year,soybean,sesame,linseed,palm,rapeseed,groundnut,cottonseed,coconut_copra,olive_virgin,safflower,sunflower,maize,palm_kernel
0,Afghanistan,AFG,1961,,2253.0,3531.0,,,,4997.0,,82.0,,2938.0,,
1,Afghanistan,AFG,1962,,1876.0,3701.0,,,,7716.0,,90.0,,3138.0,,
2,Afghanistan,AFG,1963,,1831.0,2857.0,,,,11742.0,,82.0,,3138.0,,
3,Afghanistan,AFG,1964,,2722.0,3377.0,,,,7960.0,,90.0,,3138.0,,
4,Afghanistan,AFG,1965,,2821.0,4327.0,,,,7926.0,,82.0,,3238.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11059,Zimbabwe,ZWE,1969,,,,,,4242.0,3004.0,,,,333.0,1800.0,
11060,Zimbabwe,ZWE,1970,,,,,,6006.0,9180.0,,,,832.0,2250.0,
11061,Zimbabwe,ZWE,1971,,,,,,4284.0,10926.0,,,,666.0,3600.0,
11062,Zimbabwe,ZWE,1972,,,,,,7980.0,11844.0,,,,999.0,4500.0,


In [174]:
veg_oil_type = 'palm'
fig = go.Figure(
    data=go.Choropleth(
        locations=veg_oil_by_country['Code'],
        z=veg_oil_by_country[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil Production',
)

fig.show()

In [175]:
# this also looks like production. need to find the data for imports and plot that

geo_fig = px.choropleth(
    veg_oil_by_country,
    locations='Code',
    color=veg_oil_type,
    color_continuous_scale=px.colors.diverging.PiYG,
    locationmode='ISO-3',
    animation_frame='Year',
    projection='natural earth'
)
geo_fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil',
)

geo_fig.show()

In [176]:
veg_oil_by_country[veg_oil_by_country['palm'] > 40_000_000]

Unnamed: 0,Entity,Code,Year,soybean,sesame,linseed,palm,rapeseed,groundnut,cottonseed,coconut_copra,olive_virgin,safflower,sunflower,maize,palm_kernel


In [177]:
oil_yield = pd.read_csv(os.path.join(DATA, "oil-yield-by-crop.csv"))
oil_yield.head()

Unnamed: 0,Entity,Code,Year,Oil yield (t/ha)
0,Coconut Oil,,2018,0.257735
1,Cottonseed Oil,,2018,0.141688
2,Groundnut Oil,,2018,0.183447
3,Olive Oil,,2018,0.339773
4,Palm Oil,,2018,2.835088


# 5) Export Volumes
are a very good way to look at total volumes. Most palm oi is exported as evidenced by comparing the importer country vs the

In [None]:
imports = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
# imports.head()
year = imports['Year']
imports = imports.groupby(year).sum()
imports

In [None]:
oil_production = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
oil_production.head()