In [2]:
import os
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.spatial.ckdtree import coo_entries

DATA = 'data'

In [3]:
veg_oil_prodn = pd.read_csv(os.path.join(DATA, 'vegetable_oil_production_01062022.csv'))
veg_oil_prodn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47863 entries, 0 to 47862
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Domain Code       47863 non-null  object 
 1   Domain            47863 non-null  object 
 2   Area Code (FAO)   47863 non-null  int64  
 3   Area              47863 non-null  object 
 4   Element Code      47863 non-null  int64  
 5   Element           47863 non-null  object 
 6   Item Code (FAO)   47863 non-null  int64  
 7   Item              47863 non-null  object 
 8   Year Code         47863 non-null  int64  
 9   Year              47863 non-null  int64  
 10  Unit              47863 non-null  object 
 11  Value             44729 non-null  float64
 12  Flag              40047 non-null  object 
 13  Flag Description  47863 non-null  object 
dtypes: float64(1), int64(5), object(8)
memory usage: 5.1+ MB


In [4]:
vegetable_oil_production = veg_oil_prodn[["Year", "Item", "Value"]]
vegetable_oil_production.info()
vegetable_oil_production.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47863 entries, 0 to 47862
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    47863 non-null  int64  
 1   Item    47863 non-null  object 
 2   Value   44729 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 1.1+ MB


Unnamed: 0,Year,Item,Value
0,1961,"Oil, cottonseed",4997.0
1,1962,"Oil, cottonseed",7716.0
2,1963,"Oil, cottonseed",11742.0
3,1964,"Oil, cottonseed",7960.0
4,1965,"Oil, cottonseed",7926.0


In [5]:
vegetable_oil_production = vegetable_oil_production.groupby(["Year", "Item"])["Value"].sum().reset_index()

In [6]:
vegetable_oil_production.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767 entries, 0 to 766
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    767 non-null    int64  
 1   Item    767 non-null    object 
 2   Value   767 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 18.1+ KB


In [7]:
vegetable_oil_production = vegetable_oil_production.pivot(columns="Item",index="Year")
vegetable_oil_production.head()

Unnamed: 0_level_0,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value
Item,"Oil, coconut (copra)","Oil, cottonseed","Oil, groundnut","Oil, linseed","Oil, maize","Oil, olive, virgin","Oil, palm","Oil, palm kernel","Oil, rapeseed","Oil, safflower","Oil, sesame","Oil, soybean","Oil, sunflower"
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
1961,1633350.0,2308732.0,2671133.0,854706.0,350146.0,1359340.0,1518901.0,490955.0,1205779.0,94176.0,468071.0,3308351.0,1961147.0
1962,1983487.0,2401261.0,2796692.0,974771.0,368321.0,931298.0,1515941.0,491444.0,1288257.0,157264.0,566256.0,3605636.0,2307777.0
1963,1914760.0,2653521.0,3075010.0,937672.0,378168.0,1807877.0,1575070.0,454371.0,1288358.0,155827.0,570664.0,3845507.0,2425820.0
1964,1839165.0,2894971.0,3206274.0,944800.0,398323.0,936277.0,1610032.0,494483.0,1292996.0,126048.0,578031.0,3848921.0,2380328.0
1965,1899538.0,3091282.0,2952694.0,1048865.0,436068.0,1244506.0,1616213.0,516250.0,1837794.0,128107.0,570083.0,4135235.0,3039955.0


In [8]:
new_column_headers = [
    "coconut",
    "cottonseed",
    "groundnut",
    "linseed",
    "maize",
    "olive",
    "palm",
    "palm kernel",
    "rapeseed",
    "safflower",
    "sesame",
    "soybean",
    "sunflower"]

In [9]:
vegetable_oil_production.columns = new_column_headers
vegetable_oil_production.reset_index()

Unnamed: 0,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower
0,1961,1633350.0,2308732.0,2671133.0,854706.0,350146.0,1359340.0,1518901.0,490955.0,1205779.0,94176.0,468071.0,3308351.0,1961147.0
1,1962,1983487.0,2401261.0,2796692.0,974771.0,368321.0,931298.0,1515941.0,491444.0,1288257.0,157264.0,566256.0,3605636.0,2307777.0
2,1963,1914760.0,2653521.0,3075010.0,937672.0,378168.0,1807877.0,1575070.0,454371.0,1288358.0,155827.0,570664.0,3845507.0,2425820.0
3,1964,1839165.0,2894971.0,3206274.0,944800.0,398323.0,936277.0,1610032.0,494483.0,1292996.0,126048.0,578031.0,3848921.0,2380328.0
4,1965,1899538.0,3091282.0,2952694.0,1048865.0,436068.0,1244506.0,1616213.0,516250.0,1837794.0,128107.0,570083.0,4135235.0,3039955.0
5,1966,2103880.0,3155496.0,3080353.0,940570.0,441892.0,1286487.0,1726205.0,517252.0,1847340.0,183440.0,539783.0,4678015.0,3049590.0
6,1967,1967949.0,2783612.0,3283987.0,829748.0,448913.0,1358816.0,1741217.0,439146.0,1943493.0,185450.0,574676.0,4987435.0,3525106.0
7,1968,1877391.0,2752218.0,3011566.0,823883.0,452839.0,1502536.0,1911724.0,459260.0,2120921.0,127028.0,606401.0,4980757.0,3693732.0
8,1969,1848141.0,2930020.0,3099074.0,840106.0,475093.0,1288796.0,2042471.0,508730.0,1999990.0,141480.0,596639.0,5424443.0,3672450.0
9,1970,1928726.0,2875362.0,3534011.0,1055964.0,494680.0,1405284.0,2049339.0,511973.0,2164826.0,158710.0,680524.0,6686595.0,3670857.0


In [10]:
vegetable_oil_production_figure = px.area(vegetable_oil_production, x='Year', y=vegetable_oil_production.columns[1:])
vegetable_oil_production_figure.update_traces(textfont_size=16, hovertemplate=None)
vegetable_oil_production_figure.update_layout(hovermode="x")
vegetable_oil_production_figure.update_layout(title_text="<b>Vegetable Oil Production<b>",title_font_size=40, legend_font_size=20, width=1800, height=1400)  # Add figure title
vegetable_oil_production_figure.update_xaxes(title_text="</b>Year</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # format x-axis
vegetable_oil_production_figure.update_yaxes(title_text="<b>Palm Oil Fruit (mt)</b>", title_font=dict(size=30, family='Verdana', color='white'), tickfont=dict(family='Calibri', color='white', size=25))  # Format y-axes
vegetable_oil_production_figure.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['coconut', 'cottonseed', 'groundnut', 'linseed', 'maize', 'olive', 'palm', 'palm kernel', 'rapeseed', 'safflower', 'sesame', 'soybean', 'sunflower'] but received: Year