In [1]:
import os
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.spatial.ckdtree import coo_entries

DATA = 'data'

In [2]:
production = pd.read_csv('data/to_populate_db/oil_palm_yeild_and_production.csv')
production.head()

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code (FAO),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,QCL,Crops and livestock products,3,Albania,5510,Production,257,"Oil, palm",2010,2010,tonnes,,M,Data not available
1,QCL,Crops and livestock products,3,Albania,5510,Production,257,"Oil, palm",2011,2011,tonnes,,M,Data not available
2,QCL,Crops and livestock products,3,Albania,5510,Production,257,"Oil, palm",2012,2012,tonnes,,M,Data not available
3,QCL,Crops and livestock products,3,Albania,5510,Production,257,"Oil, palm",2013,2013,tonnes,,M,Data not available
4,QCL,Crops and livestock products,3,Albania,5510,Production,257,"Oil, palm",2014,2014,tonnes,,M,Data not available


In [3]:
production.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10364 entries, 0 to 10363
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Domain Code       10364 non-null  object 
 1   Domain            10364 non-null  object 
 2   Area Code (FAO)   10364 non-null  int64  
 3   Area              10364 non-null  object 
 4   Element Code      10364 non-null  int64  
 5   Element           10364 non-null  object 
 6   Item Code (FAO)   10364 non-null  int64  
 7   Item              10364 non-null  object 
 8   Year Code         10364 non-null  int64  
 9   Year              10364 non-null  int64  
 10  Unit              10364 non-null  object 
 11  Value             9923 non-null   float64
 12  Flag              8345 non-null   object 
 13  Flag Description  10364 non-null  object 
dtypes: float64(1), int64(5), object(8)
memory usage: 1.1+ MB


In [4]:
production.columns = ['entity', 'code', 'year', 'crops']

ValueError: Length mismatch: Expected axis has 14 elements, new values have 4 elements

In [None]:
world_df = production.loc[production['entity'] == 'World']
world_df.head()

In [None]:
fig = px.line(world_df, x='year', y='crops', title='Oil Palm Production')
fig.show()

In [None]:
price = pd.read_csv(os.path.join(DATA, 'palm oil prices 021020 - 290422.csv'))
price.head()

In [None]:
price.info()

In [None]:
price.columns = ['date', 'price']

In [None]:
price['price'] = price['price'].ffill()

In [None]:
fig = px.line(price, x='date', y='price', title='Palm Olein Price')
fig.show()

In [None]:
# wavelet

import pywt
import numpy as np
from statsmodels.robust import mad

wavelet = pywt.Wavelet('bior1.5')
prices = price['price'].values
level = 5
mode='periodic'
coeffs = pywt.wavedec(prices, wavelet, mode=mode, level=5)


# The Wavelet decoding output
- I forgot what these coefficients represent, have to check the theory of Wavelet
- At the moment let's just park it. I just realised that I used Wavelet to denoise the signal (Wavelet smoothing) in my previous fault detection work
-

In [None]:
from matplotlib import pyplot as plt
ax = None
if ax is None:
    f, ax = plt.subplots(len(coeffs), figsize=(16, 9))

for i, coef in enumerate(coeffs):
    ax[i].plot(coef)

In [None]:
x = np.arange(len(prices))
# calculate a threshold
sigma = mad(coeffs[-level])
# changing this threshold also changes the behavior
uthresh = sigma * np.sqrt(2 * np.log(len(prices)))
coeffs[1:] = (pywt.threshold(i, value=uthresh, mode="soft") for i in coeffs[1:])
# reconstruct the signal using the thresholded coefficients
y = pywt.waverec(coeffs, wavelet, mode=mode)
f, ax = plt.subplots(figsize=(16, 9))
ax.plot(x, prices, color="b", alpha=0.5, label='raw signal')
ax.plot(x, y[:len(x)], color="r", label='denoised signal')
ax.set_xlim((0, np.max(x)))

In [None]:
vegetable_oil_production = pd.read_csv(os.path.join(DATA, 'vegetable-oil-production.csv'))
vegetable_oil_production = vegetable_oil_production[vegetable_oil_production['Entity'] != 'World']
year = vegetable_oil_production['Year'].drop_duplicates(keep='first', inplace=False)
vegetable_oil_production.info()

In [None]:
veg_oil_by_country = vegetable_oil_production[~vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_country

In [None]:
veg_oil_by_area = vegetable_oil_production[vegetable_oil_production['Code'].isnull()].copy()
veg_oil_by_area

In [None]:
veg_oil_yearly_production = veg_oil_by_country.groupby('Year').sum()
veg_oil_yearly_production.info()

In [None]:
import re
pattern = r'(?<=Oil, ).+?(?= - \d)'  # this is neat, please explain this
cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in veg_oil_yearly_production]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

In [None]:
veg_oil_yearly_production.columns = cols
veg_oil_yearly_production.reset_index(inplace=True)
veg_oil_yearly_production.info()

In [None]:
veg_oil_fig = px.area(
    veg_oil_yearly_production,
    x='Year',
    y=veg_oil_yearly_production.columns[1:]
)
veg_oil_fig.update_traces(textfont_size=16, hovertemplate=None)
veg_oil_fig.update_layout(hovermode="x")

veg_oil_fig.show()

In [None]:
veg_oil_yearly_production

In [None]:
vegetable_oil_production['Entity'].unique()

In [None]:
vegetable_oil_production['Code'].unique()

In [None]:
vegetable_oil_production[vegetable_oil_production['Code'].isnull()]

In [None]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'Australia']

In [None]:
vegetable_oil_production[vegetable_oil_production['Entity'] == 'New Zealand']

In [None]:
import plotly.graph_objects as go
from ipywidgets import widgets
help(go.Choropleth)

In [None]:
cols_to_rename = veg_oil_by_country.columns[3:]

cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in cols_to_rename]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

veg_oil_by_country.columns = veg_oil_by_country.columns.tolist()[:3] + cols
veg_oil_by_country

In [None]:
veg_oil_type = 'palm'
fig = go.Figure(
    data=go.Choropleth(
        locations=veg_oil_by_country['Code'],
        z=veg_oil_by_country[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil',
)

fig.show()

In [None]:
geo_fig = px.choropleth(
    veg_oil_by_country,
    locations='Code',
    color=veg_oil_type,
    color_continuous_scale=px.colors.diverging.PiYG,
    locationmode='ISO-3',
    animation_frame='Year',
    projection='natural earth'
)
geo_fig.update_layout(
    title_text = f'{veg_oil_type} Vegetable Oil',
)

geo_fig.show()

In [None]:
veg_oil_by_country[veg_oil_by_country['palm'] > 40_000_000]