In [80]:
import os
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.spatial.ckdtree import coo_entries
import re
import plotly.graph_objects as go
from ipywidgets import widgets

DATA = 'data'

In [81]:
veg_oil_producing_countries = pd.read_csv(os.path.join(DATA, 'FAOSTAT_vegetable_oil_production_data.csv'))
year = veg_oil_producing_countries['Year'].drop_duplicates(keep='first', inplace=False)
# veg_oil_by_country.info()

In [82]:
veg_oil_producing_countries = veg_oil_producing_countries[["Area", "Year", "Item", "Value"]].pivot(index=["Area", "Year"], columns="Item").reset_index()  # drop unneeded columns
veg_oil_producing_countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10195 entries, 0 to 10194
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   (Area, )                       10195 non-null  object 
 1   (Year, )                       10195 non-null  int64  
 2   (Value, Oil, coconut (copra))  5004 non-null   float64
 3   (Value, Oil, cottonseed)       5379 non-null   float64
 4   (Value, Oil, groundnut)        5275 non-null   float64
 5   (Value, Oil, linseed)          3365 non-null   float64
 6   (Value, Oil, maize)            2732 non-null   float64
 7   (Value, Oil, olive, virgin)    1733 non-null   float64
 8   (Value, Oil, palm)             2479 non-null   float64
 9   (Value, Oil, palm kernel)      3023 non-null   float64
 10  (Value, Oil, rapeseed)         2978 non-null   float64
 11  (Value, Oil, safflower)        903 non-null    float64
 12  (Value, Oil, sesame)           3141 non-null  

In [83]:
veg_oil_producing_countries.head()

Unnamed: 0_level_0,Area,Year,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,"Oil, coconut (copra)","Oil, cottonseed","Oil, groundnut","Oil, linseed","Oil, maize","Oil, olive, virgin","Oil, palm","Oil, palm kernel","Oil, rapeseed","Oil, safflower","Oil, sesame","Oil, soybean","Oil, sunflower"
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0


In [84]:
new_cols = ["Area", "Year", "coconut", "cottonseed", "groundnut", "linseed", "maize", "olive", "palm", "palm kernel", "rapeseed", "safflower", "sesame", "soybean", "sunflower"]
veg_oil_producing_countries.columns = new_cols
veg_oil_producing_countries

Unnamed: 0,Area,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10190,Zimbabwe,2015,,12400.0,8100.0,,14000.0,,,,,,,10400.0,2000.0
10191,Zimbabwe,2016,,6300.0,7000.0,,13500.0,,,,,,,9600.0,1400.0
10192,Zimbabwe,2017,,9300.0,7300.0,,14700.0,,,,,,,14400.0,1500.0
10193,Zimbabwe,2018,,10200.0,10100.0,,14898.0,,,,,,,11700.0,1800.0


In [85]:
# using the regex patter to rename the columns?
cols_to_rename = veg_oil_producing_countries.columns[3:]

pattern = r'(?<=Oil, ).+?(?= - \d)'

cols = [re.search(pattern, c, re.RegexFlag.IGNORECASE)[0] for c in cols_to_rename]
cols = [re.sub(' ', '_', c) for c in cols]
cols = [re.sub('\W', '', c) for c in cols]

veg_oil_producing_countries.columns = veg_oil_producing_countries.columns.tolist()[:3] + cols
veg_oil_producing_countries

TypeError: 'NoneType' object is not subscriptable

In [None]:
help(go.Choropleth)

In [None]:
# think getting the regex stuff to work is required for the plot
veg_oil_type = 'palm'
fig = go.Figure(
    data=go.Choropleth(
        locations=veg_oil_producing_countries['Area'],
        z=veg_oil_producing_countries[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(title_text = f'{veg_oil_type} Vegetable Oil',)

fig.show()

# need to format this. Needs to be larger and better quality.
# colour by oil type (eg palm oil = purple)
# shade a function of volume of that product (eg, light purple for small producers and dark purple for major producers)