In [85]:
import os
import pandas as pd
import matplotlib as mpl
import matplotlib_inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.spatial.ckdtree import coo_entries
import re
import plotly.graph_objects as go
from ipywidgets import widgets
from dataprep.clean import clean_country, validate_country

# Directory containing the input CSV files; joined with a file name via os.path.join when loading.
DATA = 'data'

In [86]:
# Load the FAOSTAT vegetable-oil production table from the data directory.
veg_oil_producing_countries = pd.read_csv(
    os.path.join(DATA, 'FAOSTAT_vegetable_oil_production.csv')
)

# Distinct values of the "Year" column, keeping the first occurrence of each
# (the Series retains the row labels of those first occurrences).
year = veg_oil_producing_countries['Year'].drop_duplicates()

In [87]:
# Reshape from long to wide: keep only the four columns needed, then spread
# each "Item" into its own column so that every row is one (Area, Year) pair.
# No `values=` is passed to pivot, so the resulting column labels are
# hierarchical; the next cell overwrites them with a flat list of names.
veg_oil_producing_countries = (
    veg_oil_producing_countries
    .loc[:, ["Area", "Year", "Item", "Value"]]
    .pivot(index=["Area", "Year"], columns="Item")
    .reset_index()
)

In [88]:
# Flat, short column labels for the pivoted frame.
# NOTE: labels are applied by position, so this list has to match the order of
# the pivoted columns exactly (presumably the sorted FAOSTAT item names —
# verify if the source file's items ever change).
new_cols = [
    "Country", "Year",
    "coconut", "cottonseed", "groundnut", "linseed", "maize",
    "olive", "palm", "palm kernel", "rapeseed", "safflower",
    "sesame", "soybean", "sunflower",
]
veg_oil_producing_countries.columns = new_cols

# Preview the first rows to confirm the new labels.
veg_oil_producing_countries.head()

Unnamed: 0,Country,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0


In [89]:
# Flag each row with the result of dataprep's country validation; the flag is
# used by the following cell to filter the frame.
veg_oil_producing_countries['country_val'] = validate_country(
    veg_oil_producing_countries["Country"]
)

# Preview the frame with the new flag column appended.
veg_oil_producing_countries.head()

Unnamed: 0,Country,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower,country_val
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0,True
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0,True
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0,True
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0,True
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0,True


In [90]:
# Import the cleaning function under an alias. This cell binds the name
# `clean_country` to the cleaned DataFrame (later cells read it under that
# name); without the alias that assignment shadows the imported function and
# a second run of this cell fails with "'DataFrame' object is not callable".
from dataprep.clean import clean_country as clean_country_fn

# Drop the rows whose "Country" value failed validation in the previous cell.
veg_oil_producing_countries = veg_oil_producing_countries.loc[veg_oil_producing_countries['country_val'] != False]

# Add a "Country_clean" column holding the ISO alpha-3 code for each country.
clean_country = clean_country_fn(df=veg_oil_producing_countries, column="Country", output_format='alpha-3')
clean_country

  0%|          | 0/9 [00:00<?, ?it/s]

Country Cleaning Report:
	9276 values cleaned (100.0%)
Result contains 9276 (100.0%) values in the correct format and 0 null values (0.0%)


Unnamed: 0,Country,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower,country_val,Country_clean
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0,True,AFG
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0,True,AFG
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0,True,AFG
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0,True,AFG
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0,True,AFG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10190,Zimbabwe,2015,,12400.0,8100.0,,14000.0,,,,,,,10400.0,2000.0,True,ZWE
10191,Zimbabwe,2016,,6300.0,7000.0,,13500.0,,,,,,,9600.0,1400.0,True,ZWE
10192,Zimbabwe,2017,,9300.0,7300.0,,14700.0,,,,,,,14400.0,1500.0,True,ZWE
10193,Zimbabwe,2018,,10200.0,10100.0,,14898.0,,,,,,,11700.0,1800.0,True,ZWE


In [91]:
# Display the frame bound to `clean_country` by the country-cleaning cell above.
clean_country

Unnamed: 0,Country,Year,coconut,cottonseed,groundnut,linseed,maize,olive,palm,palm kernel,rapeseed,safflower,sesame,soybean,sunflower,country_val,Country_clean
0,Afghanistan,1961,,4997.0,,3531.0,,82.0,,,,,2253.0,,2938.0,True,AFG
1,Afghanistan,1962,,7716.0,,3701.0,,90.0,,,,,1876.0,,3138.0,True,AFG
2,Afghanistan,1963,,11742.0,,2857.0,,82.0,,,,,1831.0,,3138.0,True,AFG
3,Afghanistan,1964,,7960.0,,3377.0,,90.0,,,,,2722.0,,3138.0,True,AFG
4,Afghanistan,1965,,7926.0,,4327.0,,82.0,,,,,2821.0,,3238.0,True,AFG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10190,Zimbabwe,2015,,12400.0,8100.0,,14000.0,,,,,,,10400.0,2000.0,True,ZWE
10191,Zimbabwe,2016,,6300.0,7000.0,,13500.0,,,,,,,9600.0,1400.0,True,ZWE
10192,Zimbabwe,2017,,9300.0,7300.0,,14700.0,,,,,,,14400.0,1500.0,True,ZWE
10193,Zimbabwe,2018,,10200.0,10100.0,,14898.0,,,,,,,11700.0,1800.0,True,ZWE


In [92]:
# World map of production for one vegetable oil.
# TODO (original note): getting the regex stuff to work may be required for the plot.
veg_oil_type = 'palm'

# The cleaned frame holds one row per country *and* year, but a choropleth
# encodes a single value per location. Passing every year at once stacks
# several values on each country and stretches the colour range over all
# years, so restrict the plot to one year (the most recent one in the data).
plot_year = clean_country['Year'].max()
plot_data = clean_country.loc[clean_country['Year'] == plot_year]

fig = go.Figure(
    data=go.Choropleth(
        # Locations and values come from the same frame so they stay aligned.
        locations=plot_data['Country_clean'],
        z=plot_data[veg_oil_type],
        locationmode='ISO-3',  #  "ISO-3" | "USA-states" | "country names"
        colorscale='Viridis',
        colorbar_title=veg_oil_type
    )
)
fig.update_layout(title_text=f'{veg_oil_type} Vegetable Oil ({plot_year})')

fig.show()

# TODO: format this figure; it needs to be larger and better quality.
# TODO: colour by oil type (e.g. palm oil = purple).
# TODO: make the shade a function of the volume of that product (e.g. light
#       purple for small producers and dark purple for major producers).