In [204]:
# import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import geopandas as gpd
import warnings
warnings.filterwarnings('ignore')

## Load the data

In [205]:
# Read the FAOSTAT export into a DataFrame and preview the first rows.
df = pd.read_csv(filepath_or_buffer='data/FAOSTAT_data.csv')
df.head(5)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20002002,2000-2002,%,88,E,Estimated value,
1,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20012003,2001-2003,%,89,E,Estimated value,
2,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20022004,2002-2004,%,92,E,Estimated value,
3,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20032005,2003-2005,%,93,E,Estimated value,
4,FS,Suite of Food Security Indicators,4,Afghanistan,6121,Value,21010,Average dietary energy supply adequacy (percen...,20042006,2004-2006,%,94,E,Estimated value,


## Clean and tidy the data

In [206]:
# Normalise the column labels: lowercase first, then turn every space
# into an underscore (e.g. "Domain Code" -> "domain_code").
df.columns = df.columns.str.lower().str.replace(' ', '_')

In [207]:
# Count the missing values in every column.
print(df.isnull().sum())

# Rows containing nulls are kept: no dropna is applied in this cell.

# Drop the `note` column. errors='ignore' keeps the cell re-runnable:
# without it a second execution raises KeyError because `note` is already gone.
df = df.drop(columns=['note'], errors='ignore')

# Rename `area` to `country` (a no-op if the column was already renamed).
df = df.rename(columns={'area': 'country'})

domain_code              0
domain                   0
area_code_(m49)          0
area                     0
element_code             0
element                  0
item_code                0
item                     0
year_code                0
year                     0
unit                     0
value                 9137
flag                     0
flag_description         0
note                129132
dtype: int64


In [210]:
# Mean per-capita food supply variability (kcal/cap/day) per country.
#
# The whole pipeline is one non-mutating chain: .assign() builds a new frame
# rather than writing a column into a slice of `df`, which is the pattern
# that triggers pandas' SettingWithCopyWarning.
df1 = (
    df.loc[df['item'] == 'Per capita food supply variability (kcal/cap/day)']
    # non-numeric entries become NaN and are skipped by mean()
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    # one row per (country, item) holding the mean of `value`
    .groupby(['country', 'item'], as_index=False)['value']
    .mean()
    # round the aggregated value to two decimals
    .assign(value=lambda frame: frame['value'].round(2))
)

## Data Visualization

Per Capita Food Supply Variability (kcal/cap/day)

### First Plot

In [211]:
# World map of df1: each country is shaded by its `value` column.
fig = px.choropleth(
    df1,
    locations='country',
    locationmode='country names',   # match on country names
    hover_name='country',
    color='value',
    color_continuous_scale=px.colors.sequential.Plasma,
    title='Per Capita Food Supply Variability (kcal/cap/day)',
).update_layout(width=1000, height=800)   # update_layout returns the same figure
fig.show()

In [219]:
# Country x year table of mean `value`.
#
# NOTE(review): no `item` filter is applied, so the mean is taken over every
# indicator present in `df` for a given country/year. Confirm whether a single
# item was meant to be selected before averaging.
#
# Built as a non-mutating chain: .assign() creates a new frame rather than
# writing a column into a subset of `df` (SettingWithCopyWarning pattern).
df2 = (
    df[['country', 'year', 'value']]
    # non-numeric entries become NaN and are skipped by mean()
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    # mean value per (country, year)
    .groupby(['country', 'year'], as_index=False)['value']
    .mean()
    # wide layout: one row per country, one column per year label
    .pivot(index='country', columns='year', values='value')
)
df2.head()

year,2000,2000-2002,2001,2001-2003,2002,2002-2004,2003,2003-2005,2004,2004-2006,...,2017,2017-2019,2018,2018-2020,2019,2019-2021,2020,2020-2022,2021,2022
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,223.700588,59.38,224.575882,62.94,270.487368,54.16,271.884737,52.62,244.50381,51.23,...,359.908235,42.872727,307.894,42.986364,360.781765,42.558824,399.28,35.96,488.925455,426.945556
Albania,481.722273,52.45,530.579524,52.79,520.983636,53.08,539.859091,52.88,560.862727,52.99,...,928.068947,30.909091,1065.682941,31.566667,1084.625882,18.375,1200.874667,18.15,2105.637778,631.747143
Algeria,553.7925,45.26,640.562857,45.01,577.1525,44.92,653.721364,44.51,667.308636,44.67,...,774.687143,30.085,771.72381,29.955,672.041667,18.253333,848.272222,18.907692,1380.306364,456.571111
American Samoa,61.0525,,61.35,,61.675,,62.0,,50.026,,...,51.228,,51.228,,51.218,,51.218,,2.035,3.02
Andorra,38.786667,,44.4125,,40.442222,0.3,41.285556,0.3,42.087778,0.3,...,51.605,0.3,51.6425,0.3,51.715,0.3,57.377143,,2.325,3.02


In [220]:
# Difference between the '2021' and '2000' columns, and that difference
# expressed as a percentage of the '2000' column.
absolute_change = df2['2021'] - df2['2000']
relative_change = (absolute_change / df2['2000']) * 100

In [221]:
# Assemble the 2000-vs-2021 comparison table, one row per country.
df2 = pd.DataFrame({
    'country': df2.index,
    '2000': df2['2000'],
    '2021': df2['2021'],
    'absolute_change': absolute_change.round(2),
    'relative_change': relative_change,
})

# Format the relative change as a whole-number percentage string ("118%").
# int() truncates toward zero, but it raises on NaN/inf, which occur when a
# 2000 or 2021 value is missing or the 2000 value is 0; those rows get 'n/a'
# instead of aborting the cell.
df2['relative_change'] = df2['relative_change'].map(
    lambda pct: f'{int(pct)}%' if np.isfinite(pct) else 'n/a'
)

In [222]:
# Remove the explicit `country` column, then turn the index back into a
# regular column so the frame ends up with a default integer index.
df2 = df2.drop(columns=['country']).reset_index()
df2.head()

Unnamed: 0,country,2000,2021,absolute_change,relative_change
0,Afghanistan,223.700588,488.925455,265.22,118%
1,Albania,481.722273,2105.637778,1623.92,337%
2,Algeria,553.7925,1380.306364,826.51,149%
3,American Samoa,61.0525,2.035,-59.02,-96%
4,Andorra,38.786667,2.325,-36.46,-94%


### Second Plot

In [229]:
# Grouped bars: for every country, the '2000' and '2021' columns side by side.
# Both change columns are added to the hover tooltip.
fig = px.bar(
    df2,
    x='country',
    y=['2000', '2021'],
    barmode='group',
    hover_data=dict.fromkeys(['absolute_change', 'relative_change'], True),
    labels={'value': 'Value', 'variable': 'Year'},
    title='Comparison of Values in 2000 and 2021',
)

fig.show()

Number of children under 5 years of age who are stunted (modeled estimates) (million)

In [232]:
# Sanity check: is this indicator name present in the `item` column?
value_to_check = 'Number of children under 5 years of age who are stunted (modeled estimates) (million)'
print("Exists" if (df['item'] == value_to_check).any() else "Does not exist")

Exists


In [236]:
# Number of children under 5 years of age who are stunted
# (modeled estimates) (million), totalled per country.
#
# NOTE(review): sum() adds up every row of this item for a country, i.e. all
# the years present in the data. Confirm that a cross-year total is what the
# map should show.
#
# Built as a non-mutating chain: .assign() creates a new frame rather than
# writing a column into a slice of `df` (SettingWithCopyWarning pattern).
df3 = (
    df.loc[df['item'] == 'Number of children under 5 years of age who are stunted (modeled estimates) (million)']
    # non-numeric entries become NaN and are skipped by sum()
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    # one row per (country, item) holding the summed value
    .groupby(['country', 'item'], as_index=False)['value']
    .sum()
)
df3.head()

Unnamed: 0,country,item,value
0,Afghanistan,Number of children under 5 years of age who ar...,54.3
1,Albania,Number of children under 5 years of age who ar...,0.7
2,Algeria,Number of children under 5 years of age who ar...,11.9
3,Angola,Number of children under 5 years of age who ar...,38.8
4,Argentina,Number of children under 5 years of age who ar...,6.9


In [None]:
# TODO: normalize the data

In [237]:
# World map of df3: each country is shaded by its `value` column.
map_title = 'Number of children under 5 years of age who are stunted (modeled estimates) (million)'
fig = px.choropleth(
    df3,
    title=map_title,
    locations='country',
    locationmode='country names',   # match on country names
    color='value',
    color_continuous_scale=px.colors.sequential.Plasma,
    hover_name='country',
)
fig.update_layout(height=800, width=1000)
fig.show()

In [238]:
# import highchart classes
from highcharts_core.chart import Chart
from highcharts_core.global_options.shared_options import SharedOptions
from highcharts_core.options import HighchartsOptions
from highcharts_core.options.plot_options.bar import BarOptions
from highcharts_core.options.series.bar import BarSeries