In [42]:
import pandas as pd
import numpy as np
import plotly.express as px
import os

### Creating Export Side Dataframe

In [43]:
# Getting the current working directory
base_path = os.getcwd() + '//Data//'

# Loading data and grouping to exclude coffee_type
domestic_consumption = pd.read_csv(base_path + 'Coffee_domestic_consumption.csv').drop(columns = ['Total_domestic_consumption', 'Coffee type'])
coffee_production = pd.read_csv(base_path + 'Coffee_production.csv').drop(columns = ['Total_production', 'Coffee type'])
coffee_export = pd.read_csv(base_path + 'Coffee_export.csv').drop(columns = ['Total_export'])

# Pivoting dataframes long and grouping to exclude coffee type since this isnt captured in export dataset
domestic_consumption = pd.melt(domestic_consumption, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                                           'value': 'Domestic Consumption'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_production = pd.melt(coffee_production, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                                           'value': 'Total Production'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_export = pd.melt(coffee_export, id_vars=['Country']).rename(columns = {'variable': 'Year','value': 'Total Export'}).groupby(['Country', 'Year']).sum().reset_index()

# Merging
export_df = domestic_consumption.merge(coffee_production, left_on=['Country',  'Year'], 
                                    right_on=['Country',  'Year'])
export_df['Year'] = export_df['Year'].str[:4]
export_df = export_df.merge(coffee_export, left_on=['Country',  'Year'], 
                                    right_on=['Country', 'Year'])

# Pivoting long and adding average change measure
export_long_df = export_df.melt(id_vars=['Country', 'Year']).sort_values(by = ['Country', 'variable', 'Year'])
export_long_df['Annual % Change'] = export_long_df['value'].pct_change()
export_long_df['Annual % Change'] = np.where(export_long_df['Year'] == '1990', 0, export_long_df['Annual % Change'])

# Adding rolling 3 year average
export_long_df['Three Rolling Average'] = export_long_df['value'].rolling(3).mean()
export_long_df['Three Rolling Average'] = np.where(export_long_df['Year'].isin(['1990','1991','1992']) , 0, export_long_df['Three Rolling Average'])
export_long_df['Year'] = pd.to_numeric(export_long_df['Year'])
export_long_df = export_long_df.fillna(0)

export_long_df.head(10)


Unnamed: 0,Country,Year,variable,value,Annual % Change,Three Rolling Average
0,Angola,1990,Domestic Consumption,1200000.0,0.0,0.0
1,Angola,1991,Domestic Consumption,1800000.0,0.5,0.0
2,Angola,1992,Domestic Consumption,2100000.0,0.166667,0.0
3,Angola,1993,Domestic Consumption,1200000.0,-0.428571,1700000.0
4,Angola,1994,Domestic Consumption,1500000.0,0.25,1600000.0
5,Angola,1995,Domestic Consumption,600000.0,-0.6,1100000.0
6,Angola,1996,Domestic Consumption,1200000.0,1.0,1100000.0
7,Angola,1997,Domestic Consumption,2400000.0,1.0,1400000.0
8,Angola,1998,Domestic Consumption,1800000.0,-0.25,1800000.0
9,Angola,1999,Domestic Consumption,1200000.0,-0.333333,1800000.0


#### Visualizing Production Side Dataframe

In [44]:
# Graphing all data
export_agg_df = export_long_df[['Country', 'variable', 'value']].groupby(['Country', 'variable']).sum().reset_index()
export_fig = px.bar(export_agg_df.sort_values(by = ['value'], ascending=False), 
                         x = 'Country', y = 'value',color = 'variable', title = "Total Consumption and Production by Country, All Time")
export_fig.show()

# Graphing data from last 5 years

export_agg_df_5year = export_long_df[export_long_df['Year'] > 2016][['Country', 'variable', 'value']].groupby(['Country', 'variable']).sum().reset_index()
export_fig = px.bar(export_agg_df_5year.sort_values(by = ['value'], ascending=False), 
                         x = 'Country', y = 'value',color = 'variable', title = "Total Consumption and Production by Country, Since 2017")
export_fig.show()


In [45]:
# Calculating top producer countries
top_producers = coffee_production[['Country', 'Total Production']].groupby(
                                                                        'Country'
                                                                           ).sum().reset_index().sort_values(by = 
                                                                            'Total Production', ascending= False)['Country'][:10]
# Graphing top producer countries over time
time_fig = px.line(export_long_df[(export_long_df['Country'].isin(top_producers)) &
                                  (export_long_df['variable']== 'Total Export')],
                            x = 'Year', y = 'Annual % Change', color = 'Country', line_dash = 'variable', 
                            title = 'Top 10 Total Production, Annual % Change')
time_fig.show()



In [46]:
# Graphing top producer countries over time
time_fig = px.line(export_long_df[(export_long_df['Country'].isin(top_producers)) &
                                  (export_long_df['variable']== 'Total Export') &
                                  (export_long_df['Year'] > 2012)],
                            x = 'Year', y = 'Three Rolling Average', color = 'Country', line_dash = 'variable', 
                            title = 'Top 10 Total Production, 3-Year rolling average')
time_fig.show()


In [47]:
# Graphing top producer countries over time
time_change_fig = px.line(export_long_df[export_long_df['Country'].isin(top_producers)],
                            x = 'Year', y = 'value', color = 'Country', line_dash = 'variable', 
                            title = 'Top 10 Producer Countries')
time_change_fig.show()

### Creating Import Side Dataframe

In [48]:
# Loading data and grouping to exclude coffee_type
coffee_import = pd.read_csv(base_path + 'Coffee_import.csv').drop(columns = ['Total_import'])
coffee_importers_consumption = pd.read_csv(base_path + 'Coffee_importers_consumption.csv').drop(columns = ['Total_import_consumption'])
coffee_reexport = pd.read_csv(base_path + 'Coffee_re_export.csv').drop(columns = ['Total_re_export'])

# Pivoting dataframes long and 
coffee_import = pd.melt(coffee_import, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                'value': 'import'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_importers_consumption = pd.melt(coffee_importers_consumption, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                'value': 'import consumption'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_reexport = pd.melt(coffee_reexport, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                  'value': 're-export'}).groupby(['Country', 'Year']).sum().reset_index()

# Merging
import_df = coffee_import.merge(coffee_importers_consumption, left_on=['Country',  'Year'], 
                                    right_on=['Country',  'Year'])
import_df['Year'] = import_df['Year'].str[:4]
import_df = import_df.merge(coffee_reexport, left_on=['Country',  'Year'], 
                                    right_on=['Country', 'Year'])

# Pivoting long and adding average change measure
import_long_df = import_df.melt(id_vars=['Country', 'Year']).sort_values(by = ['Country', 'variable', 'Year'])
import_long_df['Annual % Change'] = import_long_df['value'].pct_change()
import_long_df = import_long_df.fillna(0)
import_long_df['Annual % Change'] = np.where(import_long_df['Year'] == 1990, 0, import_long_df['Annual % Change'])
import_long_df.head(10)

Unnamed: 0,Country,Year,variable,value,Annual % Change
0,Austria,1990,import,112800000,0.0
1,Austria,1991,import,123480000,0.094681
2,Austria,1992,import,132360000,0.071914
3,Austria,1993,import,110160000,-0.167724
4,Austria,1994,import,85020000,-0.228214
5,Austria,1995,import,73860000,-0.131263
6,Austria,1996,import,72600000,-0.017059
7,Austria,1997,import,77640000,0.069421
8,Austria,1998,import,77580000,-0.000773
9,Austria,1999,import,90000000,0.160093


In [49]:
import_agg_df = export_long_df[['Country', 'variable', 'value']].groupby(['Country', 'variable']).sum().reset_index()
import_fig = px.bar(export_agg_df.sort_values(by = ['value'], ascending=False), 
                         x = 'Country', y = 'value',color = 'variable', title = "Total Consumption and Production by Country")
import_fig.show()