In [55]:
import pandas as pd
import numpy as np
import plotly.express as px
import os

### Creating Export Side Dataframe

In [69]:
# Getting the current working directory
base_path = os.getcwd() + '//Data//'

# Loading data and grouping to exclude coffee_type
domestic_consumption = pd.read_csv(base_path + 'Coffee_domestic_consumption.csv').drop(columns = ['Total_domestic_consumption'])
coffee_production = pd.read_csv(base_path + 'Coffee_production.csv').drop(columns = ['Total_production'])
coffee_export = pd.read_csv(base_path + 'Coffee_export.csv').drop(columns = ['Total_export'])

# Pivoting dataframes long and grouping to exclude coffee type since this isnt captured in export dataset
domestic_consumption = pd.melt(domestic_consumption, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                                           'value': 'Domestic Consumption'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_production = pd.melt(coffee_production, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                                           'value': 'Total Production'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_export = pd.melt(coffee_export, id_vars=['Country']).rename(columns = {'variable': 'Year','value': 'Total Export'}).groupby(['Country', 'Year']).sum().reset_index()

# Merging
export_df = domestic_consumption.merge(coffee_production, left_on=['Country',  'Year'], 
                                    right_on=['Country',  'Year'])
export_df['Year'] = export_df['Year'].str[:4]
export_df = export_df.merge(coffee_export, left_on=['Country',  'Year'], 
                                    right_on=['Country', 'Year'])

# Pivoting long
export_long_df = export_df.melt(id_vars=['Country', 'Year'])
export_long_df.head(10)
coffee_production.head(10)


Unnamed: 0,Country,Year,Total Production
0,Angola,1990/91,3000000.0
1,Angola,1991/92,4740000.0
2,Angola,1992/93,4680000.0
3,Angola,1993/94,1980000.0
4,Angola,1994/95,4620000.0
5,Angola,1995/96,3720000.0
6,Angola,1996/97,4260000.0
7,Angola,1997/98,3840000.0
8,Angola,1998/99,5100000.0
9,Angola,1999/00,3300000.0


#### Visualizing Production Side Dataframe

In [59]:
export_agg_df = export_long_df[['Country', 'variable', 'value']].groupby(['Country', 'variable']).sum().reset_index()
export_fig = px.bar(export_agg_df.sort_values(by = ['value'], ascending=False), 
                         x = 'Country', y = 'value',color = 'variable', title = "Total Consumption and Production by Country")
export_fig.show()

In [73]:
#print(coffee_production.head(10))
top_producers = coffee_production.sort_values(by = 'Total Production', ascending= False)['Country'][:10]
#top_producers = coffee_production[]
'''
time_fig = px.line(merged_aggregated[merged_aggregated['Country'].isin(top_producers)],
                            x = 'Year', y = 'value', color = 'Country', line_dash = 'variable', 
                            title = 'Top 10 Countries Production versus Consumption')
time_fig.show()
'''


TypeError: '<' not supported between instances of 'float' and 'str'

### Creating Import Side Dataframe

In [None]:
# Loading data and grouping to exclude coffee_type
coffee_import = pd.read_csv(base_path + 'Coffee_import.csv').drop(columns = ['Total_import'])
coffee_importers_consumption = pd.read_csv(base_path + 'Coffee_importers_consumption.csv').drop(columns = ['Total_import_consumption'])
coffee_reexport = pd.read_csv(base_path + 'Coffee_re_export.csv').drop(columns = ['Total_re_export'])

# Pivoting dataframes long and 
coffee_import = pd.melt(coffee_import, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                'value': 'import'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_importers_consumption = pd.melt(coffee_importers_consumption, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                'value': 'import consumption'}).groupby(['Country', 'Year']).sum().reset_index()
coffee_reexport = pd.melt(coffee_reexport, id_vars=['Country']).rename(columns = {'variable': 'Year',
                                                                                  'value': 're-export'}).groupby(['Country', 'Year']).sum().reset_index()

# Merging
import_df = coffee_import.merge(coffee_importers_consumption, left_on=['Country',  'Year'], 
                                    right_on=['Country',  'Year'])
import_df['Year'] = import_df['Year'].str[:4]
import_df = import_df.merge(coffee_reexport, left_on=['Country',  'Year'], 
                                    right_on=['Country', 'Year'])
import_df.head(10)

Unnamed: 0,Country,Year,import,import consumption,re-export
0,Austria,1990,112800000,80400000,24900000
1,Austria,1991,123480000,78120000,45360000
2,Austria,1992,132360000,72720000,57600000
3,Austria,1993,110160000,80100000,32100000
4,Austria,1994,85020000,65160000,22860000
5,Austria,1995,73860000,60720000,13740000
6,Austria,1996,72600000,64560000,9780000
7,Austria,1997,77640000,65220000,13320000
8,Austria,1998,77580000,66300000,12600000
9,Austria,1999,90000000,67380000,23880000


In [70]:
import_agg_df = export_long_df[['Country', 'variable', 'value']].groupby(['Country', 'variable']).sum().reset_index()
import_fig = px.bar(export_agg_df.sort_values(by = ['value'], ascending=False), 
                         x = 'Country', y = 'value',color = 'variable', title = "Total Consumption and Production by Country")
import_fig.show()