In [27]:
import pandas as pd
import numpy as np
import plotly.express as px
import os

### Loading and Pivoting

In [28]:
# Data directory located under the current working directory.
# Joining with a trailing '' keeps a trailing path separator, so any code that
# still builds paths as `base_path + filename` continues to work.
base_path = os.path.join(os.getcwd(), 'Data', '')

# Loading data.
# The precomputed total columns are dropped before melting; otherwise melt
# would turn them into rows of the 'Year' column alongside the real years.
domestic_consumption = pd.read_csv(
    os.path.join(base_path, 'Coffee_domestic_consumption.csv')
).drop(columns=['Total_domestic_consumption'])
coffee_production = pd.read_csv(
    os.path.join(base_path, 'Coffee_production.csv')
).drop(columns=['Total_production'])
coffee_inventory = pd.read_csv(
    os.path.join(base_path, 'Coffee_green_coffee_inventorie.csv')
)

# Pivoting dataframes long: every column that is not an identifier becomes one
# (Year, value) row per Country / Coffee type.
id_columns = ['Country', 'Coffee type']
domestic_consumption = domestic_consumption.melt(
    id_vars=id_columns, var_name='Year', value_name='Domestic Consumption'
)
coffee_production = coffee_production.melt(
    id_vars=id_columns, var_name='Year', value_name='Total Production'
)

# Merging consumption and production on the shared keys.
# merge defaults to an inner join, so only Country / Coffee type / Year
# combinations present in both frames are kept.
merged = domestic_consumption.merge(coffee_production, on=id_columns + ['Year'])
merged.head(10)

Unnamed: 0,Country,Coffee type,Year,Domestic Consumption,Total Production
0,Angola,Robusta/Arabica,1990/91,1200000,3000000.0
1,Bolivia (Plurinational State of),Arabica,1990/91,1500000,7380000.0
2,Brazil,Arabica/Robusta,1990/91,492000000,1637160000.0
3,Burundi,Arabica/Robusta,1990/91,120000,29220000.0
4,Ecuador,Arabica/Robusta,1990/91,21000000,90240000.0
5,Indonesia,Robusta/Arabica,1990/91,74520000,446460000.0
6,Madagascar,Robusta,1990/91,21000000,58920000.0
7,Malawi,Arabica,1990/91,120000,6300000.0
8,Papua New Guinea,Arabica/Robusta,1990/91,180000,57780000.0
9,Paraguay,Arabica,1990/91,900000,7860000.0


### Examining total Production + Consumption

In [45]:
# Per-country totals summed over every year and coffee type.
# These frames are kept as separate names because other cells read them
# (e.g. grouped_production is used to rank the top producers).
grouped_consumption = (
    domestic_consumption[['Country', 'Domestic Consumption']]
    .groupby('Country', as_index=False)
    .sum()
)
grouped_production = (
    coffee_production[['Country', 'Total Production']]
    .groupby('Country', as_index=False)
    .sum()
)

# Aggregating the merged consumption/production dataframe by country, then
# melting it long so both measures can be drawn as separate colours in one chart.
merged_aggregated = (
    merged[['Country', 'Domestic Consumption', 'Total Production']]
    .groupby('Country', as_index=False)
    .sum()
    .melt(id_vars=['Country'],
          value_vars=['Domestic Consumption', 'Total Production'])
)

# The figure shows both measures, so the title names both of them.
total_fig = px.bar(
    merged_aggregated.sort_values(by='value', ascending=False),
    x='Country',
    y='value',
    color='variable',
    title="Total Consumption and Production by Country",
)
total_fig.show()


'\nconsumption_fig = px.bar(grouped_consumption.sort_values(by = \'Domestic Consumption\', ascending= False), \n                         x = \'Country\', y = \'Domestic Consumption\', title = "Total Consumption by Country")\nproduction_fig = px.bar(grouped_production.sort_values(by = \'Total Production\', ascending= False), \n                         x = \'Country\', y = \'Total Production\', title = "Total Production by Country")\nconsumption_fig.show()\nproduction_fig.show()\n'

### Visualizing top producers' production and consumption over time

In [40]:
# Names of the ten countries with the largest summed production.
top_producers = (
    grouped_production
    .sort_values('Total Production', ascending=False)['Country']
    .head(10)
)

# Country-by-year totals of both measures, reshaped to long form so each
# measure can be given its own line dash style.
yearly_columns = ['Country', 'Year', 'Domestic Consumption', 'Total Production']
merged_aggregated = (
    merged[yearly_columns]
    .groupby(['Country', 'Year'], as_index=False)
    .sum()
    .melt(id_vars=['Country', 'Year'],
          value_vars=['Domestic Consumption', 'Total Production'])
)

# Keep only rows belonging to the top producers before plotting.
is_top_producer = merged_aggregated['Country'].isin(top_producers)
time_fig = px.line(
    merged_aggregated[is_top_producer],
    x='Year',
    y='value',
    color='Country',
    line_dash='variable',
    title='Top 10 Countries Production versus Consumption',
)
time_fig.show()
