In [1]:
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [48]:
class Production_Data(object):
    production_data = None
    ICO_categories = ["Brazilian Naturals", "Colombian Milds", "Other Milds", "Robustas"]
    countries_in_ICO_category = dict.fromkeys(ICO_categories, None)
    production_by_ICO_category = dict.fromkeys(ICO_categories, None)
    ending_stock_by_ICO_category = dict.fromkeys(ICO_categories, None)
    
    def __init__(self):
        self.production_data = pd.read_csv("../datasets/capstone/coffee-production--USDA-FAS--psd_coffee.csv")
        self.production_data = self.production_data[["Country_Name", "Market_Year", "Attribute_Description", "Value"]]
        
        self.production_data.loc[:, "Value (60kg bags)"] = self.production_data.loc[:, "Value"] * 1000
        self.production_data.drop("Value", axis = 1, inplace = True)
        
        ICO_country_classifications = pd.read_csv("../datasets/capstone/ICO composite indicator index country classification.csv")
        ICO_country_classifications.columns = ["Country", "Brazilian Naturals","Colombian Milds","Other Milds", "Robustas"]
        
        for category in self.ICO_categories:
            temp_dataframe = ICO_country_classifications[ICO_country_classifications[category]][["Country"]]
            temp_dataframe = temp_dataframe.merge(self.production_data, left_on = "Country", right_on = "Country_Name").drop("Country_Name", axis = 1)
            
            self.countries_in_ICO_category[category] = temp_dataframe["Country"].unique().tolist()
            self.ending_stock_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Ending Stocks"].drop("Attribute_Description", axis = 1)
            if category == "Robustas":
                self.production_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"].drop("Attribute_Description", axis = 1)
            else:
                self.production_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop("Attribute_Description", axis = 1)
        
    def find_ICO_category_of_country(self, country_name = None):
        categories = list()
        for ICO_category in self.countries_in_ICO_category.keys():
            if country_name in self.countries_in_ICO_category[ICO_category]:
                categories.append(ICO_category)
        return categories
        
    def get_countries_in_category(self, ICO_category = "Brazilian Naturals"):
        return self.countries_in_ICO_category[ICO_category]
    def get_production_data_by_ICO_category(self, ICO_category = "Brazilian Naturals"):
        return self.production_by_ICO_category[ICO_category]
    
    def get_aggregate_production_data(self, ICO_category = "Brazilian Naturals"):
        return self.production_by_ICO_category[ICO_category].groupby(by = "Market_Year")["Value (60kg bags)"].sum()
    
    def get_ending_stocks_by_ICO_category(self, ICO_category = "Brazilian Naturals"):
        return self.ending_stock_by_ICO_category[ICO_category]
    
    def get_production_weights(self):
        # "Country", "Market_Year", "Arabica Production Weight", "Robusta Production Weight"
        results_dataframe = pd.DataFrame(columns = ["Country", "Market_Year", "Scaled Arabica Production Weight", "Scaled Robusta Production"])

        total_arabica_production = pd.pivot_table(data = self.production_data[self.production_data["Attribute_Description"] == "Arabica Production"], 
               values = ["Value (60kg bags)"], 
               index = ["Market_Year"], aggfunc = 'sum')["Value (60kg bags)"].to_dict()
        
        total_robusta_production = pd.pivot_table(data = self.production_data[self.production_data["Attribute_Description"] == "Robusta Production"], 
               values = ["Value (60kg bags)"], 
               index = ["Market_Year"], aggfunc = 'sum')["Value (60kg bags)"].to_dict()
        
        for country in self.production_data["Country_Name"].unique()[0:3]:
            temp_dataframe = self.production_data[(self.production_data["Country_Name"] == country) 
                                                  & ((self.production_data["Attribute_Description"] == "Arabica Production") 
                                                     | (self.production_data["Attribute_Description"] == "Robusta Production"))]
#             total_arabica_production_for_country = pd.pivot_table(data = self.production_data[(self.production_data["Attribute_Description"] == "Arabica Production") | (self.production_data["Attribute_Description"] == "Robusta Production")], 
#                values = ["Value (60kg bags)"], 
#                index = ["Market_Year"], aggfunc = 'sum')["Value (60kg bags)"].to_dict()
            scaled_arabica_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Market_Year"].map(total_arabica_production)
            scaled_arabica_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop(["Attribute_Description", "Value (60kg bags)", 1], axis = 1 )
        
            scaled_robusta_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Market_Year"].map(total_robusta_production)
            temp_dataframe = pd.concat([temp_dataframe, scaled_arabica_production, scaled_robusta_production], axis = 1)
            # MAKE SCALED ROBUSTA DATAFRAME; MERGE ON COUNTRY_NAME, MARKET_YEAR
#         self.production_data[[]]
        
#         self.production_data[self.production_data["Attribute_Description"] == "Arabica Production"]["Value (60kg bags)"] / self.production_data[self.production_data["Attribute_Description"] == "Arabica Production"]["Market_Year"].map(total_arabica_production)

data = Production_Data()
data.get_production_weights()

    Country_Name  Market_Year    0
0        Algeria         2003  0.0
19       Algeria         2004  0.0
38       Algeria         2005  0.0
57       Algeria         2006  0.0
76       Algeria         2007  0.0
95       Algeria         2008  0.0
114      Algeria         2009  0.0
133      Algeria         2010  0.0
152      Algeria         2011  0.0
171      Algeria         2012  0.0
190      Algeria         2013  0.0
209      Algeria         2014  0.0
228      Algeria         2015  0.0
247      Algeria         2016  0.0
266      Algeria         2017  0.0
     Country_Name  Market_Year         0
285        Angola         1961  0.000954
304        Angola         1962  0.000874
323        Angola         1963  0.000935
342        Angola         1964  0.001102
361        Angola         1965  0.001302
380        Angola         1966  0.000929
399        Angola         1967  0.001546
418        Angola         1968  0.001312
437        Angola         1969  0.001562
456        Angola         1970

In [44]:
data = Production_Data()
data.find_ICO_category_of_country("Sri Lanka")
data.get_aggregate_production_data("Robustas").head()
data.get_countries_in_category("Robustas")[0:5]
data.get_ending_stocks("Robustas").head()

AttributeError: 'Production_Data' object has no attribute 'get_ending_stocks'

In [None]:
data.get_aggregate_production_data("Brazilian Naturals").plot()
data.get_aggregate_production_data("Colombian Milds").plot()
data.get_aggregate_production_data("Other Milds").plot()
data.get_aggregate_production_data("Robustas").plot()


plt.title("Annual coffee production by ICO category")
plt.legend(["Brazilian Naturals", "Colombian Milds", "Other Milds", "Robusta"], loc = 'best')
plt.xlabel("Market year")
plt.ylabel("Production (60 kg bags)")
plt.show()