In [2]:
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
class Production_Data(object):
    production_data = None
    ICO_categories = ["Brazilian Naturals", "Colombian Milds", "Other Milds", "Robustas"]
    countries_in_ICO_category = dict.fromkeys(ICO_categories, None)
    production_by_ICO_category = dict.fromkeys(ICO_categories, None)
    ending_stock_by_ICO_category = dict.fromkeys(ICO_categories, None)
    
    def __init__(self):
        self.production_data = pd.read_csv("../datasets/capstone/coffee-production--USDA-FAS--psd_coffee.csv")
        self.production_data = self.production_data[["Country_Name", "Market_Year", "Attribute_Description", "Value"]]
        
        self.production_data.loc[:, "Value (60kg bags)"] = self.production_data.loc[:, "Value"] * 1000
        self.production_data.drop("Value", axis = 1, inplace = True)
        
        ICO_country_classifications = pd.read_csv("../datasets/capstone/ICO composite indicator index country classification.csv")
        ICO_country_classifications.columns = ["Country", "Brazilian Naturals","Colombian Milds","Other Milds", "Robustas"]
        
        for category in self.ICO_categories:
            temp_dataframe = ICO_country_classifications[ICO_country_classifications[category]][["Country"]]
            temp_dataframe = temp_dataframe.merge(self.production_data, left_on = "Country", right_on = "Country_Name").drop("Country_Name", axis = 1)
            
            self.countries_in_ICO_category[category] = temp_dataframe["Country"].unique().tolist()
            self.ending_stock_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Ending Stocks"].drop("Attribute_Description", axis = 1)
            if category == "Robustas":
                self.production_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"].drop("Attribute_Description", axis = 1)
            else:
                self.production_by_ICO_category[category] = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop("Attribute_Description", axis = 1)
        
    def find_ICO_category_of_country(self, country_name = None):
        categories = list()
        for ICO_category in self.countries_in_ICO_category.keys():
            if country_name in self.countries_in_ICO_category[ICO_category]:
                categories.append(ICO_category)
        return categories
        
    def get_countries_in_category(self, ICO_category = "Brazilian Naturals"):
        return self.countries_in_ICO_category[ICO_category]
    def get_production_data_by_ICO_category(self, ICO_category = "Brazilian Naturals"):
        return self.production_by_ICO_category[ICO_category]
    
    def get_aggregate_production_data(self, ICO_category = "Brazilian Naturals"):
        return self.production_by_ICO_category[ICO_category].groupby(by = "Market_Year")["Value (60kg bags)"].sum()
    
    def get_ending_stocks_by_ICO_category(self, ICO_category = "Brazilian Naturals"):
        return self.ending_stock_by_ICO_category[ICO_category]
    
    def get_production_share(self, country_or_category = "All"):

        total_arabica_production = pd.pivot_table(data = self.production_data[self.production_data["Attribute_Description"] == "Arabica Production"], 
               values = ["Value (60kg bags)"], 
               index = ["Market_Year"], aggfunc = 'sum')["Value (60kg bags)"].to_dict()
        
        total_robusta_production = pd.pivot_table(data = self.production_data[self.production_data["Attribute_Description"] == "Robusta Production"], 
               values = ["Value (60kg bags)"], 
               index = ["Market_Year"], aggfunc = 'sum')["Value (60kg bags)"].to_dict()
        
#         temp_dataframe = self.production_data[(self.production_data["Attribute_Description"] == "Arabica Production") | (self.production_data["Attribute_Description"] == "Robusta Production")]
#         temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Arabica Production", "Value (60kg bags)"] = temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Arabica Production", "Value (60kg bags)"] / temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Arabica Production", "Market_Year"].map(total_arabica_production)
#         temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Robusta Production", "Value (60kg bags)"] = temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Robusta Production", "Value (60kg bags)"] / temp_dataframe.ix[temp_dataframe["Attribute_Description"] == "Robusta Production", "Market_Year"].map(total_robusta_production)
#         return(temp_dataframe)

        if country_or_category in self.ICO_categories:
            results = []
            for country in self.countries_in_ICO_category[country_or_category]:
                temp_dataframe = self.production_data[(self.production_data["Country_Name"] == country) 
                                                      & ((self.production_data["Attribute_Description"] == "Arabica Production") 
                                                         | (self.production_data["Attribute_Description"] == "Robusta Production"))]

                scaled_arabica_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Market_Year"].map(total_arabica_production)
                scaled_arabica_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
                scaled_arabica_production_dataframe = pd.concat([scaled_arabica_production_dataframe, scaled_arabica_production], axis = 1)
                scaled_arabica_production_dataframe.columns = ["Country_Name", "Market_Year", "Arabica Production Share"]

                scaled_robusta_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Market_Year"].map(total_robusta_production)
                scaled_robusta_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
                scaled_robusta_production_dataframe = pd.concat([scaled_robusta_production_dataframe, scaled_robusta_production], axis = 1)
                scaled_robusta_production_dataframe.columns = ["Country_Name", "Market_Year", "Robusta Production Share"]

                temp_dataframe = scaled_arabica_production_dataframe.merge(scaled_robusta_production_dataframe, on = ["Country_Name", "Market_Year"])
                results.append(temp_dataframe)

            return(pd.concat(results, ignore_index=True))

        elif country_or_category != "All":
            temp_dataframe = self.production_data[(self.production_data["Country_Name"] == country_or_category) 
                                                      & ((self.production_data["Attribute_Description"] == "Arabica Production") 
                                                         | (self.production_data["Attribute_Description"] == "Robusta Production"))]

            scaled_arabica_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Market_Year"].map(total_arabica_production)
            scaled_arabica_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
            scaled_arabica_production_dataframe = pd.concat([scaled_arabica_production_dataframe, scaled_arabica_production], axis = 1)
            scaled_arabica_production_dataframe.columns = ["Country_Name", "Market_Year", "Arabica Production Share"]

            scaled_robusta_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Market_Year"].map(total_robusta_production)
            scaled_robusta_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
            scaled_robusta_production_dataframe = pd.concat([scaled_robusta_production_dataframe, scaled_robusta_production], axis = 1)
            scaled_robusta_production_dataframe.columns = ["Country_Name", "Market_Year", "Robusta Production Share"]

            temp_dataframe = scaled_arabica_production_dataframe.merge(scaled_robusta_production_dataframe, on = ["Country_Name", "Market_Year"])
            return temp_dataframe
        
        else:        
            results = []
            for country in self.production_data["Country_Name"].unique():
                temp_dataframe = self.production_data[(self.production_data["Country_Name"] == country) 
                                                      & ((self.production_data["Attribute_Description"] == "Arabica Production") 
                                                         | (self.production_data["Attribute_Description"] == "Robusta Production"))]

                scaled_arabica_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"]["Market_Year"].map(total_arabica_production)
                scaled_arabica_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Arabica Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
                scaled_arabica_production_dataframe = pd.concat([scaled_arabica_production_dataframe, scaled_arabica_production], axis = 1)
                scaled_arabica_production_dataframe.columns = ["Country_Name", "Market_Year", "Arabica Production Share"]

                scaled_robusta_production = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Value (60kg bags)"] / temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"]["Market_Year"].map(total_robusta_production)
                scaled_robusta_production_dataframe = temp_dataframe[temp_dataframe["Attribute_Description"] == "Robusta Production"].drop(["Attribute_Description", "Value (60kg bags)"], axis = 1 )
                scaled_robusta_production_dataframe = pd.concat([scaled_robusta_production_dataframe, scaled_robusta_production], axis = 1)
                scaled_robusta_production_dataframe.columns = ["Country_Name", "Market_Year", "Robusta Production Share"]

                temp_dataframe = scaled_arabica_production_dataframe.merge(scaled_robusta_production_dataframe, on = ["Country_Name", "Market_Year"])
                results.append(temp_dataframe)

            return(pd.concat(results, ignore_index=True))

data = Production_Data()
data.get_production_share("Colombian Milds")
# pd.pivot_table(data = data.get_production_share(), columns = ["Country_Name","Attribute_Description"], index = ["Market_Year"]).reset_index()


Unnamed: 0,Country_Name,Market_Year,Arabica Production Share,Robusta Production Share
0,Colombia,1961,0.138465,0.000000
1,Colombia,1962,0.118120,0.000000
2,Colombia,1963,0.134226,0.000000
3,Colombia,1964,0.154185,0.000000
4,Colombia,1965,0.219433,0.000000
5,Colombia,1966,0.124911,0.000000
6,Colombia,1967,0.158803,0.000000
7,Colombia,1968,0.152120,0.000000
8,Colombia,1969,0.176304,0.000000
9,Colombia,1970,0.165573,0.000000


In [44]:
data = Production_Data()
data.find_ICO_category_of_country("Sri Lanka")
data.get_aggregate_production_data("Robustas").head()
data.get_countries_in_category("Robustas")[0:5]
data.get_ending_stocks("Robustas").head()

AttributeError: 'Production_Data' object has no attribute 'get_ending_stocks'

In [None]:
data.get_aggregate_production_data("Brazilian Naturals").plot()
data.get_aggregate_production_data("Colombian Milds").plot()
data.get_aggregate_production_data("Other Milds").plot()
data.get_aggregate_production_data("Robustas").plot()


plt.title("Annual coffee production by ICO category")
plt.legend(["Brazilian Naturals", "Colombian Milds", "Other Milds", "Robusta"], loc = 'best')
plt.xlabel("Market year")
plt.ylabel("Production (60 kg bags)")
plt.show()