## Notes

* Try making a target column based on daily changes of the ICO composite index. Use a threshold (e.g., changes greater than ±10%); base this threshold on volatility or standard deviation.
* For all factors, provide a country weight based on previous production output.
* __In effect, I am trying to predict production numbers__, since the ICO composite indicator index is a weighted sum of individual country data.

In [1]:
import pandas as pd
import datetime

from capstone_modules import Production_Data

from statsmodels.tsa.seasonal import seasonal_decompose

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

### Coffee production data

In [3]:
# Build the project's production-data helper (imported from capstone_modules).
data = Production_Data()
# Production records for the "Brazilian Naturals" group.
# Presumably a DataFrame with a "Country" column, given the filter below — TODO confirm.
brazil_data = data.get_production_data("Brazilian Naturals")
# NOTE(review): this preview is never rendered — a cell only displays its final expression.
brazil_data[brazil_data["Country"] == "Brazil"].head()
# Look up the ICO group(s) for Sri Lanka; as the last expression, this is the cell's displayed result.
data.find_ICO_category_of_country("Sri Lanka")

['Robustas']

In [None]:
# Load the country classification table (a CSV created in the capstone module)
# and give its five columns fixed, readable names.
ICO_country_classifications = pd.read_csv(
    "../datasets/capstone/ICO composite indicator index country classification.csv"
)
ICO_country_classifications.columns = [
    "Country",
    "Brazilian Naturals",
    "Colombian Milds",
    "Other Milds",
    "Robustas",
]

# One single-column ("Country") frame per ICO group. Each group column is used
# directly as the row mask (assumes those columns hold booleans — TODO confirm).
brazlian_naturals_countries = ICO_country_classifications.loc[
    ICO_country_classifications["Brazilian Naturals"], ["Country"]
]
colombian_milds_countries = ICO_country_classifications.loc[
    ICO_country_classifications["Colombian Milds"], ["Country"]
]
other_milds_countries = ICO_country_classifications.loc[
    ICO_country_classifications["Other Milds"], ["Country"]
]
robustas_countries = ICO_country_classifications.loc[
    ICO_country_classifications["Robustas"], ["Country"]
]

### Import temperature data

In [None]:
# Read the temperature readings for coffee-growing regions.
temperature_data = pd.read_csv(
    "../datasets/capstone/temperature-in-coffee-growing-regions--from-berkeley-earth.csv"
)
# The unnamed first CSV column is parsed as datetimes and kept as a column...
temperature_data["Unnamed: 0"] = pd.to_datetime(temperature_data["Unnamed: 0"].values)
# ...and the same parsed values also become the row index.
temperature_data.index = pd.Index(temperature_data["Unnamed: 0"].values)

In [None]:
# Reshape to one row per index timestamp and one column per country, keeping the
# minimum "Temperature (C)" value whenever several readings share a cell.
minimum_temperatures_by_country = temperature_data.pivot_table(
    index=temperature_data.index,
    columns="Country",
    values="Temperature (C)",
    aggfunc="min",
)


In [None]:
# Line plot of Brazil's column, restricted to rows dated 1990 or later;
# the title is set through the Axes that .plot() hands back.
minimum_temperatures_by_country.loc[
    minimum_temperatures_by_country.index.year > 1989, "Brazil"
].plot().set_title("Temperatures in Brazil")
plt.show()

In [None]:
# Seasonal decomposition of Brazil's minimum-temperature series with a cycle
# length of 12 observations (assumes the series is monthly — TODO confirm).
brazil_temperature_seasonal_decomp = seasonal_decompose(minimum_temperatures_by_country["Brazil"], freq = 12)

# DecomposeResult.plot() draws a multi-panel figure and returns it. plt.title()
# would label only the current (last-created) subplot, so the title is attached
# to the figure as a whole instead.
brazil_temperature_seasonal_decomp.plot().suptitle("Seasonal decomposition for temperatures in Brazil")
plt.show()

In [None]:
# Decompose the residual of the first decomposition again (same 12-observation
# cycle) to check whether any seasonal structure is left in it. Missing residual
# values are replaced with 0 before decomposing.
# The figure returned by .plot() is titled with suptitle(), because plt.title()
# would label only the current (last-created) subplot of the multi-panel figure.
seasonal_decompose(brazil_temperature_seasonal_decomp.resid.fillna(0), freq = 12).plot().suptitle(
    "Seasonal decomposition of residual on Brazil temperatures"
)
plt.show()

### Importing ICO composite index prices

In [None]:
# Raw monthly commodity price averages export. The file is read as ISO-8859-1
# text and row 1 (zero-based) supplies the column names.
raw_ICO_index_data = pd.read_csv(
    "../datasets/capstone/us_monthlycommoditypriceaverages_03927558512071.csv",
    header=1,
    encoding="ISO-8859-1",
)

In [None]:
# Reshape the raw export without modifying it:
#   1. drop the row labelled 0 and transpose, so the original columns become rows;
#   2. rename the new columns using the values found in the "PERIOD" row;
#   3. remove that "PERIOD" row now that it has served as the header;
#   4. convert every column to numbers, turning unparseable cells into NaN.
ICO_index_data = (
    raw_ICO_index_data
    .drop(labels=0)
    .T
    .pipe(lambda frame: frame.rename(columns=frame.loc["PERIOD", :]))
    .drop(labels=["PERIOD"])
    .apply(pd.to_numeric, errors="coerce")
)
# The row labels are month-year strings such as "Jan1990" (format "%b%Y");
# keep a parsed copy of them in a "time" column.
ICO_index_data["time"] = pd.to_datetime(ICO_index_data.index.values, format="%b%Y")

In [None]:
# Plot every series of the ICO index frame on one wide figure.
ICO_index_data.plot(figsize=(16,8))
# Use the pyplot module imported at the top of the notebook: `sns.plt` is not an
# attribute of current seaborn releases and raises AttributeError there.
plt.show()

In [None]:
# Keep only observations dated after 31 December 1988, i.e. from 1989 onwards.
# The cutoff is a pd.Timestamp rather than a datetime.date: current pandas refuses
# ordering comparisons between a datetime64 column and plain date objects (TypeError).
ICO_index_since_1989_data = ICO_index_data[ICO_index_data["time"] > pd.Timestamp("1988-12-31")]
# Index the filtered rows by their timestamps, then drop the now-redundant column.
ICO_index_since_1989_data.index = ICO_index_since_1989_data["time"].values
ICO_index_since_1989_data = ICO_index_since_1989_data.drop("time", axis = 1)