## Loading Packages

In [1]:
from fbprophet import Prophet
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from utils import *
import warnings

# Optional dark plotting theme, currently disabled:
# plt.style.use('dark_background')
# Suppress every warning raised from this point on. Note that this also hides
# deprecation and convergence notices from the libraries imported above.
warnings.filterwarnings("ignore")

## Loading Data

In [2]:
def universe_select(path, commodity_name):
    """Load the instruments believed to be of interest for a commodity.

    Each instrument is read from ``<path>/<instrument>.csv``. Every file
    must contain a ``date`` column; it is parsed day-first, used as the
    index, and the rows are sorted into ascending date order.

    Args:
        path: Directory containing the instrument CSV files. A trailing
            path separator is optional (e.g. ``"Data"`` and ``"Data/"``
            are equivalent).
        commodity_name: ``"Al"`` for aluminium or ``"Cu"`` for copper.

    Returns:
        A dictionary mapping each instrument name to its dataframe, in the
        order the instruments are listed in the table below. If
        ``commodity_name`` is not recognised, a message is printed and an
        empty dictionary is returned.
    """
    # Instruments that appear in every commodity universe.
    shared_instruments = ["bdi", "ted", "vix", "skew", "gsci"]
    instruments_by_commodity = {
        "Al": ["al_shfe", "al_lme", "al_comex_p", "al_comex_s", "al_lme_s", "yuan"]
        + shared_instruments,
        "Cu": ["cu_shfe", "cu_lme", "cu_comex_p", "cu_comex_s", "peso", "sol"]
        + shared_instruments,
    }

    instruments = instruments_by_commodity.get(commodity_name)
    if instruments is None:
        # Best-effort behaviour kept from the original: warn and return nothing.
        print("Select an appropriate commodity")
        return {}

    universe_dict = {}
    for instrument in instruments:
        # os.path.join inserts the separator only when it is missing, so the
        # lookup no longer depends on `path` ending with a slash.
        csv_path = os.path.join(path, instrument + ".csv")
        universe_dict[instrument] = pd.read_csv(
            csv_path, index_col="date", parse_dates=["date"], dayfirst=True
        ).sort_index(ascending=True)
    return universe_dict


In [3]:
# Location of the per-instrument CSV files read by universe_select
path = "Data/"
# Load the copper ("Cu") universe: a dict of dataframes keyed by instrument name
universe_dict = universe_select(path, "Cu")

## Preprocessing

In [4]:
# NOTE(review): the helpers below are not defined in this notebook; presumably
# they come from `from utils import *` — confirm against utils.py.
# Each step rebinds `universe_dict`, so re-running this cell on its own applies
# the steps to already-processed data; re-run the loading cell first.

# Renaming the columns to price
universe_dict = price_rename(universe_dict)
# Cleaning the dataset of any erroneous datapoints
universe_dict = clean_dict_gen(universe_dict)
# Making sure that all the points in the window have consistent length
universe_dict = truncate_window_length(universe_dict)
# Generate the full training dataset; with lg_returns_only=True the resulting
# frame shows one lg_return_<instrument> column per instrument (see head() below)
df_full = generate_dataset(universe_dict, lg_returns_only=True)

Included Instrument:
cu_shfe
cu_lme
cu_comex_p
cu_comex_s
peso
sol
bdi
ted
vix
skew
gsci


In [5]:
# Visualise the plots (currently disabled; uncomment the call to draw them)
# visualise_universe(universe_dict)
# Keep a direct handle on the preprocessed "cu_lme" instrument frame
df = universe_dict["cu_lme"]

In [6]:
# Preview the first rows of the combined dataset produced by generate_dataset
df_full.head()

Unnamed: 0_level_0,lg_return_cu_shfe,lg_return_cu_lme,lg_return_cu_comex_p,lg_return_cu_comex_s,lg_return_peso,lg_return_sol,lg_return_bdi,lg_return_ted,lg_return_vix,lg_return_skew,lg_return_gsci
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2006-08-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2006-08-30,-0.007511,0.007395,-0.017089,0.0,0.00093,-0.003395,0.011929,-0.006315,-0.004898,-0.008014,-0.001257
2006-08-31,-0.000296,0.033459,0.030438,-0.005077,0.004636,0.00247,0.013609,0.0,0.007338,-0.018494,0.003446
2006-09-01,0.036442,-0.014616,0.001728,-0.048758,-0.005194,0.0,0.007252,-0.009948,-0.028844,0.011057,-0.014828
2006-09-04,0.015561,0.008117,0.0,0.0,-0.003167,0.0,0.001805,0.0,0.0,0.0,0.0
