In [None]:
#LOAD
#Define libraries
#---------------------------------------------------------------------------------------------
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Quantapp.Algorithm import Algorithm
from Quantapp.Computation import Computation
from Quantapp.Universe import Universe
import plotly.express as px
from IPython.display import display
import pandas as pd
from darts import concatenate
from darts import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries as dt_attr
from darts.utils.missing_values import fill_missing_values
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from sklearn.preprocessing import StandardScaler


# NOTE(review): no other import visible in this file names NaiveSeasonal, NaiveMean,
# BlockRNNModel or mse, so the two wildcard imports below are what bring them into scope.
# Explicit imports would make that dependency visible to readers and linters.
from darts.models import *
from darts.metrics import *
import torch 
# Release cached, currently unused CUDA memory before any model is built further down.
torch.cuda.empty_cache()
#-------------------------------------------------------------------------------------------------


#Load and Initialize Data
#-------------------------------------------------------------------------------------------------
plt.rcParams['figure.figsize'] = [10,7]

# Project helpers (Quantapp); their behaviour is defined outside this file.
algorithm = Algorithm()
comp = Computation()
universe = Universe(['csv_files/S&P 500.csv'])

vis_range = 255
forecast_horizon = 21  # number of rows used as the look-back of every percentage change below

ticker = 'SPY'
period, interval = ('15y','1d')

# Price history for the target ticker, without the corporate-action columns.
drop_columns = ['Dividends', 'Stock Splits']
history = yf.Ticker(ticker).history(period=period,interval=interval)
history = history.drop(drop_columns, axis=1)

# forecast_horizon-row percentage change of every remaining column, re-sampled onto a
# calendar-day index; the days inserted by asfreq('D') are forward-filled.
# .ffill() replaces fillna(method='ffill'), which pandas has deprecated.
returns = history.pct_change(forecast_horizon).asfreq('D').ffill()

# ^IRX close aligned to the rows of `history`; rows missing in ^IRX are forward-filled.
risk_free_rate = yf.Ticker("^IRX").history(period=period)['Close'].reindex(history.index).ffill()

# Rolling-window lengths: every size from 5 to 20 plus 50 and 200.
windows = list(range(5,21,1)) + [50,200]
print(windows)

#-------------------------------------------------------------------------------------------------



#Define essential methods
#-------------------------------------------------------------------------------------------------
def copy_index(data,index):
    """Reindex ``data`` onto every calendar day spanned by ``index`` and forward-fill gaps.

    Parameters
    ----------
    data : pandas Series or DataFrame
        Object to re-align; it is not modified.
    index : sequence of date-likes
        Only its first and last elements are read; they bound the daily range.

    Returns
    -------
    Same type as ``data``, indexed by a daily ``DatetimeIndex`` from ``index[0]`` to
    ``index[-1]`` inclusive. Days absent from ``data`` take the last earlier value;
    days before the first available value stay NaN.

    Raises
    ------
    IndexError
        If ``index`` is empty.
    """
    date_index = pd.date_range(start=index[0], end=index[-1], freq='D')
    # .ffill() instead of fillna(method='ffill'): the `method` keyword is deprecated in pandas.
    return data.reindex(date_index).ffill()

def download_data(arr):
    """Download price data for several tickers with the notebook-wide period and interval.

    Parameters
    ----------
    arr : str or iterable of str
        Either one ready-made ticker string, or an iterable of ticker strings that is
        joined with single spaces before being handed to ``yf.download``.

    Returns
    -------
    Whatever ``yf.download`` returns for the module-level ``period`` and ``interval``.
    """
    # A bare string must bypass join(): iterating it would split the symbol into
    # single characters ('SPY' -> 'S P Y').
    tickers = arr if isinstance(arr, str) else ' '.join(arr)
    return yf.download(tickers = tickers,  # space-separated tickers
        period = period,         # time period
        interval = interval)

def flatten(df):
    """Return a copy of ``df`` whose two-level column labels are joined as ``'<first>_<second>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are 2-tuples (e.g. a two-level ``MultiIndex``).

    Returns
    -------
    pandas.DataFrame
        Same data and row index as ``df`` with flat string column labels. ``df`` itself
        is left untouched.

    Notes
    -----
    Relabelling a copy in one step avoids inserting columns one at a time into an empty
    frame, which fragments the frame when there are many columns. Labels are formatted
    rather than concatenated with ``+`` so non-string levels (e.g. integer window sizes)
    work as well.
    """
    flat = df.copy()
    flat.columns = [f"{col}_{ticker}" for col, ticker in df.columns]
    return flat

#-------------------------------------------------------------------------------------------------



In [None]:

sp500_symbols  = universe.retrieve_universe()['Symbol']

# One list entry per ticker. The previous value was a single comma-joined string that
# listed XLK twice; the repeated entry is replaced by XLY so the list has eleven distinct
# sector ETFs.
# NOTE(review): XLY is assumed to be the intended eleventh fund - confirm.
sector_symbols = ['XLK', 'XLV', 'XLY', 'XLF', 'XLU', 'XLP', 'XLB', 'XLRE', 'XLI', 'XLE', 'XLC']

sectors = download_data(sector_symbols)

# Adjusted closes aligned to the rows of `history`; gaps are forward-filled and whatever
# is still missing (rows before a fund's first value) becomes 0.
# NOTE(review): whether an 'Adj Close' column exists depends on the yfinance version /
# auto_adjust setting - confirm. The 0 fill also feeds zeros into rate_of_change; verify
# that is intended.
sectors_close = sectors['Adj Close'].reindex(history.index).ffill().fillna(0)

# Rolling rate of change per window and sector, with the two-level columns flattened.
sectors_close_rate_of_change = flatten(comp.rolling_multi_windows(algorithm.rate_of_change,windows,sectors_close).dropna())




In [None]:
#extract_features = extract_features(history,column_id='Close',impute_function=impute)
#display(extract_features)
#display(history)

In [None]:
#Retrieves and calculates features for series

# Rolling average is taken over the forecast_horizon-row percentage change of the close;
# the remaining statistics are taken over the close itself. Rows with NaN are dropped.
average_df = comp.rolling_windows(
    algorithm.average,
    windows,
    history['Close'].pct_change(forecast_horizon),
).dropna()

std_df, semi_std_df, skew_df, kurtosis_df = [
    comp.rolling_windows(statistic, windows, history['Close']).dropna()
    for statistic in (
        algorithm.standard_deviation,
        algorithm.semi_standard_deviation,
        algorithm.skew,
        algorithm.kurtosis,
    )
]

# Close-column returns next to their rolling averages, for a joint plot.
df = pd.concat([returns['Close'].rename('Daily Returns'), average_df], axis=1)

# One interactive line chart per feature family, in the order they were computed.
for frame in (df, std_df, semi_std_df, skew_df, kurtosis_df):
    px.line(frame).show()




In [None]:
#Feature Selection and Reduction

# Join every feature family column-wise, keep only rows where all of them are present,
# then put the result on a calendar-day index with forward-filled gaps.
# .ffill() replaces fillna(method='ffill'), which pandas has deprecated.
features = pd.concat([returns,average_df,std_df,semi_std_df,skew_df, kurtosis_df,sectors_close_rate_of_change],axis=1)
features = features.dropna().asfreq('D').ffill()

# Standardise every column. Only the row index is carried over, so the scaled frame has
# positional integer column labels instead of the original feature names.
# NOTE(review): the scaler is fitted on the full history, i.e. before the train/test
# split made later in the notebook - confirm this is acceptable.
scaler = StandardScaler()
features = pd.DataFrame(scaler.fit_transform(features),index=features.index)
px.line(features)


In [None]:
#Feature preprocessing

#ts_past_covariates = TimeSeries.from_dataframe(features,freq="D")
#ts_returns     = TimeSeries.from_series(returns['Close'].loc[features.index].dropna(),freq="D")

# Past covariates must only hold values already known at each timestamp. A positive
# shift keeps the series backward-looking; a negative shift would place the following
# row's return on each timestamp and hand the model the value it is asked to predict.
# A separate name is used so the `features` frame is not rebound to a Series.
lagged_returns = returns['Close'].shift(1).fillna(0)

# Both series are built with the same explicit daily frequency.
ts_past_covariates = TimeSeries.from_series(lagged_returns, freq="D")
ts_returns         = TimeSeries.from_series(returns['Close'].fillna(0), freq="D")

# Split target and covariates at the same 0.75 point.
training_set, testing_set               = ts_returns.split_before(0.75)
training_covariates, testing_covariates = ts_past_covariates.split_before(0.75)


In [None]:
#Model Creation
torch.cuda.empty_cache()

from darts.dataprocessing.transformers import Scaler

# Baselines.
ns_k1_model = NaiveSeasonal(K=1)
ns_mean_model = NaiveMean()

# Train on the first CUDA device when one is present, otherwise on the CPU, so the
# notebook also runs on machines without a GPU.
if torch.cuda.is_available():
    trainer_kwargs = {"accelerator": "gpu", "devices": [0]}
else:
    trainer_kwargs = {"accelerator": "cpu"}

# LSTM block model: reads 50 past steps and emits 7 steps per forward pass.
# random_state fixes weight initialisation and batch order so a re-run gives the same fit.
lstm_model = BlockRNNModel(model='LSTM',
                           input_chunk_length=50,
                           output_chunk_length=7,
                           pl_trainer_kwargs=trainer_kwargs,
                           random_state=42)


# Disabled NHiTS alternative, kept as a bare string literal (it is never executed).
'''
lstm_model = NHiTS(
                           input_chunk_length=50, 
                           output_chunk_length=7,
                           pl_trainer_kwargs={
                                "accelerator": "gpu",
                                "devices": [0]
                            },
                            add_encoders={
                                'cyclic': {'future': ['month']},
                                'datetime_attribute': {'future': ['month','day','weekday','dayofweek','day_of_week','dayofyear','day_of_year','week','weekofyear','week_of_year']},
                                'position': {'past': ['relative'], 'future': ['relative']},
                                'transformer': Scaler()
}
                            
                            )
'''
ns_k1_model.fit(training_set)
ns_mean_model.fit(training_set)
lstm_model.fit(training_set ,past_covariates=training_covariates,epochs=100)
#lstm_model.fit(training_set,epochs=100)



In [None]:
#Model Training / Testing


In [None]:
#Model Prediction / Backtesting


def _forecast_frame(training, testing, forecast, label):
    """Put training data, testing data and one forecast side by side for plotting.

    Parameters
    ----------
    training, testing, forecast : pandas.Series
        Date-indexed series; they are aligned on the union of their indexes.
    label : str
        Column name given to ``forecast``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'training set'``, ``'testing set'`` and ``label``, restricted to rows
        whose date falls on Monday-Friday.
    """
    # `keys` names the columns directly, so the result does not depend on whether the
    # incoming Series carry a name (renaming positional labels 0/1/2 only works when
    # they do not).
    frame = pd.concat([training, testing, forecast], axis=1,
                      keys=['training set', 'testing set', label])
    return frame[frame.index.dayofweek < 5]


# The naive models forecast the whole test span; the LSTM forecasts 7 steps.
ns_k1_forecast = ns_k1_model.predict(len(testing_set)).pd_series().dropna()
ns_mean_forecast = ns_mean_model.predict(len(testing_set)).pd_series().dropna()
lstm_forecast = lstm_model.predict(7).pd_series().dropna()


training_set_ = training_set.pd_series().dropna()
testing_set_  = testing_set.pd_series().dropna()

forecast_ns_k1 = _forecast_frame(training_set_, testing_set_, ns_k1_forecast, 'naive Seasonal forecast (k=1)')
forecast_ns_mean = _forecast_frame(training_set_, testing_set_, ns_mean_forecast, 'naive mean')
forecast_lstm = _forecast_frame(training_set_, testing_set_, lstm_forecast, 'naive lstm 21')

px.line(forecast_ns_k1).show()
px.line(forecast_ns_mean).show()
px.line(forecast_lstm).show()


In [None]:
#Model Analysis
# The metric needs darts TimeSeries objects, so the forecast is taken straight from the
# model (not from a pandas copy) and scored against the held-out test span it covers.
naive_seasonal_k1_forecast = ns_k1_model.predict(len(testing_set))
print(f"Mean squared error for Naive Seasonal Forecast is {mse(testing_set, naive_seasonal_k1_forecast)}")