In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
from pypfopt import risk_models
from sklearn import preprocessing
from pypfopt.efficient_frontier import EfficientFrontier
from datetime import date, datetime, timedelta
from arch import arch_model
from pypfopt import expected_returns
import tensorflow as tf
from tqdm import tqdm

In [3]:
#Return calculation
def ReturnCalculation (Database,lag):
    dimension=Database.shape[0];dif=lag;Out=np.zeros([dimension-dif])
    for i in range(dimension-dif):
        Out[i]=(np.log(Database['Close'][i+dif])-np.log(Database['Close'][i]))
    return np.append(np.repeat(np.nan, dif),Out), Database.index

#STD Calculation
def SDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif])
    for i in range (dimension-dif):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(np.repeat(np.nan, dif),Out)

#STD Calculation
def TrueSDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif+1])
    for i in range (dimension-dif+1):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(Out,np.repeat(np.nan, dif-1))

#Database is calculated
def DatabaseGeneration (Database, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database,Lag)
    DailyReturnsOld =  np.append(np.repeat(np.nan, 1),DailyReturns[0:(DailyReturns.shape[0]-1)])
    SD = SDCalculation (DailyReturns, LagSD)
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)
    Data = pd.DataFrame({'DailyReturns': DailyReturns, 'SD': SD, 'TrueSD': TrueSD, 'DailyReturnsOld': DailyReturnsOld})
    Data = Data.set_index(Index) 
    return Data.dropna()


#Database is calculated
def M_DatabaseGeneration (Database_daily, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database_daily,Lag)    
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)    
    Data = pd.DataFrame({'DailyReturns': DailyReturns,'TrueSD': TrueSD})
    Data = Data.set_index(Index)
    Data = Data.dropna() 
    weekly_returns = Data['DailyReturns'].resample('W-FRI').sum()
    weekly_average_volatility = Data['TrueSD'].resample('W-FRI').mean()*np.sqrt(5)
 
    
    Data = pd.DataFrame({'DailyReturns': weekly_returns,'TrueSD': weekly_average_volatility})
    return Data.dropna()

In [4]:
# start = '2009-01-01';end = '2013-01-01'
start='2008-01-01'; end='2015-12-31'; 
asset = "^GSPC"
Lag=1; LagSD=5
IndexEndDays=yf.download(asset,start=start,  end=end, progress=False).resample('W-FRI').last().index
Database=yf.download(asset,start, end, progress=False).resample('W-FRI').last()

Data = DatabaseGeneration(Database, Lag, LagSD)
Data



Unnamed: 0_level_0,DailyReturns,SD,TrueSD,DailyReturnsOld
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-02-15,0.013949,0.041708,0.016481,-0.047047
2008-02-22,0.002308,0.043323,0.022702,0.013949
2008-02-29,-0.016753,0.033929,0.022685,0.002308
2008-03-07,-0.028401,0.035199,0.029389,-0.016753
2008-03-14,-0.004052,0.024224,0.029213,-0.028401
...,...,...,...,...
2015-11-06,0.009496,0.011685,0.024921,0.002027
2015-11-13,-0.036955,0.011768,0.029743,0.009496
2015-11-20,0.032165,0.022117,0.025132,-0.036955
2015-11-27,0.000450,0.026294,0.023524,0.032165


In [5]:
# start = '2009-01-01';end = '2013-01-01'
start='2008-01-01'; end='2015-12-31'; 
asset = "^GSPC"
Lag=1; LagSD=5
IndexEndDays=yf.download(asset,start=start,  end=end, progress=False).resample('W-FRI').last().index
Database=yf.download(asset,start, end, progress=False)
Database_daily=yf.download(asset,start, end, progress=False)
Data = M_DatabaseGeneration(Database_daily, Lag, LagSD)
Data


Unnamed: 0_level_0,DailyReturns,TrueSD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-01-04,-0.024858,0.036723
2008-01-11,-0.007545,0.034526
2008-01-18,-0.055645,0.036154
2008-01-25,0.004082,0.032903
2008-02-01,0.047558,0.034690
...,...,...
2015-11-27,0.000450,0.014414
2015-12-04,0.000756,0.030913
2015-12-11,-0.038659,0.025918
2015-12-18,-0.003395,0.032131


In [6]:
from arch.__future__ import reindexing

In [7]:
#Fitting of GARCH(1,1)
def GARCH_Model_Student (Data):
    AR_Data=Data['DailyReturns']*100
    GARCH11 = arch_model(AR_Data, dist ='t')
    res_GARCH11 = GARCH11.fit(disp='off')
    CV_GARCH11 = res_GARCH11.conditional_volatility
    For_CV_GARCH11 = np.array(res_GARCH11.forecast(horizon=4).variance.dropna())
    return GARCH11, res_GARCH11, CV_GARCH11, For_CV_GARCH11

def TARCH_Model_Student(Data):
    AR_Data=Data['DailyReturns']*100
    TARCH11 = arch_model(AR_Data, p=1, o=1, q=1, power=1.0, dist ='t')
    res_TARCH11 = TARCH11.fit(disp='off')
    CV_TARCH11 = res_TARCH11.conditional_volatility
    For_CV_TARCH11 = []
    for i in range(4):
        forecast = res_TARCH11.forecast(start=AR_Data.shape[0]-1, horizon=1)
        For_CV_TARCH11.append(forecast.variance.iloc[-1,:].values[0])
        AR_Data = np.append(AR_Data, forecast.mean.iloc[-1,:].values[0])
        TARCH11 = arch_model(AR_Data, p=1, o=1, q=1, power=1.0, dist ='t')
        res_TARCH11 = TARCH11.fit(disp='off')
    return TARCH11, res_TARCH11, CV_TARCH11, np.array(For_CV_TARCH11)



In [8]:
GARCH11, res_GARCH11, CV_GARCH11, For_CV_GARCH11 = TARCH_Model_Student (Data)


# Step 1, modify GARCH models to be multistep

If you find that the TARCH model does not support a horizon greater than 1, one workaround could be to implement recursive forecasting manually. This would involve using the model to make a one-step ahead forecast, appending that forecast to your time series, and then making the next one-step ahead forecast, and so on until you have made 4 forecasts. However, this approach would also be based on the assumption that future residuals are zero, and it would be computationally more intensive.

In [47]:
#Return calculation
def ReturnCalculation (Database,lag):
    dimension=Database.shape[0];dif=lag;Out=np.zeros([dimension-dif])
    for i in range(dimension-dif):
        Out[i]=(np.log(Database['Close'][i+dif])-np.log(Database['Close'][i]))
    return np.append(np.repeat(np.nan, dif),Out), Database.index

#STD Calculation
def SDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif])
    for i in range (dimension-dif):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(np.repeat(np.nan, dif),Out)

#STD Calculation
def TrueSDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif+1])
    for i in range (dimension-dif+1):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(Out,np.repeat(np.nan, dif-1))


#Database is calculated
def DatabaseGeneration (Database, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database,Lag)
    DailyReturnsOld =  np.append(np.repeat(np.nan, 1),DailyReturns[0:(DailyReturns.shape[0]-1)])
    SD = SDCalculation (DailyReturns, LagSD)
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)
    Data = pd.DataFrame({'DailyReturns': DailyReturns, 'SD': SD, 'TrueSD': TrueSD, 'DailyReturnsOld': DailyReturnsOld})
    Data = Data.set_index(Index) 
    return Data.dropna()

#Fitting of GARCH(1,1)
def GARCH_Model_Student (Data):
    AR_Data=Data['DailyReturns']*100
    GARCH11 = arch_model(AR_Data, dist ='t')
    res_GARCH11 = GARCH11.fit(disp='off')
    CV_GARCH11 = res_GARCH11.conditional_volatility
    For_CV_GARCH11 = np.array(res_GARCH11.forecast(horizon=4).variance.dropna())
    return GARCH11, res_GARCH11, CV_GARCH11, For_CV_GARCH11

#Fitting of GJR_GARCH(1,1)
def GJR_GARCH_Model_Student (Data):
    AR_Data=Data['DailyReturns']*100
    GJR_GARCH11 = arch_model(AR_Data, p=1, o=1, q=1, dist ='t')
    res_GJR_GARCH11 = GJR_GARCH11.fit(disp='off')
    CV_GJR_GARCH11 = res_GJR_GARCH11.conditional_volatility
    For_CV_GJR_GARCH11 = np.array(res_GJR_GARCH11.forecast(horizon=4).variance.dropna())
    return GJR_GARCH11, res_GJR_GARCH11, CV_GJR_GARCH11, For_CV_GJR_GARCH11

#Fitting of TARCH(1,1)
def TARCH_Model_Student(Data):
    AR_Data=Data['DailyReturns']*100
    TARCH11 = arch_model(AR_Data, p=1, o=1, q=1, power=1.0, dist ='t')
    res_TARCH11 = TARCH11.fit(disp='off')
    CV_TARCH11 = res_TARCH11.conditional_volatility
    For_CV_TARCH11 = np.array(res_TARCH11.forecast(horizon=4,method= "bootstrap").variance.dropna())
    return TARCH11, res_TARCH11, CV_TARCH11, For_CV_TARCH11

# #Fitting of EGARCH(1,1)
# def EGARCH_Model_Student(Data):
#     AR_Data=Data['DailyReturns']*100
#     EGARCH11 = arch_model(AR_Data, dist ='t', vol="EGARCH")
#     res_EGARCH11 = EGARCH11.fit(disp='off')
#     CV_EGARCH11 = res_EGARCH11.conditional_volatility
#     For_CV_EGARCH11 = np.array(res_EGARCH11.forecast(horizon=4,method="bootstrap").variance.dropna())
#     return EGARCH11, res_EGARCH11,CV_EGARCH11, For_CV_EGARCH11

#Fitting of TARCH(1,1)
def EGARCH_Model_Student(Data):
    AR_Data=Data['DailyReturns']*100
    EGARCH11 = arch_model(AR_Data, p=1, o=1, q=1, power=1.0, dist ='t')
    res_EGARCH11 = EGARCH11.fit(disp='off')
    CV_EGARCH11 = res_EGARCH11.conditional_volatility
    For_CV_EGARCH11 = np.array(res_EGARCH11.forecast(horizon=4,method= "bootstrap").variance.dropna())
    return EGARCH11, res_EGARCH11,CV_EGARCH11, For_CV_EGARCH11

#Fitting of Absolute Value GARCH(1,1)
def AVGARCH_Model_Student(Data):
    AR_Data=Data['DailyReturns']*100
    AVGARCH11 = arch_model(AR_Data, dist ='t', power=1)
    res_AVGARCH11 = AVGARCH11.fit(disp='off',options={'maxiter': 1000})
    CV_AVGARCH11 = res_AVGARCH11.conditional_volatility
    For_CV_AVGARCH11 = np.array(res_AVGARCH11.forecast(horizon=4,method="bootstrap").variance.dropna())
    return AVGARCH11, res_AVGARCH11, CV_AVGARCH11, For_CV_AVGARCH11

#Fitting of FIGARCH11(1,1)
def FIGARCH_Model_Student(Data):
    AR_Data=Data['DailyReturns']*100
    FIGARCH11 = arch_model(AR_Data, dist ='t', vol="FIGARCH")
    res_FIGARCH11 = FIGARCH11.fit(disp='off')
    CV_FIGARCH11 = res_FIGARCH11.conditional_volatility
    For_CV_FIGARCH11 = np.array(res_FIGARCH11.forecast(horizon=4,method="bootstrap").variance.dropna())
    return FIGARCH11, res_FIGARCH11, CV_FIGARCH11, For_CV_FIGARCH11

#this old code was inconsistent with the original, it may be been forecasting steps y2,y3,y4,y5 instead of y1,y2,y3,y4
# def Transformer_Database (Timestep, XData_AR, YData_AR):
#     Features = XData_AR.shape[1]
#     Sample = XData_AR.shape[0] - Timestep - 3  # Adjusted to allow for a 4-step-ahead target
#     XDataTrainScaledRNN = np.zeros([Sample, Timestep, Features])
#     YDataTrainRNN = np.zeros([Sample, 4])  # Adjusted for 4-step-ahead forecasts
    
#     for i in range(Sample):
#         XDataTrainScaledRNN[i,:,:] = XData_AR[i:(Timestep+i)]
#         YDataTrainRNN[i, :] = YData_AR[(Timestep+i):(Timestep+i+4)]  # 4-step-ahead target
    
#     return XDataTrainScaledRNN, YDataTrainRNN

def Transformer_Database (Timestep, XData_AR, YData_AR):
    Features = XData_AR.shape[1]
    Sample = XData_AR.shape[0] - Timestep - 2  # Adjusted to allow for a 4-step-ahead target
    XDataTrainScaledRNN = np.zeros([Sample, Timestep, Features])
    YDataTrainRNN = np.zeros([Sample, 4])  # Adjusted for 4-step-ahead forecasts
    
    for i in range(Sample):
        XDataTrainScaledRNN[i,:,:] = XData_AR[i:(Timestep+i)]
        YDataTrainRNN[i, :] = YData_AR[(Timestep+i-1):(Timestep+i+3)]  # 4-step-ahead target
    
    return XDataTrainScaledRNN, YDataTrainRNN

#MultiHeadSelfAttention
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        self.projection_dim = embed_dim // num_heads
        self.query_dense = tf.keras.layers.Dense(embed_dim)
        self.key_dense = tf.keras.layers.Dense(embed_dim)
        self.value_dense = tf.keras.layers.Dense(embed_dim)
        self.combine_heads = tf.keras.layers.Dense(embed_dim)
    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights
    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])
    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(query, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(key, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(value, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)
        return output
        
#Transformer Keras Block
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        # self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.nb_dict = {}; self.Bagging=5
        for i in range(self.Bagging):
          self.nb_dict["att{0}".format(i)]=MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential([tf.keras.layers.Dense(ff_dim, activation="relu"), tf.keras.layers.Dense(embed_dim),])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
    def call(self, inputs, training):
        self.att_dict = {}
        for i in range(self.Bagging):
          self.att_dict["att{0}".format(i)]=self.nb_dict["att{0}".format(i)](tf.keras.layers.Dropout(.1)(inputs))
          if i==0: 
            self.att_dict["attn_output"]=self.att_dict["att{0}".format(i)]/self.Bagging 
          else: 
            self.att_dict["attn_output"]=self.att_dict["attn_output"]+self.att_dict["att{0}".format(i)]/self.Bagging
        attn_output = self.dropout1(self.att_dict["attn_output"], training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    
#Database is calculated
def DatabaseGenerationForecast (Database, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database,Lag)
    DailyReturnsOld =  np.append(np.repeat(np.nan, 1),DailyReturns[0:(DailyReturns.shape[0]-1)])
    SD = SDCalculation (DailyReturns, LagSD)
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)
    Data = pd.DataFrame({'DailyReturns': DailyReturns, 'SD': SD, 'TrueSD': TrueSD, 'DailyReturnsOld': DailyReturnsOld})
    Data = Data.set_index(Index) 
    return Data

#Database is calculated
def M_DatabaseGenerationForecast (Database_daily, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database_daily,Lag)    
    TrueSD = TrueSDCalculation(DailyReturns, LagSD)    
    Data = pd.DataFrame({'DailyReturns': DailyReturns,'TrueSD': TrueSD})
    Data = Data.set_index(Index)
    Data = Data.dropna() 
    weekly_returns = Data['DailyReturns'].resample('W-FRI').sum()
    weekly_average_volatility = Data['TrueSD'].resample('W-FRI').mean()*np.sqrt(5)
    
    Data = pd.DataFrame({'DailyReturns': weekly_returns,'TrueSD': weekly_average_volatility})
    return Data.dropna()

def Transformer_Model (Shape1, Shape2, HeadsAttention,Dropout, LearningRate):
    #Model struture is defined
    Input = tf.keras.Input(shape=(Shape1,Shape2), name="Input")
    #LSTM is applied on top of the transformer
    X = tf.keras.layers.LSTM(units=16, dropout=Dropout, return_sequences=True)(Input)
    #Tranformer architecture is implemented
    transformer_block_1 = TransformerBlock(embed_dim=16, num_heads=HeadsAttention, ff_dim=8, rate=Dropout)
    X = transformer_block_1(X)
    #Dense layers are used
    X = tf.keras.layers.GlobalAveragePooling1D()(X)
    X = tf.keras.layers.Dense(8, activation=tf.nn.sigmoid)(X)
    X = tf.keras.layers.Dropout(Dropout)(X)
    Output = tf.keras.layers.Dense(4, activation=tf.nn.sigmoid, name="Output")(X)
    model = tf.keras.Model(inputs=Input, outputs=Output)
    #Optimizer is defined
    Opt = tf.keras.optimizers.legacy.Adam(learning_rate=LearningRate, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,name='Adam')
    #Model is compiled
    model.compile(optimizer=Opt, loss='mean_squared_error', metrics=['mean_squared_error'])
    return model



def DatabaseGenerationForecast_AR (Database, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH):
    Data_Forecast=M_DatabaseGenerationForecast(Database, Lag, LagSD).iloc[(-2+1)]
    Index_Forecast=M_DatabaseGenerationForecast(Database, Lag, LagSD).index[(-2+1)]
    XDataForecast=[]
    # Flatten the double-nested lists
    For_CV_GARCH = [item for sublist in For_CV_GARCH for item in sublist]
    For_CV_GJR_GARCH = [item for sublist in For_CV_GJR_GARCH for item in sublist]
    For_CV_TARCH = [item for sublist in For_CV_TARCH for item in sublist]
    For_CV_EGARCH = [item for sublist in For_CV_EGARCH for item in sublist]
    For_CV_AVGARCH = [item for sublist in For_CV_AVGARCH for item in sublist]
    For_CV_FIGARCH = [item for sublist in For_CV_FIGARCH for item in sublist]
    for i in range(len(For_CV_AVGARCH)):
        forecast={'CV_GARCH' : For_CV_GARCH[i]/100, 'CV_GJR_GARCH' : For_CV_GJR_GARCH[i]/100, 'CV_TARCH' : For_CV_TARCH[i]/100, 
               'CV_EGARCH' : For_CV_EGARCH[i]/100, 'CV_AVGARCH' : For_CV_AVGARCH[i]/100, 'CV_FIGARCH' : For_CV_FIGARCH[i]/100}
        XDataForecast.append(pd.DataFrame([forecast], index=[Index_Forecast]))
    XDataForecast = pd.concat(XDataForecast)
    return XDataForecast, Data_Forecast['DailyReturns']

def T_ANN_ARCH_Forecast (Database,Timestep, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH,Scaled_Norm, XData_AR, model):
    XDataForecast, ReturnForecast = DatabaseGenerationForecast_AR (Database, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH)
    XDataForecast = pd.concat([XData_AR,XDataForecast])
    XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
    XDataForecastTotalScaled_T, Y_T = Transformer_Database(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
    TransformerPrediction = model.predict(XDataForecastTotalScaled_T)
    return TransformerPrediction[-2], XDataForecast.index[-1], TransformerPrediction[0:(XDataForecastTotalScaled_T.shape[0]-1)], ReturnForecast





## Step 2: Prepare the data for the Transformer model:

In the Transformer_Database function, you need to adjust the data preparation process to handle the 4-step-ahead forecast vectors from the ARCH models. This likely involves changes to how the X and Y arrays are constructed.

In [10]:
IndexEndDays=yf.download(asset,start=start,  end=end, progress=False).resample('W-FRI').last().index
i = 4
Database=yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date(), progress=False).resample('W-FRI').last()
Database_daily = yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date()  , progress=False)
#Database for fitting the models is generated
LagSD=5
Data = M_DatabaseGeneration(Database_daily, Lag, LagSD)
Data

Unnamed: 0_level_0,DailyReturns,TrueSD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-12-16,-0.000087,0.008076
2005-12-23,0.001057,0.010863
2005-12-30,-0.016187,0.019497
2006-01-06,0.029334,0.011155
2006-01-13,0.001679,0.011531
...,...,...
2007-12-28,-0.004030,0.019568
2008-01-04,-0.046276,0.031468
2008-01-11,-0.007545,0.034526
2008-01-18,-0.055645,0.036154


In [38]:
Lag=1; LagSD=5; Timestep=10; Dropout=0.05; LearningRate=0.01; Epochs = 100;BatchSize=64
GARCH, GARCH_Parameters, CV_GARCH, For_CV_GARCH = GARCH_Model_Student(Data)
GJR_GARCH, GJR_GARCH_Parameters, CV_GJR_GARCH, For_CV_GJR_GARCH = GJR_GARCH_Model_Student(Data)
TARCH, TARCH_Parameters, CV_TARCH, For_CV_TARCH = TARCH_Model_Student(Data)
EGARCH, EGARCH_Parameters,CV_EGARCH, For_CV_EGARCH = EGARCH_Model_Student(Data)
AVGARCH, AVGARCH_Parameters,CV_AVGARCH, For_CV_AVGARCH = AVGARCH_Model_Student(Data)
FIGARCH, FIGARCH_Parameters,CV_FIGARCH, For_CV_FIGARCH  = FIGARCH_Model_Student(Data)
#Database contaning AR models is generated
Data_AR=pd.concat([Data, CV_GARCH.rename('CV_GARCH')/100, CV_GJR_GARCH.rename('CV_GJR_GARCH')/100, CV_TARCH.rename('CV_TARCH')/100, 
                    CV_EGARCH.rename('CV_EGARCH')/100, CV_AVGARCH.rename('CV_AVGARCH')/100, CV_FIGARCH.rename('CV_FIGARCH')/100], axis=1)
if Data_AR.shape[0]!=Data.shape[0]: print("Error in DB Generation")
# #Original explanatory and response variables are generated
XData_AR = Data_AR.drop(['TrueSD','DailyReturns'], axis=1);YData_AR = Data_AR['TrueSD']
# #Data is normalized
Scaled_Norm = preprocessing.StandardScaler().fit(XData_AR); XData_AR_Norm = Scaled_Norm.transform(XData_AR)
#Data for fitting the transformer model is generated
XData_AR_Norm_T, YData_AR_Norm_T= Transformer_Database(Timestep, XData_AR_Norm, YData_AR)
# #Model with transformer layer is defined
model = Transformer_Model(XData_AR_Norm_T.shape[1], XData_AR_Norm_T.shape[2], HeadsAttention=4, Dropout=Dropout, LearningRate=LearningRate) #this shifts weekly True SD is working fine here
model.fit(XData_AR_Norm_T, YData_AR_Norm_T, epochs=Epochs, verbose=0, batch_size=BatchSize); tf.keras.backend.clear_session()
#T_ANN_ARCH_Forecast
XDataForecast, ReturnForecast = DatabaseGenerationForecast_AR (Database_daily, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH)
XDataForecast = pd.concat([XData_AR,XDataForecast*1/np.sqrt(5)])
XDataForecastTotalScaled = Scaled_Norm.transform(XDataForecast)
XDataForecastTotalScaled_T, Y_T = Transformer_Database(Timestep, XDataForecastTotalScaled, np.zeros(XDataForecastTotalScaled.shape[0]))
TransformerPrediction = model.predict(XDataForecastTotalScaled_T)




In [43]:
YData_AR_Norm_T

array([[0.01191176, 0.01445102, 0.01174104, 0.01137294],
       [0.01445102, 0.01174104, 0.01137294, 0.0094715 ],
       [0.01174104, 0.01137294, 0.0094715 , 0.01062454],
       [0.01137294, 0.0094715 , 0.01062454, 0.01095153],
       [0.0094715 , 0.01062454, 0.01095153, 0.01341099],
       [0.01062454, 0.01095153, 0.01341099, 0.01595017],
       [0.01095153, 0.01341099, 0.01595017, 0.01108245],
       [0.01341099, 0.01595017, 0.01108245, 0.00899191],
       [0.01595017, 0.01108245, 0.00899191, 0.0135342 ],
       [0.01108245, 0.00899191, 0.0135342 , 0.01600366],
       [0.00899191, 0.0135342 , 0.01600366, 0.01556924],
       [0.0135342 , 0.01600366, 0.01556924, 0.02251559],
       [0.01600366, 0.01556924, 0.02251559, 0.02504215],
       [0.01556924, 0.02251559, 0.02504215, 0.01757864],
       [0.02251559, 0.02504215, 0.01757864, 0.0256787 ],
       [0.02504215, 0.01757864, 0.0256787 , 0.01695107],
       [0.01757864, 0.0256787 , 0.01695107, 0.02283147],
       [0.0256787 , 0.01695107,

In [41]:
XData_AR_Norm_T.shape[2]


6

In [39]:
XDataForecast.tail(10)

Unnamed: 0,CV_GARCH,CV_GJR_GARCH,CV_TARCH,CV_EGARCH,CV_AVGARCH,CV_FIGARCH
2007-12-21,0.020527,0.023035,0.022328,0.022328,0.022328,0.020952
2007-12-28,0.020579,0.022727,0.02244,0.02244,0.02244,0.021529
2008-01-04,0.02063,0.022468,0.022552,0.022552,0.022552,0.020496
2008-01-11,0.020682,0.025162,0.022664,0.022664,0.022664,0.019174
2008-01-18,0.020733,0.02492,0.022776,0.022776,0.022776,0.02447
2008-01-25,0.020785,0.028402,0.022888,0.022888,0.022888,0.023615
2008-01-25,0.019406,0.035062,0.023739,0.023739,0.023738,0.038863
2008-01-25,0.019502,0.035167,0.02397,0.02397,0.023969,0.034621
2008-01-25,0.019597,0.035272,0.024203,0.024203,0.024202,0.03316
2008-01-25,0.019692,0.035377,0.024437,0.024437,0.024435,0.032037


In [85]:
TransformerPrediction[-2]

array([0.03037949, 0.03800619, 0.02900026, 0.03645329], dtype=float32)

In [48]:
def T_ANN_ARCH_Fit (Data,Database,Lag=1, LagSD=5, Timestep=10, Dropout=0.05, LearningRate=0.01, Epochs=1000, BatchSize=64):
    GARCH, GARCH_Parameters, CV_GARCH, For_CV_GARCH = GARCH_Model_Student(Data)
    GJR_GARCH, GJR_GARCH_Parameters, CV_GJR_GARCH, For_CV_GJR_GARCH = GJR_GARCH_Model_Student(Data)
    TARCH, TARCH_Parameters, CV_TARCH, For_CV_TARCH = TARCH_Model_Student(Data)
    EGARCH, EGARCH_Parameters,CV_EGARCH, For_CV_EGARCH = EGARCH_Model_Student(Data)
    AVGARCH, AVGARCH_Parameters,CV_AVGARCH, For_CV_AVGARCH = AVGARCH_Model_Student(Data)
    FIGARCH, FIGARCH_Parameters,CV_FIGARCH, For_CV_FIGARCH  = FIGARCH_Model_Student(Data)
    #Database contaning AR models is generated
    Data_AR=pd.concat([Data, CV_GARCH.rename('CV_GARCH')/100, CV_GJR_GARCH.rename('CV_GJR_GARCH')/100, CV_TARCH.rename('CV_TARCH')/100, 
                        CV_EGARCH.rename('CV_EGARCH')/100, CV_AVGARCH.rename('CV_AVGARCH')/100, CV_FIGARCH.rename('CV_FIGARCH')/100], axis=1)
    if Data_AR.shape[0]!=Data.shape[0]: print("Error in DB Generation")
    # #Original explanatory and response variables are generated
    XData_AR = Data_AR.drop(['TrueSD','DailyReturns'], axis=1);YData_AR = Data_AR['TrueSD']
    # #Data is normalized
    Scaled_Norm = preprocessing.StandardScaler().fit(XData_AR); XData_AR_Norm = Scaled_Norm.transform(XData_AR)
    #Data for fitting the transformer model is generated
    XData_AR_Norm_T, YData_AR_Norm_T= Transformer_Database(Timestep, XData_AR_Norm, YData_AR)
    #Model with transformer layer is defined
    model = Transformer_Model(XData_AR_Norm_T.shape[1], XData_AR_Norm_T.shape[2], HeadsAttention=4, Dropout=Dropout, LearningRate=LearningRate)
    model.fit(XData_AR_Norm_T, YData_AR_Norm_T, epochs=Epochs, verbose=0, batch_size=BatchSize); tf.keras.backend.clear_session()
    Forecast, Date_Forecast, TrainPrediction, ReturnForecast = T_ANN_ARCH_Forecast (Database,Timestep, Lag, LagSD, For_CV_GARCH, For_CV_GJR_GARCH, For_CV_TARCH, For_CV_EGARCH, For_CV_AVGARCH, For_CV_FIGARCH,Scaled_Norm, XData_AR, model)
    return {'Date_Forecast':Date_Forecast,'Forecast_T_ANN_ARCH':Forecast }


## Dropout  = 0.01

In [55]:
#Index of end dates, database for validation and dataframe to collect the results are created. Model variables are defined.
Start='2008-01-01'; End='2015-12-31'; 
asset = "BA"
# asset_name = re.sub('[\W\d_]+', '', asset)
IndexEndDays=yf.download(asset,start=Start,  end=End, progress=False).resample('W-FRI').last().index

Lag=1; LagSD=5; Timestep=10; Dropout=0.1; LearningRate=0.01; Epochs=100

DataValidation = DatabaseGeneration(yf.download(asset,start='2000-01-01', end=date.today()+timedelta(days=1), progress=False).resample('W-FRI').last(), Lag, LagSD)

ResultsCollection=pd.DataFrame({'Date_Forecast': [], 'h1': [], 'h2': [], 'h3':[], 'h4': [],'TrueSD':[]})
#Loop for generating the results
for i in tqdm(range(IndexEndDays.shape[0])):
    #Database is downloaded from yahoo finance and lag of returns defined
    Database=yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date(), progress=False).resample('W-FRI').last()
    Database_daily = yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date()  , progress=False)

    #Database for fitting the models is generated
    Data = M_DatabaseGeneration(Database_daily, Lag, LagSD)
    #Fitting of Transformed ANN-ARCH model, ARCH models and forecasting of the next volatility value
    T_ANN_ARCH_Model = T_ANN_ARCH_Fit (Data,Database_daily, Lag, LagSD, Timestep, Dropout, LearningRate, Epochs)

    
    IterResults={'Date_Forecast': T_ANN_ARCH_Model['Date_Forecast'].date(), 'h1': T_ANN_ARCH_Model['Forecast_T_ANN_ARCH'][0], 'h2': T_ANN_ARCH_Model['Forecast_T_ANN_ARCH'][1],
                 'h3': T_ANN_ARCH_Model['Forecast_T_ANN_ARCH'][2], 'h4': T_ANN_ARCH_Model['Forecast_T_ANN_ARCH'][3],'TrueSD':Data['TrueSD'][-1]}
    
    IterResults_df = pd.DataFrame(IterResults,index =[0])
    ResultsCollection = ResultsCollection.append(IterResults_df, ignore_index=True)

    # ResultsCollection.to_csv(f'./assets/5_MTL_GARCH_{asset_name}.csv',index=False)
    ResultsCollection.to_csv(f'./BA.csv',index=False)


  0%|          | 0/418 [00:00<?, ?it/s]



  0%|          | 1/418 [00:07<50:02,  7.20s/it]



  0%|          | 2/418 [00:13<45:34,  6.57s/it]



  1%|          | 3/418 [00:19<44:18,  6.41s/it]



  1%|          | 4/418 [00:26<45:20,  6.57s/it]



  1%|          | 5/418 [00:32<44:13,  6.42s/it]



  1%|▏         | 6/418 [00:38<43:39,  6.36s/it]



  2%|▏         | 7/418 [00:44<42:42,  6.23s/it]



  2%|▏         | 8/418 [00:50<40:47,  5.97s/it]



  2%|▏         | 9/418 [00:55<39:34,  5.81s/it]



  2%|▏         | 10/418 [01:01<38:55,  5.72s/it]



  3%|▎         | 11/418 [01:07<39:28,  5.82s/it]



  3%|▎         | 12/418 [01:12<38:49,  5.74s/it]



  3%|▎         | 13/418 [01:18<39:32,  5.86s/it]



  3%|▎         | 14/418 [01:25<41:11,  6.12s/it]



  4%|▎         | 15/418 [01:31<41:07,  6.12s/it]



  4%|▍         | 16/418 [01:37<41:00,  6.12s/it]



  4%|▍         | 17/418 [01:43<40:55,  6.12s/it]



  4%|▍         | 18/418 [01:50<42:23,  6.36s/it]



  5%|▍         | 19/418 [01:58<44:23,  6.67s/it]



  5%|▍         | 20/418 [02:04<43:22,  6.54s/it]



  5%|▌         | 21/418 [02:11<43:26,  6.57s/it]



  5%|▌         | 22/418 [02:17<42:31,  6.44s/it]



  6%|▌         | 23/418 [02:24<44:32,  6.76s/it]



  6%|▌         | 24/418 [02:31<44:13,  6.73s/it]



  6%|▌         | 25/418 [02:36<41:30,  6.34s/it]



  6%|▌         | 26/418 [02:42<39:21,  6.02s/it]



  6%|▋         | 27/418 [02:47<38:00,  5.83s/it]



  7%|▋         | 28/418 [02:53<38:16,  5.89s/it]



  7%|▋         | 29/418 [02:58<37:15,  5.75s/it]



  7%|▋         | 30/418 [03:04<36:27,  5.64s/it]



  7%|▋         | 31/418 [03:09<35:55,  5.57s/it]



  8%|▊         | 32/418 [03:15<36:51,  5.73s/it]



  8%|▊         | 33/418 [03:21<36:35,  5.70s/it]



  8%|▊         | 34/418 [03:28<38:15,  5.98s/it]



  8%|▊         | 35/418 [03:34<39:10,  6.14s/it]



  9%|▊         | 36/418 [03:40<37:44,  5.93s/it]



  9%|▉         | 37/418 [03:45<36:42,  5.78s/it]



  9%|▉         | 38/418 [03:51<36:55,  5.83s/it]



  9%|▉         | 39/418 [03:56<36:10,  5.73s/it]



 10%|▉         | 40/418 [04:02<36:28,  5.79s/it]



 10%|▉         | 41/418 [04:08<35:56,  5.72s/it]



 10%|█         | 42/418 [04:15<38:50,  6.20s/it]



 10%|█         | 43/418 [04:21<38:08,  6.10s/it]



 11%|█         | 44/418 [04:27<37:49,  6.07s/it]



 11%|█         | 45/418 [04:33<37:13,  5.99s/it]



 11%|█         | 46/418 [04:39<37:54,  6.12s/it]



 11%|█         | 47/418 [04:45<36:57,  5.98s/it]



 11%|█▏        | 48/418 [04:50<35:50,  5.81s/it]



 12%|█▏        | 49/418 [04:56<35:53,  5.84s/it]



 12%|█▏        | 50/418 [05:02<34:55,  5.70s/it]



 12%|█▏        | 51/418 [05:07<34:26,  5.63s/it]



 12%|█▏        | 52/418 [05:13<34:59,  5.74s/it]



 13%|█▎        | 53/418 [05:19<34:14,  5.63s/it]



 13%|█▎        | 54/418 [05:24<33:48,  5.57s/it]



 13%|█▎        | 55/418 [05:30<34:36,  5.72s/it]



 13%|█▎        | 56/418 [05:37<36:16,  6.01s/it]



 14%|█▎        | 57/418 [05:43<36:18,  6.04s/it]



 14%|█▍        | 58/418 [05:50<37:48,  6.30s/it]



 14%|█▍        | 59/418 [05:56<37:17,  6.23s/it]



 14%|█▍        | 60/418 [06:02<37:52,  6.35s/it]



 15%|█▍        | 61/418 [06:09<37:32,  6.31s/it]



 15%|█▍        | 62/418 [06:14<35:56,  6.06s/it]



 15%|█▌        | 63/418 [06:20<35:48,  6.05s/it]



 15%|█▌        | 64/418 [06:26<34:41,  5.88s/it]



 16%|█▌        | 65/418 [06:31<33:48,  5.75s/it]



 16%|█▌        | 66/418 [06:36<33:02,  5.63s/it]



 16%|█▌        | 67/418 [06:42<33:40,  5.76s/it]



 16%|█▋        | 68/418 [06:48<33:03,  5.67s/it]



 17%|█▋        | 69/418 [06:53<32:38,  5.61s/it]



 17%|█▋        | 70/418 [06:59<33:14,  5.73s/it]



 17%|█▋        | 71/418 [07:05<32:36,  5.64s/it]



 17%|█▋        | 72/418 [07:10<32:06,  5.57s/it]



 17%|█▋        | 73/418 [07:16<32:45,  5.70s/it]



 18%|█▊        | 74/418 [07:22<32:10,  5.61s/it]



 18%|█▊        | 75/418 [07:27<32:00,  5.60s/it]



 18%|█▊        | 76/418 [07:33<31:36,  5.54s/it]



 18%|█▊        | 77/418 [07:39<32:20,  5.69s/it]



 19%|█▊        | 78/418 [07:44<31:48,  5.61s/it]



 19%|█▉        | 79/418 [07:50<31:26,  5.56s/it]



 19%|█▉        | 80/418 [07:55<31:11,  5.54s/it]



 19%|█▉        | 81/418 [08:01<32:00,  5.70s/it]



 20%|█▉        | 82/418 [08:07<32:35,  5.82s/it]



 20%|█▉        | 83/418 [08:13<33:06,  5.93s/it]



 20%|██        | 84/418 [08:20<34:23,  6.18s/it]



 20%|██        | 85/418 [08:26<34:19,  6.18s/it]



 21%|██        | 86/418 [08:32<34:03,  6.15s/it]



 21%|██        | 87/418 [08:40<35:44,  6.48s/it]



 21%|██        | 88/418 [08:46<36:01,  6.55s/it]



 21%|██▏       | 89/418 [08:53<35:14,  6.43s/it]



 22%|██▏       | 90/418 [09:00<36:15,  6.63s/it]



 22%|██▏       | 91/418 [09:07<36:36,  6.72s/it]



 22%|██▏       | 92/418 [09:13<35:38,  6.56s/it]



 22%|██▏       | 93/418 [09:20<36:33,  6.75s/it]



 22%|██▏       | 94/418 [09:30<41:43,  7.73s/it]



 23%|██▎       | 95/418 [09:36<38:57,  7.24s/it]



 23%|██▎       | 96/418 [09:42<37:15,  6.94s/it]



 23%|██▎       | 97/418 [09:48<35:42,  6.67s/it]



 23%|██▎       | 98/418 [09:55<36:02,  6.76s/it]



 24%|██▎       | 99/418 [10:02<36:24,  6.85s/it]



 24%|██▍       | 100/418 [10:08<35:05,  6.62s/it]



 24%|██▍       | 101/418 [10:15<34:09,  6.47s/it]



 24%|██▍       | 102/418 [10:21<32:06,  6.10s/it]


1 Failed download:
- BA: No data found for this date range, symbol may be delisted





TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'