In [1]:
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta
from arch import arch_model
import yfinance as yf
from tqdm import tqdm
from arch.__future__ import reindexing

In [8]:
#Return calculation
def ReturnCalculation (Database,lag):
    dimension=Database.shape[0];dif=lag;Out=np.zeros([dimension-dif])
    for i in range(dimension-dif):
        Out[i]=(np.log(Database['Close'][i+dif])-np.log(Database['Close'][i]))
    return np.append(np.repeat(np.nan, dif),Out), Database.index

#STD Calculation
def SDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif])
    for i in range (dimension-dif):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(np.repeat(np.nan, dif),Out)

#STD Calculation
def TrueSDCalculation (DailyReturns, LagSD):
    dimension=DailyReturns.shape[0]; dif=LagSD; Out=np.zeros([dimension-dif+1])
    for i in range (dimension-dif+1):
        Out[i]=np.std(DailyReturns[i:i+LagSD],ddof=1)
    return np.append(Out,np.repeat(np.nan, dif-1))


#Database is calculated
def M_DatabaseGeneration (Database_daily, Lag, LagSD):
    DailyReturns, Index = ReturnCalculation(Database_daily,Lag)    
    TrueSD = SDCalculation(DailyReturns, LagSD)    
    Data = pd.DataFrame({'DailyReturns': DailyReturns,'TrueSD': TrueSD})
    Data = Data.set_index(Index)
    Data = Data.dropna() 
    weekly_returns = Data['DailyReturns'].resample('W-FRI').sum()
    weekly_average_volatility = Data['TrueSD'].resample('W-FRI').mean()*np.sqrt(5) 
    
    Data = pd.DataFrame({'DailyReturns': weekly_returns,'TrueSD': weekly_average_volatility})
    return Data.dropna()





In [9]:
start='2008-01-01'; end='2015-12-31'; 
asset = "^GSPC"
Lag=1; LagSD=5
IndexEndDays=yf.download(asset,start=start,  end=end, progress=False).resample('W-FRI').last().index
Database=yf.download(asset,start, end, progress=False).resample('W-FRI').last()
Data = M_DatabaseGeneration(Database, Lag, LagSD)
Data

Unnamed: 0_level_0,DailyReturns,TrueSD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-02-15,0.013949,0.093263
2008-02-22,0.002308,0.096872
2008-02-29,-0.016753,0.075868
2008-03-07,-0.028401,0.078706
2008-03-14,-0.004052,0.054165
...,...,...
2015-12-04,0.000756,0.055728
2015-12-11,-0.038659,0.055725
2015-12-18,-0.003395,0.066508
2015-12-25,0.027268,0.056196


### GARCH (1,1)

In [10]:
#Fitting of GARCH(1,1)
def GARCH_Model_Student (Data):
    AR_Data=Data['DailyReturns']*100
    GARCH11 = arch_model(AR_Data, dist ='t')
    res_GARCH11 = GARCH11.fit(disp='off')
    CV_GARCH11 = res_GARCH11.conditional_volatility
    For_CV_GARCH11 = np.array(res_GARCH11.forecast(horizon=4).variance.dropna())
    return GARCH11, res_GARCH11, CV_GARCH11, For_CV_GARCH11

In [11]:

IndexEndDays=yf.download(asset,start=start,  end=end, progress=False).resample('W-FRI').last().index

Lag=1; LagSD=5; Timestep=10; Dropout=0.1; LearningRate=0.01; Epochs=100

DataValidation = M_DatabaseGeneration(yf.download(asset,start='2000-01-01', end=date.today()+timedelta(days=1), progress=False).resample('W-FRI').last(), Lag, LagSD)

ResultsCollection=pd.DataFrame({'Date_Forecast': [], 'h1': [], 'h2': [], 'h3':[], 'h4': []})
#Loop for generating the results
for i in tqdm(range(IndexEndDays.shape[0])):
    #Database is downloaded from yahoo finance and lag of returns defined
    Database=yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date(), progress=False).resample('W-FRI').last()
    Database_daily = yf.download(asset,start=IndexEndDays[i].date()-timedelta(days=780), end=IndexEndDays[i].date() , progress=False)
    #Database for fitting the models is generated
    Data = M_DatabaseGeneration(Database_daily, Lag, LagSD)
   
    #Fitting of Transformed ANN-ARCH model, ARCH models and forecasting of the next volatility value

    ARCH11, res_GARCH11, CV_GARCH11, GARCH = GARCH_Model_Student(Data)
    Date_Forcast = IndexEndDays[i]

    IterResults={'Date_Forecast':Date_Forcast.date(),'h1': GARCH[0][0]/100,'h2': GARCH[0][1]/100,'h3': GARCH[0][2]/100,'h4': GARCH[0][3]/100,'TrueSD':Data['TrueSD'][-1]}
    
    IterResults_df = pd.DataFrame(IterResults,index =[0])
    ResultsCollection = ResultsCollection.append(IterResults_df, ignore_index=True)

    ResultsCollection.to_csv(f'/Users/lesegomatojane/Documents/MIT807/Muliti-transformer/MultiTransformer-master/MultiTransformer/Train/assets/Drop001/GARCH/GARCH_GSPC.csv',index=False)



100%|██████████| 418/418 [14:34<00:00,  2.09s/it]


In [12]:
path = '/Users/lesegomatojane/Documents/MIT807/Muliti-transformer/MultiTransformer-master/MultiTransformer/Train/assets/Drop001/GARCH/GARCH_GSPC.csv'
# path = 'test3.csv'
# Re-importing the test.csv dataset using the correct date column name
df_updated= pd.read_csv(path, index_col='Date_Forecast', parse_dates=True)
# df_updated = df_updated.drop(df_updated.columns[4],axis=1)
df_updated

Unnamed: 0_level_0,h1,h2,h3,h4,TrueSD
Date_Forecast,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-01-04,0.036610,0.036788,0.036965,0.037143,0.020729
2008-01-11,0.038613,0.038794,0.038976,0.039157,0.028113
2008-01-18,0.042652,0.042866,0.043081,0.043295,0.034126
2008-01-25,0.043982,0.044200,0.044419,0.044637,0.034069
2008-02-01,0.061618,0.061936,0.062254,0.062572,0.033586
...,...,...,...,...,...
2015-12-04,0.042419,0.042666,0.042913,0.043160,0.009915
2015-12-11,0.040322,0.040538,0.040754,0.040970,0.031972
2015-12-18,0.042362,0.042591,0.042821,0.043050,0.023777
2015-12-25,0.041696,0.041908,0.042119,0.042330,0.033333
