In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

def evaluate(observation, forecast):
    """Return the root mean squared error (RMSE) between two aligned sequences.

    Parameters
    ----------
    observation : array-like
        Observed values, passed to ``mean_squared_error`` as ``y_true``.
    forecast : array-like
        Forecast values, passed to ``mean_squared_error`` as ``y_pred``.

    Returns
    -------
    float
        Square root of the mean squared error.
    """
    # The original also computed the MAE, but the value was never used or
    # returned; only the MSE feeds the result, so only the MSE is computed.
    mse = mean_squared_error(observation, forecast)
    return math.sqrt(mse)

In [None]:
def applyGarchModels(asset, asset_name):
    """Fit three skew-t volatility models to ``asset``, save and score them.

    The models are GARCH(p=1, q=1), GARCH with an asymmetry term (o=1, the
    variable the original called ``GJR``) and EGARCH(p=1, o=1, q=1), all with
    a constant mean and ``dist="skewt"``.

    Parameters
    ----------
    asset : pandas.Series
        Series the models are fitted to; written to the ``Return`` column.
    asset_name : str
        Label inserted into the output file name.

    Returns
    -------
    None
        Side effects only: writes ``graphic_path + "garch_" + asset_name +
        ".csv"`` (``graphic_path`` is defined outside this block) and prints
        one RMSE per model, in the order GARCH, GJR, EGARCH.
    """
    shared = dict(p=1, q=1, mean="constant", dist="skewt")
    # Build every specification first, then fit them, as the original did.
    models = {
        'SkewT': arch_model(asset, vol="GARCH", **shared),
        'GJR': arch_model(asset, o=1, vol="GARCH", **shared),
        'E': arch_model(asset, o=1, vol="EGARCH", **shared),
    }
    volatility = {
        label: model.fit(disp="off").conditional_volatility
        for label, model in models.items()
    }

    # ``keys`` (not ``names``) is what labels the concatenated columns.
    # ``names`` only names the levels of a hierarchical index and has no
    # effect without ``keys``, which left the CSV with the Series' own names
    # (three identical conditional-volatility headers).
    garchVolatilityData = pd.concat(
        [asset, *volatility.values()],
        axis=1,
        keys=['Return', *volatility],
    )
    garchVolatilityData.to_csv(graphic_path + "garch_" + asset_name + ".csv")

    # Squared deviation from the sample mean is the variance proxy each
    # model's squared conditional volatility is scored against.
    squared_deviation = asset.sub(asset.mean()).pow(2)
    for fitted_vol in volatility.values():
        print (round(evaluate(squared_deviation, fitted_vol**2), 3))

In [None]:
def applyMLModels(asset, asset_name, window=5):
    """Fit SVR, MLP and Keras dense-network volatility models and save the fits.

    The regression target is the rolling standard deviation of ``asset`` over
    ``window`` observations. The two feature columns are that same rolling
    standard deviation and the squared values of ``asset``. All models are
    fitted and evaluated on the same (in-sample) rows.

    NOTE(review): the first feature column is identical to the target, so the
    models can reproduce the target directly. Confirm whether a lagged
    feature/target was intended.

    Parameters
    ----------
    asset : pandas.Series
        Series to model; its index is written to the ``Dates`` column and its
        values to the ``Return`` column of the output file.
    asset_name : str
        Label inserted into the output file name.
    window : int, default 5
        Rolling window length. The first ``window - 1`` rows have no rolling
        standard deviation and are excluded from fitting, scoring and output.

    Returns
    -------
    None
        Side effects only: prints ``RBF fitted`` plus one RMSE per model
        (SVR, MLP, Keras) and writes ``graphic_path + "ml_" + asset_name +
        ".csv"`` (``graphic_path`` is defined outside this block).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 2 (a standard deviation needs two points).
    """
    if window < 2:
        raise ValueError("window must be at least 2")

    np.random.seed(31415)

    # rolling(window).std() is NaN for the first window - 1 rows; drop the
    # same leading rows from every series so features, target, evaluation
    # proxy and output stay aligned (previously a hard-coded 4 paired with 5).
    skip = window - 1
    sample = asset.iloc[skip:]

    realized_vol = asset.rolling(window).std().to_numpy()[skip:]
    # Plain arrays avoid depending on the index being named 'Date', which the
    # original `del returns_svm['Date']` required.
    squared_returns = asset.pow(2).to_numpy()[skip:]
    X = np.column_stack([realized_vol, squared_returns])

    svr_rbf = SVR(kernel='rbf')
    para_grid = {'gamma': sp_rand(),
                 'C': sp_rand(),
                 'epsilon': sp_rand()}
    clf_SVR_rbf = RandomizedSearchCV(svr_rbf, para_grid)

    NN_vol = MLPRegressor(learning_rate_init=0.001, random_state=1)
    para_grid_NN = {'hidden_layer_sizes': [(100, 50), (50, 50), (10, 100)],
                    'max_iter': [500, 1000],
                    'alpha': [0.00005, 0.0005]}
    clf_NN = RandomizedSearchCV(NN_vol, para_grid_NN)

    DLmodel = keras.Sequential(
        [layers.Dense(256, activation="relu"),
         layers.Dense(128, activation="relu"),
         layers.Dense(1, activation="linear")])
    DLmodel.compile(loss='mse', optimizer='rmsprop')

    clf_SVR_rbf.fit(X, realized_vol)
    print ("RBF fitted")
    clf_NN.fit(X, realized_vol)

    # Four successive fit calls on the same model, using 100, 104, 108 and 112
    # for both the epoch count and the batch size (the original zipped two
    # identical aranges with range(4)).
    for step in np.arange(100, 400, 4)[:4]:
        DLmodel.fit(X, realized_vol,
                    batch_size=step, epochs=step, verbose=False)

    fitted_SVR_rbf = clf_SVR_rbf.predict(X)
    fitted_NN = clf_NN.predict(X)
    # Keras returns a column vector; flatten it once for scoring and output.
    fitted_DL = DLmodel.predict(X).ravel()

    # Squared deviation from the sample mean is the variance proxy the squared
    # fitted volatilities are scored against.
    squared_deviation = sample.sub(sample.mean()).pow(2)
    print (round(evaluate(squared_deviation, fitted_SVR_rbf**2), 2))
    print (round(evaluate(squared_deviation, fitted_NN**2), 2))
    print (round(evaluate(squared_deviation, fitted_DL**2), 2))

    # Lists (not Series) so the frame is built positionally with a fresh
    # RangeIndex, exactly like the original zip-based construction.
    df = pd.DataFrame({
        'Dates': sample.index.tolist(),
        'Return': sample.tolist(),
        'SVR': fitted_SVR_rbf.tolist(),
        'NN': fitted_NN.tolist(),
        'DL': fitted_DL.tolist(),
    })

    df.to_csv(graphic_path + "ml_" + asset_name + ".csv")

# Run the ML volatility pipeline once per asset, in the same order as before.
for _series, _label in ((BTC, "BTC"), (SP, "SP"), (CABLE, "CABLE")):
    applyMLModels(_series, _label)



In [None]:
def applyMLModels(asset, asset_name, window=5):
    """Fit SVR, MLP and Keras dense-network volatility models on squared returns.

    This definition replaces any earlier ``applyMLModels`` in the notebook
    (the later ``def`` wins). The regression target is the rolling standard
    deviation of ``asset`` over ``window`` observations; the single feature is
    the squared value of ``asset``. All models are fitted and evaluated on the
    same (in-sample) rows.

    NOTE(review): the previous body had the feature/target construction
    commented out while still using ``X`` and ``realized_vol``, so it failed
    with ``UnboundLocalError`` on ``X = X.values``. It has been reconstructed
    from the code that was still live (squared returns as the feature,
    rolling-std target, first ``window - 1`` rows dropped). Confirm this
    matches the intended experiment.

    Parameters
    ----------
    asset : pandas.Series
        Series to model; its index is written to the ``Dates`` column and its
        values to the ``Return`` column of the output file.
    asset_name : str
        Label inserted into the output file name.
    window : int, default 5
        Rolling window length. The first ``window - 1`` rows have no rolling
        standard deviation and are excluded from fitting, scoring and output.

    Returns
    -------
    None
        Side effects only: prints ``RBF fitted`` plus one RMSE per model
        (SVR, MLP, Keras) and writes ``graphic_path + "ml_" + asset_name +
        ".csv"`` (``graphic_path`` is defined outside this block).

    Raises
    ------
    ValueError
        If ``window`` is smaller than 2 (a standard deviation needs two points).
    """
    if window < 2:
        raise ValueError("window must be at least 2")

    np.random.seed(31415)

    # rolling(window).std() is NaN for the first window - 1 rows; drop the
    # same leading rows everywhere so feature, target, evaluation proxy and
    # output stay aligned.
    skip = window - 1
    sample = asset.iloc[skip:]

    realized_vol = asset.rolling(window).std().to_numpy()[skip:]
    # Single-column 2-D feature matrix of squared returns. Working on plain
    # arrays avoids depending on the index being named 'Date'.
    X = asset.pow(2).to_numpy()[skip:].reshape(-1, 1)

    svr_rbf = SVR(kernel='rbf')
    para_grid = {'gamma': sp_rand(),
                 'C': sp_rand(),
                 'epsilon': sp_rand()}
    clf_SVR_rbf = RandomizedSearchCV(svr_rbf, para_grid)

    NN_vol = MLPRegressor(learning_rate_init=0.001, random_state=1)
    para_grid_NN = {'hidden_layer_sizes': [(100, 50), (50, 50), (10, 100)],
                    'max_iter': [500, 1000],
                    'alpha': [0.00005, 0.0005]}
    clf_NN = RandomizedSearchCV(NN_vol, para_grid_NN)

    DLmodel = keras.Sequential(
        [layers.Dense(256, activation="relu"),
         layers.Dense(128, activation="relu"),
         layers.Dense(1, activation="linear")])
    DLmodel.compile(loss='mse', optimizer='rmsprop')

    clf_SVR_rbf.fit(X, realized_vol)
    print ("RBF fitted")
    clf_NN.fit(X, realized_vol)

    # Four successive fit calls on the same model, using 100, 104, 108 and 112
    # for both the epoch count and the batch size (the original zipped two
    # identical aranges with range(4)).
    for step in np.arange(100, 400, 4)[:4]:
        DLmodel.fit(X, realized_vol,
                    batch_size=step, epochs=step, verbose=False)

    fitted_SVR_rbf = clf_SVR_rbf.predict(X)
    fitted_NN = clf_NN.predict(X)
    # Keras returns a column vector; flatten it once for scoring and output.
    fitted_DL = DLmodel.predict(X).ravel()

    # Squared deviation from the sample mean is the variance proxy the squared
    # fitted volatilities are scored against.
    squared_deviation = sample.sub(sample.mean()).pow(2)
    print (round(evaluate(squared_deviation, fitted_SVR_rbf**2), 2))
    print (round(evaluate(squared_deviation, fitted_NN**2), 2))
    print (round(evaluate(squared_deviation, fitted_DL**2), 2))

    # Lists (not Series) so the frame is built positionally with a fresh
    # RangeIndex, exactly like the original zip-based construction.
    df = pd.DataFrame({
        'Dates': sample.index.tolist(),
        'Return': sample.tolist(),
        'SVR': fitted_SVR_rbf.tolist(),
        'NN': fitted_NN.tolist(),
        'DL': fitted_DL.tolist(),
    })

    df.to_csv(graphic_path + "ml_" + asset_name + ".csv")

# Run the ML volatility pipeline once per asset, in the same order as before.
for _series, _label in ((BTC, "BTC"), (SP, "SP"), (CABLE, "CABLE")):
    applyMLModels(_series, _label)

