# 1. Install and call packages

In [43]:
import pandas as pd
import numpy as np
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats.distributions import chi2 
from arch import arch_model
from math import sqrt
import seaborn as sns
sns.set()
import scipy.optimize as sco
import matplotlib.pyplot as plt
import datetime as dt
import yfinance as yf

In [44]:
import config as fig

In [45]:
from arch.__future__ import reindexing

# 2. Define GARCH function (We simply import the data we already produced)

In [6]:
def GARCH_predict(symbol_list, start, end, interval): 
    
    #download data and calculate returns
    data = yf.download(symbol_list, start, end, interval = interval)
    ret = data.pct_change()['Adj Close']
    ret = ret.dropna()
    
    #create list to store predicted variance and volatility
    variance_list = []
    vol_list = []
    
    for symbol in symbol_list:
        
        model = arch_model(ret[symbol], 
                            mean = "Constant",
                            vol = "GARCH", 
                            dist = 'normal', 
                            p = 1, q = 1, 
                            rescale = False) 
       
        result = model.fit(update_freq = 5, disp = 'off')
        forecast = result.forecast()
        
        predict_var = (forecast.variance.iloc[-1]).iloc[0]
        variance_list.append(predict_var)
        vol_list.append(np.sqrt(predict_var))
        
        # It's optional to print other statistical result
        # print(result.plot())
        # print(result.summary())
        # print(forecast.mean)

    df = pd.DataFrame(columns = symbol_list, index = ['predicted var','predicted vol'])
    df.loc['predicted var'] = variance_list
    df.loc['predicted vol'] = vol_list
    
    # The function returns a DataFrame containing predicted variance and volatility values.
    return(df)



# 3. Define Basic MVO Framework

In [61]:
def neg_sharpe_ratio(weights, mean_returns, cov_matrix, risk_free_rate):
    
    # Recall portfolio_annualised_performance(weights, mean_returns, cov_matrix) returns portfolio standard deviation and portfolio return
    p_var = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    p_ret = np.sum(mean_returns*weights)
    return -(p_ret - risk_free_rate/52) / p_var

def max_sharpe_ratio(mean_returns, cov_matrix, risk_free_rate):
    num_assets = len(mean_returns)
    args = (mean_returns, cov_matrix, risk_free_rate)
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    bound = (0,0.25)
    bounds = tuple(bound for asset in range(num_assets))
    
    result = sco.minimize(neg_sharpe_ratio, num_assets*[1./num_assets,], args=args,
                        method='SLSQP', bounds=bounds, constraints=constraints)
    return result

def MVO_result(df,mean_returns, cov_matrix, risk_free_rate):    

    max_sharpe = max_sharpe_ratio(mean_returns, cov_matrix, risk_free_rate)
    print ("-"*80)
    print ("Maximum Sharpe Ratio Portfolio Allocation\n")
    print (max_sharpe)
    
    weights = max_sharpe['x']
    rp = np.sum(mean_returns*weights)
    sdp = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    
    
    max_sharpe_allocation = pd.DataFrame(max_sharpe.x,index=df.columns,columns=['allocation'])
    max_sharpe_allocation.allocation = [round(i*100,2)for i in max_sharpe_allocation.allocation]
    max_sharpe_allocation = max_sharpe_allocation.T
    
    print ("-"*80)
    print ("Weekly Return:", round(rp,5))
    print ("Weekly Volatility:", round(sdp,5))
    print ("Max Weekly Sharpe Ratio:", (rp - (risk_free_rate/52))/sdp)
    print ("\n")
    print (max_sharpe_allocation)
    return max_sharpe.x

# 4. Seclet Stocks Based on Valuation Matrix Score

The FactSet-sourced file contains valuation scores for SP500 composition stocks, categorized by industry. I re-grouped the data by narrowing industry types down to 9 types only: Financials, Chemicals, Tech, Utilities, Air, F&B, Oil, Services and Others. The criteria used in my trading is to select the stocks with best combined score in each industry

In [54]:
# upload stock score data scv file (downloaded from FactSet)
stock = pd.read_csv('./files/Scoring the SP 500 - Valuation and Sales Growth.csv', na_values=['#N/A'])

# set index by symbol
stock = stock.set_index('Symbol')

# look for the max score within each industry
stock['score_max'] = stock.groupby(['Industry'])['Combined Score'].transform(max)

# select stocks with industry max score 
selection = stock[stock['Combined Score']>=stock['score_max']*0.99]

# 5. Call Functions to Calculate Allocation for chozen stocks

In [55]:
# input parameters
# symbol_list = selection.index.tolist()
symbol_list = ['AAPL', 'MSFT', 'NVDA', 'JNJ', 'NVS','JPM','GS','AMZN','DIS','MCD','NEE','BA','CAT','XOM','CVX','RIO','BHP']
end = dt.datetime.now()
start = end - dt.timedelta(140)
interval = "1wk"

# download data
returns = yf.download(symbol_list, start, end, interval = interval).pct_change()['Adj Close'].dropna()
mean_returns = returns.mean()
cov_matrix = returns.cov()
risk_free_rate = 0.12 / 100

# print optimal allocation using historical covarianc matrix
allocation_hist = MVO_result(returns, mean_returns, cov_matrix, risk_free_rate)
print('MVO result by historical covariance matrix')
print(allocation_hist)

[*********************100%***********************]  17 of 17 completed
MVO result by historical covariance matrix
[2.50000000e-01 6.60980435e-02 0.00000000e+00 0.00000000e+00
 0.00000000e+00 4.25265173e-15 0.00000000e+00 0.00000000e+00
 2.16254314e-01 2.71018421e-15 2.50000000e-01 0.00000000e+00
 4.27828056e-15 2.17074581e-01 5.73061539e-04 0.00000000e+00
 4.50628123e-15]


### Using the predicted variance

The diagonal elements represent the variance of the returns of each asset. Variance is the square of volatility, so if you have the forecasted volatilities from your GARCH model, you would square these values to get the variance, and then place those on the diagonal of your covariance matrix.

In [1]:
import pandas as pd
import os

# Define your file path
File_path = "../Output/assets/Drop000/"
Files = os.listdir(File_path)



def retrieve_var(date):
    symbol_list = []
    variance_list = []
    # Initialize an empty DataFrame to store forecasts
    Forecasts = pd.DataFrame(columns=['Asset', 'predicted var'])

    # Loop through all files

    for file in Files:
        # Skip the ".DS_Store" file
        if file == ".DS_Store":
            continue
        
        path = os.path.join(File_path, file)
        asset_name = file.split('.')[0].split('_')[3]  # Assumes file name is the asset name
        symbol_list.append(asset_name)
        asset = pd.read_csv(path)
        
        # Get 'VaR_T_ANN_ARCH' for the given date
        var_value = asset[asset['Date_Forecast'] == date]['Forecast_T_ANN_ARCH'].values[0]
        variance_list.append(var_value)
        
    variance_list = np.reshape(variance_list, (1, -1))
    # Create a DataFrame with the asset names as columns and predicted variances as the row
    df = pd.DataFrame(variance_list, columns=symbol_list, index=['predicted var'])
    return df

In [57]:
from IPython.display import display
import ipywidgets as widgets

widget_start_date = widgets.DatePicker(
    description='Start date',
    disabled=False
)
widget_start_time = widgets.DatetimePicker(
    description='Start time',
    disabled=False
)
widget_end_date = widgets.DatePicker(
    description='End date',
    disabled=False
)

display(widget_start_date, widget_start_time, widget_end_date)

DatePicker(value=None, description='Start date', step=1)

DatetimePicker(value=None, description='Start time')

DatePicker(value=None, description='End date', step=1)

In [58]:
MTL_GARCH_pred = retrieve_var(str(widget_start_date.value))
MTL_GARCH_pred

IndexError: index 0 is out of bounds for axis 0 with size 0

In [63]:

# replace diagonal elements of cov matrix by GARCH-predicted variance.
# GARCH_var = GARCH_predict(symbol_list, start, end, interval)

adjust_cov_matrix =  cov_matrix.copy()
for symbols in symbol_list:
    adjust_cov_matrix[symbols][symbols] = MTL_GARCH_pred[symbols][0]

# print optimal allocation using GARCH covariance matrix
allocation_GARCH = MVO_result(returns, mean_returns, adjust_cov_matrix, risk_free_rate)
print('MVO result by GARCH-based covariance matrix')
print(np.rint(allocation_GARCH*100))

# print the shrinkage allocation
print('-'*80)
print('MVO shrinkage result')
print(0.5*allocation_hist + 0.5*allocation_GARCH)

--------------------------------------------------------------------------------
Maximum Sharpe Ratio Portfolio Allocation

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: -0.18537378065230703
       x: [ 1.295e-01  1.318e-01 ...  1.935e-17  1.714e-17]
     nit: 6
     jac: [ 1.711e-02  1.715e-02 ...  5.225e-02  5.981e-02]
    nfev: 108
    njev: 6
--------------------------------------------------------------------------------
Weekly Return: 0.01647
Weekly Volatility: 0.08875
Max Weekly Sharpe Ratio: 0.18537378065230703


             AAPL   AMZN    BA  BHP   CAT  CVX  DIS   GS   JNJ   JPM  MCD  \
allocation  12.95  13.18  0.29  0.0  1.02  0.0  0.0  0.0  4.44  1.92  8.1   

             MSFT  NEE  NVDA    NVS  RIO  XOM  
allocation  18.36  1.7  25.0  13.03  0.0  0.0  
MVO result by GARCH-based covariance matrix
[13. 13.  0.  0.  1.  0.  0.  0.  4.  2.  8. 18.  2. 25. 13.  0.  0.]
----------------------------------------------------------------------

In [64]:
allocation_GARCH

array([1.29546330e-01, 1.31764596e-01, 2.87219196e-03, 0.00000000e+00,
       1.01942461e-02, 0.00000000e+00, 3.56838040e-17, 0.00000000e+00,
       4.44270438e-02, 1.91856899e-02, 8.10466633e-02, 1.83626833e-01,
       1.70298999e-02, 2.50000000e-01, 1.30306506e-01, 1.93478497e-17,
       1.71388647e-17])

### problems
I dont understand the interval. where do i specify a month re-balancing.

### Time step get monthly 

### Replace diagonal elements of cov matrix by MTL-GARCH-predicted variance.


1. In our case, we train a Transformer model for each asset to forecast the volatility for the next 4 time steps (weeks) and the last one will
be the estimation of the future 1-month volatility. We then create a VCV matrix based on
this prediction and the historical correlation matrix. which will be the input to the portfolio
allocation problem described above.

3. Iterate through MTL monthly. The time step we are on is (i - 1month) Basically we lag behind the prediction by a month. Bu

# 4. Reference
* https://campus.datacamp.com/courses/garch-models-in-python/garch-model-fundamentals?ex=9
* https://stackoverflow.com/questions/59884917/forecasting-volatility-using-garch-in-python-arch-package
* https://stackoverflow.com/questions/15705630/get-the-rows-which-have-the-max-count-in-groups-using-groupby
* Professor Lee's BootCamp Videos

### Mon

## back testing
## generate portfolio returns starting from a period x. 
## switch out graphs for different dropouts
## show the the allocation at each weekly time period.