In [2]:
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
from fpylll import IntegerMatrix, LLL
from scipy.optimize import linprog

In [None]:
# Fetch stock data using yfinance

def fetch_stock_data(tickers, start_date, end_date):
    data = yf.download(tickers, start=start_date, end=end_date)
    data = data.dropna(axis=1, how='all')  # Drop columns with all NaN values
    data = data.dropna(axis=0, how='all')  # Drop rows with all NaN values
    data = data.ffill()  # Fill NaN values with the previous valid observation
    return data

In [39]:
tickers = ['^GSPC','AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
start_date = '2020-01-01'
end_date = '2023-01-01'
data = fetch_stock_data(tickers, start_date, end_date)

[*********************100%***********************]  6 of 6 completed


In [55]:
returns = data['Close'].pct_change().dropna()  # Calculate daily returns
sp500_returns = returns['^GSPC']  # S&P 500 returns
betas = returns.corrwith(sp500_returns) # Beta of each stock
volatilities = returns.std()  # Volatility of each stock
volume_usd = data['Volume'] * data['Close']  # Convert volume to USD
amihud_illiquidity = (np.abs(returns)/volume_usd).dropna().mean(axis=0)  # Amihud Illiquidity Measure
left_threshold = returns['^GSPC'].quantile(0.05)
right_threshold = returns['^GSPC'].quantile(0.95)
tail_data = returns[(returns['^GSPC'] >= right_threshold) | (returns['^GSPC'] <= left_threshold)]
tail_corr = tail_data.corr()
tail_risk = tail_corr['^GSPC']

In [69]:
def standardise_risk_matrix(raw_risk_matrix):
    # standardize the risk matrix using min-max scaling
    risk_matrix = (raw_risk_matrix - raw_risk_matrix.min()) / (raw_risk_matrix.max() - raw_risk_matrix.min())
    return risk_matrix

In [70]:
# Calculate risk vectors (beta, sector risk, volatility)

def calculate_risk_matrix(data):
    returns = data['Close'].pct_change().dropna()  # Calculate daily returns
    sp500_returns = returns['^GSPC']  # S&P 500 returns
    betas = returns.corrwith(sp500_returns) # Beta of each stock
    volatilities = returns.std()  # Volatility of each stock
    volume_usd = data['Volume'] * data['Close']  # Convert volume to USD
    amihud_illiquidity = (np.abs(returns)/volume_usd).dropna().mean(axis=0)  # Amihud Illiquidity Measure
    left_threshold = returns['^GSPC'].quantile(0.05)
    right_threshold = returns['^GSPC'].quantile(0.95)
    tail_data = returns[(returns['^GSPC'] >= right_threshold) | (returns['^GSPC'] <= left_threshold)]
    tail_corr = tail_data.corr()
    tail_risk = tail_corr['^GSPC']
    raw_risk_matrix = pd.DataFrame({
        'Beta': betas,
        'Volatility': volatilities,
        'Amihud Illiquidity': amihud_illiquidity,
        'Tail Risk': tail_risk
    })
    # standardize the risk matrix using min-max scaling
    risk_matrix = standardise_risk_matrix(raw_risk_matrix)
    #drop GSPC row
    risk_matrix = risk_matrix.drop(index='^GSPC', errors='ignore')

    return risk_matrix

In [71]:
risk_matrix = calculate_risk_matrix(data)

In [72]:
risk_matrix

Unnamed: 0_level_0,Beta,Volatility,Amihud Illiquidity,Tail Risk
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,0.640386,0.245469,0.277572,0.802699
AMZN,0.288709,0.291506,0.367738,0.178427
GOOGL,0.615798,0.193891,1.0,0.777819
MSFT,0.711088,0.198894,0.459873,0.842417
TSLA,0.0,1.0,0.387232,0.0


In [None]:
def find_short_vector_coeffs(risk_matrix):
    # assume that the risk vectors are basis vectors of a lattice and apply LLL reduction
    # we want each risk vector to be a column in the risk matrix
    R = IntegerMatrix.from_matrix(risk_matrix.values.T)
    LLL.reduction(R)
    # Extract the reduced basis vectors
    reduced_vectors = np.array(R).T
    # Find the coefficients of the short vector in the original basis
    coeffs = np.linalg.lstsq(risk_matrix.values, reduced_vectors, rcond=None)[0]
    # Round the coefficients to the nearest integer
    coeffs = np.round(coeffs).astype(int)
    return coeffs

In [None]:
# Scrape S&P 500 tickers from Wikipedia
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
table = pd.read_html(url)
sp500 = table[0]
tickers = sp500['Symbol'].tolist()

# convert tickers to yfinance format
tickers = [ticker.replace('.', '-') for ticker in tickers]
tickers = ['^GSPC'] + tickers  # Add S&P 500 index ticker