In [5]:
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yfinance as yf
from fpylll import IntegerMatrix, LLL
from scipy.optimize import linprog
# Apply seaborn's "whitegrid" theme once, up front, so plots drawn later share the same look
sns.set_theme(style="whitegrid")

In [None]:
# Fetch stock data using yfinance

def fetch_stock_data(tickers, start_date, end_date):
    """Download adjusted close prices and derive per-period returns.

    Parameters
    ----------
    tickers : str or iterable of str
        Ticker symbols. A single string may hold one symbol or several
        separated by spaces/commas.
    start_date, end_date
        Forwarded unchanged to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    sector_dict : dict
        Maps every requested ticker to the ``"sector"`` entry of its
        ``yf.Ticker(...).info`` mapping, or ``None`` when that key is absent.
    returns : pandas.DataFrame
        Row-over-row percentage change of the adjusted close, one column per
        ticker that had at least one price. The first row is NaN because it
        has no predecessor.
    """
    # A bare string would otherwise be iterated character by character below.
    if isinstance(tickers, str):
        tickers = tickers.replace(",", " ").split()
    else:
        tickers = list(tickers)

    # One metadata lookup per ticker.
    sector_dict = {ticker: yf.Ticker(ticker).info.get("sector") for ticker in tickers}

    # auto_adjust=False keeps the separate 'Adj Close' column; recent yfinance
    # releases default to auto_adjust=True, which drops that column and makes
    # the lookup below raise KeyError.
    prices = yf.download(
        tickers, start=start_date, end=end_date, auto_adjust=False
    )['Adj Close']

    # Some yfinance versions return a Series for a single ticker; normalise to
    # a one-column frame so the column-wise operations below still apply.
    if isinstance(prices, pd.Series):
        prices = prices.to_frame(name=tickers[0] if tickers else prices.name)

    prices = prices.dropna(axis=1, how='all')  # Drop tickers with no data at all
    prices = prices.ffill()  # Carry the last valid price over gaps

    # Gaps are already forward-filled above, so disable pct_change's own
    # (deprecated) implicit padding; the result is unchanged.
    returns = prices.pct_change(fill_method=None)
    return sector_dict, returns

In [None]:
# Calculate risk vectors (beta, sector risk, volatility)

def calculate_risk_matrix(sector_dict, returns):
    """Build a per-stock risk matrix with beta, sector risk and volatility.

    Parameters
    ----------
    sector_dict : dict
        Maps ticker (a column label of ``returns``) to its sector label.
        Tickers that are missing or map to ``None`` get a NaN sector risk.
    returns : pandas.DataFrame
        Per-period returns, one column per ticker.

    Returns
    -------
    pandas.DataFrame
        Indexed by ticker, with one column per risk vector:

        * ``beta`` - cov(stock, market) / var(market), where the "market" is
          the equal-weighted mean of the columns of ``returns``.
        * ``sector_risk`` - for the stock's sector, the time-average of the
          cross-sectional standard deviation of returns among the sector's
          members. NaN for sectors with a single member, where a sample
          standard deviation is undefined.
        * ``volatility`` - standard deviation of the stock's own returns.
    """
    # Equal-weighted average of the supplied stocks stands in for the market.
    market_returns = returns.mean(axis=1)
    volatilities = returns.std()

    # beta_i = cov(r_i, r_m) / var(r_m) = corr(r_i, r_m) * sigma_i / sigma_m.
    # Correlation on its own is not beta: it discards the volatility ratio.
    betas = returns.corrwith(market_returns) * volatilities / market_returns.std()

    # Group tickers by sector. Transposing and grouping rows is the supported
    # replacement for DataFrame.groupby(axis=1), which pandas has deprecated.
    sector_dispersion = returns.T.groupby(sector_dict).std()  # sectors x periods
    sector_risks = sector_dispersion.mean(axis=1)  # one value per sector

    # Broadcast each sector's risk back onto the stocks belonging to it.
    stock_sectors = pd.Series(
        [sector_dict.get(ticker) for ticker in returns.columns],
        index=returns.columns,
        dtype=object,
    )
    sector_risk_per_stock = stock_sectors.map(sector_risks)

    risk_matrix = pd.DataFrame(
        {
            "beta": betas,
            "sector_risk": sector_risk_per_stock,
            "volatility": volatilities,
        }
    )
    return risk_matrix

In [None]:
def find_short_vector_coeffs(risk_matrix, scale=10**6):
    """LLL-reduce the lattice spanned by the risk vectors and express the
    reduced vectors as integer combinations of the original ones.

    Parameters
    ----------
    risk_matrix : pandas.DataFrame
        Each column is one risk vector (one lattice basis vector).
    scale : int, optional
        Fixed-point factor applied before rounding to integers. fpylll works
        on integer lattices, so real-valued entries are encoded as
        ``round(value * scale)``.

    Returns
    -------
    numpy.ndarray of int
        Square array with one row and one column per risk vector. Column
        ``j`` holds the rounded least-squares weights on the original
        (scaled) risk vectors that reproduce the ``j``-th reduced vector.

    Raises
    ------
    ValueError
        If ``risk_matrix`` contains NaN or infinite entries, which cannot be
        encoded as lattice coordinates.
    """
    basis = np.asarray(risk_matrix.values, dtype=float)
    if not np.isfinite(basis).all():
        raise ValueError("risk_matrix must not contain NaN or infinite values")

    # Integerise the basis: IntegerMatrix only accepts integer entries.
    int_basis = np.round(basis * scale).astype(np.int64)

    # fpylll treats ROWS as basis vectors, hence the transpose; tolist()
    # hands it plain Python ints rather than NumPy scalars.
    R = IntegerMatrix.from_matrix(int_basis.T.tolist())
    LLL.reduction(R)  # reduces R in place

    # Read the reduced basis back element by element instead of relying on
    # NumPy's implicit conversion of an fpylll object, then transpose so the
    # reduced vectors are columns again.
    reduced_vectors = np.array(
        [[R[i, j] for j in range(R.ncols)] for i in range(R.nrows)],
        dtype=float,
    ).T

    # Solve int_basis @ coeffs = reduced_vectors. The integerised basis (not
    # the unscaled input) must be used so both sides live in the same lattice.
    coeffs = np.linalg.lstsq(int_basis.astype(float), reduced_vectors, rcond=None)[0]

    # The exact solution is integral; rounding removes floating-point noise.
    return np.round(coeffs).astype(int)