In [1]:
#A notebook that gets daily closing prices, calculates log returns, alpha, beta, and Sharpe Ratio
#TODO: Scrape earnings reports so we have some more graphs to work with
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
def createDataFrame(stockList, startDate, endDate):
    """Download closing prices for every ticker and combine them column-wise.

    Parameters
    ----------
    stockList : list of str
        Ticker symbols; each becomes one column of the result, in this order.
    startDate, endDate : str
        Passed straight through to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One column of 'Close' values per ticker. The index is the union of
        the indexes returned for the individual tickers; a ticker that has no
        value for a given row gets NaN there. An empty ``stockList`` yields an
        empty frame.
    """
    closingPrices = {}
    for stock in stockList:
        history = yf.download(stock, start=startDate, end=endDate, progress=False)
        close = history['Close']
        # If the download comes back with multi-level columns, ['Close'] is a
        # one-column DataFrame rather than a Series; take that single column.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        closingPrices[stock] = close

    if not closingPrices:
        # pd.concat raises on an empty mapping; keep the "empty in, empty out" contract.
        return pd.DataFrame(columns=stockList)

    # Concatenating once keeps every row of every ticker. Assigning columns one
    # by one into an initially empty frame would reindex each later ticker to
    # the first ticker's rows and silently drop the rest.
    return pd.concat(closingPrices, axis=1)

def logReturns(stockDataFrame):
    """Return the row-over-row log returns of every column.

    For each column the result is ``log(value[t]) - log(value[t-1])``. Rows
    that contain a NaN in any column (always including the first row, which
    has no predecessor) are dropped.

    Parameters
    ----------
    stockDataFrame : pandas.DataFrame
        Price levels, one column per instrument. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns holding the log returns.
    """
    # Work on a derived frame so the caller's prices stay intact.
    logPrices = np.log(stockDataFrame)
    return (logPrices - logPrices.shift(1)).dropna()

#We use rolling alpha, beta in this case. Comparing with the SP500 for linear regression
def marketAlphaBeta(logReturnDF, benchmarkDF, lagWindow=30, benchmarkColumn='^GSPC'):
    """Rolling alpha and beta of every column against a benchmark column.

    For each row, the ``lagWindow + 1`` most recent rows (the row itself and
    the ``lagWindow`` before it) are fitted with ordinary least squares,
    ``stock = alpha + beta * benchmark``. With a single regressor that fit has
    the closed form ``beta = cov(stock, benchmark) / var(benchmark)`` and
    ``alpha = mean(stock) - beta * mean(benchmark)``, which is evaluated here
    with pandas rolling windows instead of refitting a model per row.

    Parameters
    ----------
    logReturnDF : pandas.DataFrame
        Returns, one column per stock. Not modified.
    benchmarkDF : pandas.DataFrame
        Must contain ``benchmarkColumn``. Not modified.
    lagWindow : int, default 30
        Number of rows preceding the current one that enter each fit.
    benchmarkColumn : str, default '^GSPC'
        Column of ``benchmarkDF`` used as the regressor.

    Returns
    -------
    (alphaDataFrame, betaDataFrame) : tuple of pandas.DataFrame
        Float frames with the columns of ``logReturnDF``. Rows without a full
        window are dropped, so both are empty when fewer than
        ``lagWindow + 1`` common rows exist.
    """
    windowSize = lagWindow + 1

    # Pair observations by index label rather than by position, so a row that
    # exists in only one of the two inputs cannot shift the pairing.
    stockReturns, marketReturns = logReturnDF.align(
        benchmarkDF[benchmarkColumn], join='inner', axis=0
    )

    marketVariance = marketReturns.rolling(windowSize).var()
    betaDataFrame = stockReturns.rolling(windowSize).cov(marketReturns).div(marketVariance, axis=0)
    alphaDataFrame = stockReturns.rolling(windowSize).mean() - betaDataFrame.mul(
        marketReturns.rolling(windowSize).mean(), axis=0
    )

    # The first lagWindow rows have no complete window and are NaN.
    return alphaDataFrame.dropna(), betaDataFrame.dropna()

#We use rolling Sharpe ratio in this case. We use 10 year Treasury Note (^TNX) yield as "risk-free" rate
def rollingSharpeRatio(logReturnDF, logBenchmark, lagWindow=60, riskFreeColumn='^TNX'):
    """Rolling Sharpe-style ratio of every column.

    For each row, over the ``lagWindow + 1`` most recent rows (the row itself
    and the ``lagWindow`` before it):

        (mean(stock) - mean(risk-free column)) / sample std(stock)

    The ratio is per-row; no annualisation is applied.

    Parameters
    ----------
    logReturnDF : pandas.DataFrame
        Returns, one column per stock. Not modified.
    logBenchmark : pandas.DataFrame
        Must contain ``riskFreeColumn``; its values are used exactly as given.
        NOTE(review): the notebook passes the log *changes* of the ^TNX series
        here, not a per-period rate - confirm that is the intended
        "risk-free" input.
    lagWindow : int, default 60
        Number of rows preceding the current one that enter each window.
    riskFreeColumn : str, default '^TNX'
        Column of ``logBenchmark`` subtracted from the mean return.

    Returns
    -------
    pandas.DataFrame
        Float frame with the columns of ``logReturnDF``. Rows without a full
        window are dropped, so it is empty when fewer than ``lagWindow + 1``
        common rows exist.
    """
    windowSize = lagWindow + 1

    # Pair observations by index label rather than by position.
    stockReturns, riskFree = logReturnDF.align(
        logBenchmark[riskFreeColumn], join='inner', axis=0
    )

    rollingStocks = stockReturns.rolling(windowSize)
    netReturn = rollingStocks.mean().sub(riskFree.rolling(windowSize).mean(), axis=0)
    sharpeDataFrame = netReturn / rollingStocks.std()  # std uses ddof=1, like Series.std()

    # The first lagWindow rows have no complete window and are NaN.
    return sharpeDataFrame.dropna()

In [3]:
#Variables that we can modify to get our data
# Tickers to analyse; each one becomes a column in the downloaded frames.
memeStocks = [
    "GME",
    "AMC",
    "BBBY",
    "TLRY",
    "PLTR",
    "TSLA",
]
# '^GSPC' is the regressor for alpha/beta and '^TNX' the series subtracted in
# the Sharpe ratio; both names are looked up by the functions defined above.
benchmarks = ["^GSPC", "^TNX"]
# Date range handed to yf.download as start/end.
startDate, endDate = "2021-10-30", "2022-10-30"

In [4]:
#Run once variables are filled out
# 1) Download closing prices and persist them.
newFrame = createDataFrame(memeStocks, startDate, endDate)
newFrame.to_csv("stockPrices.csv")
benchmarkFrame = createDataFrame(benchmarks, startDate, endDate)
benchmarkFrame.to_csv("benchmarkPrices.csv")

# 2) Log returns. Copies are passed so newFrame / benchmarkFrame still hold
#    prices after this cell, whether or not logReturns modifies its argument.
logDataFrame = logReturns(newFrame.copy())
logDataFrame.to_csv("logReturnsStock.csv")
logBenchmark = logReturns(benchmarkFrame.copy())
logBenchmark.to_csv("logReturnsBenchmark.csv")

# 3) Rolling alpha/beta of each stock against the '^GSPC' column.
alphaFrame, betaFrame = marketAlphaBeta(logDataFrame, logBenchmark)
alphaFrame.to_csv("alphas.csv")
betaFrame.to_csv("betas.csv")

# 4) Rolling Sharpe ratio.
# NOTE(review): logBenchmark['^TNX'] is the log change of the ^TNX series, and
# that is what rollingSharpeRatio subtracts as the "risk-free" return - confirm
# this is intended rather than a rate derived from the yield level.
sharpeDataFrame = rollingSharpeRatio(logDataFrame, logBenchmark)
sharpeDataFrame.to_csv("sharpeRatios.csv")