In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import yfinance as yf
import statsmodels.api as sm
import xlwings as xw

In [2]:
# Regressors: tickers whose log returns form the explanatory variables (x)
INPUT_TICKERS = ["AAPL", "TSLA"]

# Dependent series: ticker whose log returns are being explained (y)
TICKERS = ["SPY"]

# Date window handed to the Yahoo Finance download, formatted YYYY-MM-DD
START_DATE, END_DATE = "2021-01-01", "2021-11-30"

In [3]:
# Pull price history for the dependent ticker and the regressors from
# Yahoo Finance in a single request
df = yf.download(
    [*TICKERS, *INPUT_TICKERS],
    start=START_DATE,
    end=END_DATE,
)

[*********************100%***********************]  3 of 3 completed


In [4]:
# Log returns from closing prices: r_t = ln(P_t) - ln(P_{t-1})

idx = pd.IndexSlice
df_close = df.loc[:, idx['Close']].copy()

# Differencing the log prices gives the log return; the first row (no prior
# price) and any row with a missing value are then discarded
df_close_log_return = np.log(df_close).diff().dropna()

# Dependent series (y) and regressors (x)
y = df_close_log_return.loc[:, TICKERS]
x = df_close_log_return.loc[:, INPUT_TICKERS]

In [5]:
# Fit an ordinary-least-squares regression with statsmodels.
# add_constant is applied to the regressors so the fit includes an intercept.
x_con = sm.add_constant(x)
model = sm.OLS(endog=y, exog=x_con).fit()


In [None]:
# Print the plain-text summary table of the fitted OLS model
print(model.summary())

In [8]:
# put into a function

def factor_regressions(dependent, independents, start_date, end_date):
    """Regress the dependent ticker's log returns on the independents' log returns.

    Closing prices for every requested ticker are downloaded from Yahoo
    Finance, converted to log returns, and fitted with a statsmodels OLS
    model that includes an intercept.

    Parameters
    ----------
    dependent : list of str, or str
        Ticker(s) whose log returns form the response, e.g. ``['SPY']``.
        A single ticker may also be given as a plain string.
    independents : list of str, or str
        Ticker(s) whose log returns are the regressors, e.g.
        ``['AAPL', 'TSLA']``. A single ticker may also be given as a string.
    start_date : str
        Start of the download window, ``YYYY-MM-DD``; passed to
        ``yf.download`` as ``start``.
    end_date : str
        End of the download window, ``YYYY-MM-DD``; passed to
        ``yf.download`` as ``end``.

    Returns
    -------
    The fitted statsmodels OLS results object (the value of ``.fit()``).

    Raises
    ------
    ValueError
        If no complete row of log returns is left after dropping missing
        values (for example when a ticker could not be downloaded).
    """
    # A bare string would break the list concatenation below (str + list) and
    # would select a Series instead of a DataFrame, so normalise both inputs
    # to lists up front.
    dependent = [dependent] if isinstance(dependent, str) else list(dependent)
    independents = (
        [independents] if isinstance(independents, str) else list(independents)
    )

    # get timeseries data from Yahoo Finance
    prices = yf.download(dependent + independents, start=start_date, end=end_date)

    # keep only the closing prices (one column per ticker)
    close = prices.loc[:, 'Close'].copy()

    # log return: ln(P_t) - ln(P_{t-1}); the first row has no previous price
    # and is removed together with any row containing a missing value
    log_returns = np.log(close).diff().dropna()

    if log_returns.empty:
        raise ValueError(
            "No complete log-return observations for tickers "
            f"{dependent + independents} between {start_date} and {end_date}"
        )

    x = log_returns[independents]
    y = log_returns[dependent]

    # run regression model using statsmodels; add_constant supplies the intercept
    x_con = sm.add_constant(x)
    model = sm.OLS(y, x_con).fit()

    return model


In [9]:
# Run the packaged regression with the notebook's configured tickers and dates
RESULT = factor_regressions(
    dependent=TICKERS,
    independents=INPUT_TICKERS,
    start_date=START_DATE,
    end_date=END_DATE,
)

[*********************100%***********************]  3 of 3 completed


In [11]:
# Print the plain-text summary of the fitted model returned by factor_regressions
print(RESULT.summary())

                            OLS Regression Results                            
Dep. Variable:                    SPY   R-squared:                       0.514
Model:                            OLS   Adj. R-squared:                  0.510
Method:                 Least Squares   F-statistic:                     119.0
Date:                Sat, 22 Jan 2022   Prob (F-statistic):           5.46e-36
Time:                        16:26:46   Log-Likelihood:                 864.41
No. Observations:                 228   AIC:                            -1723.
Df Residuals:                     225   BIC:                            -1713.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0006      0.000      1.752      0.0