In [1]:
import pandas as pd
import numpy as np
import yfinance as yf

from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
from sklearn.linear_model import Lasso

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Symbols to download, passed to yfinance as a single comma-separated string.
tickers = 'aapl, spy, msft'

# Adjusted closes, keeping only the dates on which every ticker has a value.
adj_close = yf.download(tickers)['Adj Close'].dropna()

# Last price of each month -> month-over-month simple returns; rows with a
# missing return (such as the first month) are dropped.
month_end = adj_close.resample('1M').last()
df = month_end.pct_change().dropna()

[*********************100%***********************]  3 of 3 completed


In [3]:
def fit_ols(y, X):
    """Fit an ordinary least squares regression of ``y`` on ``X``.

    ``X`` is used exactly as passed in; no constant column is added here.
    Returns the fitted statsmodels results object.
    """
    return sm.OLS(y, X).fit()


def fit_rollingols(y, X, window = None, expanding=True):
    """Fit a statsmodels ``RollingOLS`` of ``y`` on ``X``.

    Parameters
    ----------
    y, X
        Target and regressors; ``X`` must expose ``.columns``.
    window : int, optional
        Window length in rows. ``None`` uses every row of ``X``.
    expanding : bool
        Forwarded unchanged to ``RollingOLS``.

    The minimum number of observations per fit is set to the number of
    columns in ``X``. Returns the fitted rolling results object.
    """
    n_regressors = len(X.columns)
    span = len(X) if window is None else window
    return RollingOLS(
        y, X, window=span, min_nobs=n_regressors, expanding=expanding
    ).fit()
    

# funct is an estimator class such as Lasso or Ridge
def fit_pr(funct, y, X, alpha=0, constant = True):
    """Instantiate a penalised-regression estimator and fit it on ``(X, y)``.

    ``funct`` is called as ``funct(alpha=alpha, fit_intercept=constant)``.
    The return value is whatever the estimator's ``fit(X, y)`` returns.
    """
    estimator = funct(fit_intercept=constant, alpha=alpha)
    return estimator.fit(X, y)
    


In [4]:
# Target: the first column of df, kept as a one-column DataFrame.
y = df[df.columns[0]].to_frame()
# Regressors: all remaining columns, with a constant column added for the intercept.
X = sm.add_constant(df[df.columns[1:]])

In [5]:
# Parameters for the rolling fits
expanding = False  # forwarded as the `expanding` argument of the rolling fits
window = 200       # window length, in rows of the monthly return frame

In [6]:
# Rolling OLS with the parameters above, plus the full-sample OLS benchmark.
rollingols = fit_rollingols(y, X, window=window, expanding=expanding)
ols = fit_ols(y, X)

In [7]:
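# Check (disabled): the last rolling estimate equals the full-sample OLS fit only
# when the window spans the whole sample (window=None), not with window = 200.
# assert np.sum(ols.params.values - rollingols.params.iloc[-1].values) < 10e-9 , 'ERROR'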

In [8]:
# Rebuild the design matrix without a constant column: the scikit-learn
# estimators used next fit their own intercept (fit_intercept).
y = df[df.columns[0]].to_frame()
X = df[df.columns[1:]]

In [9]:
# Lasso with fit_pr's defaults (alpha=0, intercept fitted).
lasso = fit_pr(Lasso, y=y, X=X)

In [10]:
# Check: with alpha=0 the Lasso slopes must match the OLS slopes (the OLS
# constant is skipped). Each coefficient is compared on its absolute difference:
# a signed sum lets opposite-sign errors cancel and passes for any negative total.
np.testing.assert_allclose(
    np.ravel(lasso.coef_),
    ols.params.iloc[1:].values,
    rtol=0,
    atol=1e-8,
    err_msg='ERROR',
)

In [11]:
def fit_rollingpr(funct, y, X, alpha=0, constant = True, window = None, expanding=True):
    """Fit a penalised regression over a moving window and collect the coefficients.

    For every row of ``X`` that qualifies (see ``expanding``), a fresh
    ``funct(alpha=alpha, fit_intercept=constant)`` is fitted on the most recent
    observations ending at that row (at most ``window`` of them).

    Parameters
    ----------
    funct : estimator class
        Called as ``funct(alpha=..., fit_intercept=...)``. The instance must
        provide ``fit(X, y)`` returning an object with ``coef_`` and, when
        ``constant`` is true, ``intercept_``.
    y : DataFrame or Series
        Target, aligned row-for-row with ``X`` (sliced positionally).
    X : DataFrame
        Regressors, without a constant column.
    alpha
        Penalty strength forwarded to ``funct``.
    constant : bool
        Whether the estimator fits an intercept; if so it is reported in a
        leading ``'const'`` column.
    window : int, optional
        Window length in rows. ``None`` uses ``len(X)``.
    expanding : bool
        If true, rows with fewer than ``window`` observations are still fitted
        on all data seen so far, once there is at least one observation per
        estimated parameter. If false, only full windows are fitted.

    Returns
    -------
    DataFrame
        Float frame indexed like ``X`` with columns ``['const', *X.columns]``
        (or just ``X.columns`` when ``constant`` is false). Rows that were not
        fitted are NaN.
    """
    n_obs = len(X)
    if window is None:
        window = n_obs

    features = list(X.columns)
    cols = ['const', *features] if constant else features
    # One observation per estimated parameter is the minimum for a warm-up fit.
    min_nobs = len(cols)

    # A preallocated float array avoids an object-dtype frame and per-cell
    # .loc writes; rows are addressed by position, so duplicate index labels
    # cannot be written twice.
    estimates = np.full((n_obs, len(cols)), np.nan)

    for num in range(n_obs):
        n_seen = num + 1
        full_window = n_seen >= window
        # Explicit boolean test: the former `num*expanding >= min_nobs - 1`
        # was also true for expanding=False whenever min_nobs was 1.
        warm_up = bool(expanding) and n_seen >= min_nobs
        if not (full_window or warm_up):
            continue

        start = max(0, n_seen - window)
        Xi, yi = X.iloc[start:n_seen], y.iloc[start:n_seen]

        result = funct(alpha=alpha, fit_intercept=constant).fit(Xi, yi)

        # ravel(): depending on the estimator and on whether y is 1-D or 2-D,
        # coef_ may be (k,) or (1, k) and intercept_ a scalar or a (1,) array.
        coefs = np.ravel(result.coef_)
        if constant:
            estimates[num, 0] = np.ravel(result.intercept_)[0]
            estimates[num, 1:] = coefs
        else:
            estimates[num, :] = coefs

    return pd.DataFrame(estimates, index=X.index, columns=cols)

In [12]:
# Settings for the rolling penalised regression
constant = True  # have the estimator fit an intercept
alpha = 0        # penalty strength handed to the estimator

In [13]:
# Rolling Lasso coefficients, displayed as the cell output.
fit_rollingpr(
    Lasso,
    y,
    X,
    alpha=alpha,
    constant=constant,
    window=window,
    expanding=expanding,
)

Unnamed: 0_level_0,const,MSFT,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1993-02-28,,,
1993-03-31,,,
1993-04-30,,,
1993-05-31,,,
1993-06-30,,,
...,...,...,...
2022-03-31,0.015841,0.279721,0.939091
2022-04-30,0.015502,0.26923,0.953741
2022-05-31,0.014301,0.293263,0.931184
2022-06-30,0.013831,0.290685,0.941382


In [15]:
# Rolling OLS coefficients, shown for comparison with the rolling Lasso table.
rollingols.params

Unnamed: 0_level_0,const,MSFT,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1993-02-28,,,
1993-03-31,,,
1993-04-30,,,
1993-05-31,,,
1993-06-30,,,
...,...,...,...
2022-03-31,0.015841,0.279721,0.939091
2022-04-30,0.015502,0.269230,0.953741
2022-05-31,0.014301,0.293263,0.931184
2022-06-30,0.013831,0.290685,0.941382
