### Import libraries

In [1]:
from pathlib import Path

import getFamaFrenchFactors as gff
import matplotlib
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import statsmodels.api as sm
from matplotlib import pyplot as plt
from pandas.tseries.offsets import MonthEnd
from statsmodels.regression.rolling import RollingOLS

sns.set_style('darkgrid')
%matplotlib inline
matplotlib.rc('font', **{'size': 12})

### Pull price data for our portfolio proxy (QQQ)

In [15]:
# Sample window shared by the price download and the factor download.
start = '01-01-2017'
end   = '01-01-2022'

# The first ticker is treated downstream as the portfolio being explained.
tickers = ['QQQ']

# Daily closing prices, one column per ticker.
prices = pdr.DataReader(tickers, 'yahoo', start=start, end=end)['Close']

# Daily simple returns are compounded within each calendar week ('W'),
# expressed in percent and rounded to two decimals.
# (For daily rather than weekly returns, stop the chain after .pct_change().)
returns = (
    prices
    .pct_change()
    .resample('W')
    .agg(lambda week: (1 + week).prod() - 1)
    .mul(100)
    .round(2)
)

In [14]:
# Save the downloaded closing prices for reuse outside the notebook.
# DataFrame.to_csv does not create missing folders, so make sure the
# target directory exists first (no-op when it is already there).
Path('./output-data').mkdir(parents=True, exist_ok=True)
prices.to_csv('./output-data/prices.csv')

### Pull Fama-French factor data

The Fama-French five-factor returns will be used to model the portfolio's risk-factor exposure.

In [16]:
def pull_ff_data(name, start, end):
    """Download a dataset from the 'famafrench' source and return its first table.

    Parameters
    ----------
    name : str
        Dataset identifier passed straight to ``pdr.DataReader``.
    start, end
        Bounds of the requested date range, forwarded unchanged.

    Returns
    -------
    The element stored under key ``0`` of the reader's result
    (presumably the main returns table as a DataFrame -- confirm against
    the pandas-datareader documentation for the dataset in use).
    """
    tables = pdr.DataReader(name, 'famafrench', start=start, end=end)
    return tables[0]

In [17]:
name = 'F-F_Research_Data_5_Factors_2x3_daily'

# Pipeline, step by step:
#   1. scale the downloaded values by 1/100 (presumably percent -> decimal),
#   2. turn them into a cumulative growth index with (1 + r).cumprod(),
#   3. difference the index back into daily returns -- this leaves the first
#      row as NaN, the same way prices.pct_change() does for the price series,
#   4. compound the daily returns within each calendar week ('W'),
#   5. scale back up by 100 so the units match the weekly price returns.
# (For daily rather than weekly factors, stop the chain after .pct_change()
#  and then apply .mul(100).)
ff_factors = (
    pull_ff_data(name, start, end)
    .div(100)
    .add(1)
    .cumprod()
    .pct_change()
    .resample('W')
    .agg(lambda week: (1 + week).prod() - 1)
    .mul(100)
)

In [18]:
# --- Right-hand side -------------------------------------------------------
# Every return column after the first (none when a single ticker is loaded)
# joined to the weekly factors on their shared 'Date' index; only dates
# present in both frames survive the inner join.
exogen = pd.merge(returns[returns.columns[1:]], ff_factors, on='Date', how='inner')
exogen = sm.add_constant(exogen)  # intercept column for the regression

# --- Left-hand side --------------------------------------------------------
# The first return column is the portfolio; subtracting the 'RF' column
# gives its excess return (both series are in percent).
risk_free = exogen['RF']
portfolio = returns[returns.columns[0]]
endogen = portfolio - risk_free

# 'RF' has been used to build the dependent variable, so it is not a regressor.
exogen = exogen.drop(columns='RF')

# --- Fit -------------------------------------------------------------------
# missing='drop' discards rows where either side is NaN, e.g. weeks that
# exist in `returns` but not in the merged factor frame.
model = sm.OLS(endogen, exogen, missing='drop')
results = model.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.942
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     835.8
Date:                Thu, 31 Mar 2022   Prob (F-statistic):          7.43e-156
Time:                        15:22:11   Log-Likelihood:                -256.42
No. Observations:                 261   AIC:                             524.8
Df Residuals:                     255   BIC:                             546.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0543      0.042      1.308      0.1

In [8]:
# Export the regression inputs (regressors and excess returns) so the linear
# model can be reproduced outside the notebook.
# to_csv does not create missing folders, so make sure the target directory
# exists first (no-op when it is already there).
Path('./lm-data').mkdir(parents=True, exist_ok=True)
exogen.to_csv('./lm-data/exogen.csv')
endogen.to_csv('./lm-data/endogen.csv')