In [25]:
import pandas as pd
import pandas_datareader.data as web
import datetime, re, copy
import numpy as np
import statsmodels.formula.api as sm

df_nvda =pd.read_csv("NVDA.csv", decimal = ',')

In [26]:
df_nvda.head()

Unnamed: 0,Date,Open,Close,High,Low,Volume
0,4-Jan-16,32.290001,32.369999,32.580002,32.040001,8951900
1,5-Jan-16,32.98,32.889999,33.439999,32.5,12256800
2,6-Jan-16,32.349998,31.530001,32.5,31.16,11233600
3,7-Jan-16,30.74,30.280001,30.950001,29.879999,16132600
4,8-Jan-16,30.67,29.629999,30.700001,29.57,9961800


In [27]:
df_nvda.dtypes

Date      object
Open      object
Close     object
High      object
Low       object
Volume    object
dtype: object

In [28]:
df_nvda['Open'] = df_nvda['Open'].astype(float)

In [29]:
df_nvda.dtypes

Date       object
Open      float64
Close      object
High       object
Low        object
Volume     object
dtype: object

In [30]:
df_nvda['Date']= pd.to_datetime(df_nvda['Date']) 
df_nvda.head()

Unnamed: 0,Date,Open,Close,High,Low,Volume
0,2016-01-04,32.290001,32.369999,32.580002,32.040001,8951900
1,2016-01-05,32.98,32.889999,33.439999,32.5,12256800
2,2016-01-06,32.349998,31.530001,32.5,31.16,11233600
3,2016-01-07,30.74,30.280001,30.950001,29.879999,16132600
4,2016-01-08,30.67,29.629999,30.700001,29.57,9961800


In [31]:
df_nvda
df_nvda = df_nvda.set_index('Date')
df_nvda.head(3)

Unnamed: 0_level_0,Open,Close,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,32.290001,32.369999,32.580002,32.040001,8951900
2016-01-05,32.98,32.889999,33.439999,32.5,12256800
2016-01-06,32.349998,31.530001,32.5,31.16,11233600


In [8]:
open = df_nvda['Open']
rets = open.pct_change()*100
rets = pd.DataFrame(rets)
rets.rename( columns={"Open":"open"}, inplace=True)

In [9]:
rets.head()

Unnamed: 0_level_0,open
Date,Unnamed: 1_level_1
2016-01-04,
2016-01-05,2.136881
2016-01-06,-1.910255
2016-01-07,-4.97681
2016-01-08,-0.227716


In [10]:
# get fama/french factor data
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
three_fac = web.DataReader("F-F_Research_Data_Factors_daily", "famafrench")
f = copy.copy(three_fac[0])
f.rename( columns={c:re.sub(r'[0-9\-\s]','',c) for c in f.columns}, inplace=True)
f.head()

Unnamed: 0_level_0,MktRF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,1.69,0.58,1.12,0.0
2010-01-05,0.31,-0.59,1.21,0.0
2010-01-06,0.13,-0.24,0.52,0.0
2010-01-07,0.4,0.09,0.94,0.0
2010-01-08,0.33,0.4,0.01,0.0


In [11]:
m = pd.merge(rets,f,left_index=True,right_index=True)

In [12]:
m.head()

Unnamed: 0_level_0,open,MktRF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,,-1.59,-0.83,0.53,0.0
2016-01-05,2.136881,0.12,-0.22,0.01,0.0
2016-01-06,-1.910255,-1.35,-0.12,-0.01,0.0
2016-01-07,-4.97681,-2.44,-0.29,0.08,0.0
2016-01-08,-0.227716,-1.11,-0.47,-0.03,0.0


In [15]:
# excess returns
m['open_e'] = m.open-m.RF
train = m[:499]
valid = m[500:878]

In [19]:
# ols regression
result = sm.ols( formula = "open_e ~ MktRF + SMB + HML", data=train).fit()

# intercept = alpha
print(result.params)
print (result.summary())

Intercept    0.367306
MktRF        0.231769
SMB          0.620173
HML          0.320641
dtype: float64
                            OLS Regression Results                            
Dep. Variable:                 open_e   R-squared:                       0.029
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     4.920
Date:                Sun, 15 Dec 2019   Prob (F-statistic):            0.00223
Time:                        19:11:32   Log-Likelihood:                -1173.0
No. Observations:                 498   AIC:                             2354.
Df Residuals:                     494   BIC:                             2371.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------

In [21]:
y = result.predict(valid)

In [16]:
print(y)

Date
2017-12-27    0.237350
2017-12-28    0.505541
2017-12-29    0.055559
2018-01-02    0.717030
2018-01-03    0.194847
2018-01-04    0.380358
2018-01-05    0.226048
2018-01-08    0.334559
2018-01-09    0.172396
2018-01-10    0.580466
2018-01-11    1.381331
2018-01-12    0.388730
2018-01-16   -0.385250
2018-01-17    0.526986
2018-01-18   -0.064659
2018-01-19    1.059931
2018-01-22    0.290019
2018-01-23    0.399307
2018-01-24   -0.027711
2018-01-25    0.331845
2018-01-26    0.079742
2018-01-29    0.121637
2018-01-30    0.135341
2018-01-31   -0.076204
2018-02-01    0.596597
2018-02-02   -0.194330
2018-02-05   -0.268749
2018-02-06    0.364324
2018-02-07    0.675004
2018-02-08    0.179163
                ...   
2019-05-17   -0.192001
2019-05-20    0.327323
2019-05-21    0.721150
2019-05-22   -0.231290
2019-05-23   -0.458086
2019-05-24    0.829886
2019-05-28    0.161588
2019-05-29    0.176385
2019-05-30   -0.030197
2019-05-31   -0.110578
2019-06-03    0.984390
2019-06-04    1.131896
2019-0