In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../../../')

In [3]:
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from config import STOCKTWITS_TICKER_LIST
from util.file_util import (
    StockTwitsFileReader, StockDataFileReader
)
from util.plot_util import plot_twit_series_for_ticker
from util.ts_util import resample_weekly
from util.ff_util import read_ff_factors_daily, get_ff_factors_with_freq

In [4]:
# Load the list of tickers analysed in this notebook.
# NOTE: unpickling can execute arbitrary code -- only load pickle files
# that were produced by a trusted source.
with open('./ticker_at_least_2_median.pkl', 'rb') as ticker_pickle:
    all_ticker_list = pickle.load(ticker_pickle)

In [5]:
# Reader objects shared by the helper functions defined in this notebook:
# one for the daily sentiment summaries, one for the stock price data.
twit_file_reader = StockTwitsFileReader()
stock_data_reader = StockDataFileReader()

In [6]:
# Date range (YYYY-MM-DD strings) passed to both the sentiment reader and
# the price reader when the per-ticker weekly frames are built.
start_date = '2017-01-01'
end_date = '2019-08-10'

In [7]:
def get_weekly_twits_for_ticker(ticker, start_date, end_date):
    """Build the weekly sentiment table for a single ticker.

    The ticker's daily sentiment summary between ``start_date`` and
    ``end_date`` is read through the notebook-level ``twit_file_reader``
    and passed through ``resample_weekly``. Two columns are then added:

    * ``B_m_B``  -- ``Bullish`` minus ``Bearish``.
    * ``ticker`` -- the ticker symbol, so that per-ticker frames can be
      stacked and pivoted later.

    Returns the weekly frame with those two columns set.
    """
    daily_sentiment = twit_file_reader.read_daily_sentiment_summary_prelim(
        ticker, start_date=start_date, end_date=end_date)

    weekly = resample_weekly(daily_sentiment)
    net_bullish = weekly['Bullish'] - weekly['Bearish']
    weekly['B_m_B'] = net_bullish
    weekly['ticker'] = ticker
    return weekly

def get_weekly_returns_for_ticker(ticker, start_date, end_date):
    """Compute week-over-week percentage changes for a single ticker.

    Price data for ``ticker`` between ``start_date`` and ``end_date`` is
    read through the notebook-level ``stock_data_reader``. The last
    observation of every week ending on Friday (``'W-FRI'``) is kept, and
    ``pct_change`` turns those weekly values into returns, so the first
    week has no return (NaN). A ``ticker`` column holding the symbol is
    added before the frame is returned.
    """
    prices = stock_data_reader.read_stockdata_in_range(
        ticker, start_date, end_date)

    weekly_returns = prices.resample('W-FRI').last().pct_change()
    weekly_returns['ticker'] = ticker
    return weekly_returns

In [8]:
# Build one weekly sentiment frame per ticker, then stack them row-wise
# into a single long-format table.
dfs = [
    get_weekly_twits_for_ticker(symbol, start_date, end_date)
    for symbol in all_ticker_list
]

df = pd.concat(dfs)

In [9]:
# Reshape the long table into a (week x ticker) matrix of the B_m_B signal.
# pivot_table aggregates with the mean if a (week, ticker) pair repeats.
weekly_signal_df = df.pivot_table(
    index=df.index,
    columns=['ticker'],
    values='B_m_B',
)

In [10]:
# Treat a missing signal (no value for that ticker in that week) as 0,
# i.e. bullish and bearish counts cancel out.
weekly_signal_df = weekly_signal_df.fillna(0)

In [11]:
# Labels for the five quantile buckets, lowest signal ('q1') to highest ('q5').
# NOTE: the variable name is misspelled ("lables"); it is kept as is because
# label_rows_by_quantile looks it up under this exact name.
quantile_lables = ['q1', 'q2', 'q3', 'q4', 'q5']

In [12]:
def label_rows_by_quantile(row, labels=None):
    """Assign every value in ``row`` to an equal-frequency quantile bucket.

    Parameters
    ----------
    row : pandas.Series
        Values to bucket (in this notebook: one week of per-ticker signals).
    labels : sequence of str, optional
        One label per bucket, lowest bucket first. Defaults to the
        notebook-level ``quantile_lables``. The number of buckets is
        ``len(labels)``.

    Returns
    -------
    pandas.Series
        Categorical bucket labels aligned with ``row``'s index.

    Notes
    -----
    ``pd.qcut`` cannot honour explicit ``labels`` when tied values make
    several quantile edges coincide: with ``duplicates='drop'`` the number
    of bins no longer matches the number of labels and a ``ValueError`` is
    raised. Rows with many equal values (e.g. many zeros after ``fillna(0)``)
    trigger this. In that case the values are ranked with
    ``method='first'`` -- ties are broken by position in ``row`` -- which
    guarantees distinct edges. Rows without coinciding edges are bucketed
    exactly as before.
    """
    if labels is None:
        labels = quantile_lables
    n_buckets = len(labels)

    try:
        return pd.qcut(row, n_buckets, labels=labels)
    except ValueError:
        # Coinciding quantile edges: fall back to bucketing the ranks,
        # which are unique by construction.
        ranks = row.rank(method='first')
        return pd.qcut(ranks, n_buckets, labels=labels)

In [13]:
# Bucket the tickers cross-sectionally: axis=1 hands each week's row of
# signals to label_rows_by_quantile. Rows are then ordered by date.
q_label_df = weekly_signal_df.apply(label_rows_by_quantile, axis=1).sort_index()

In [14]:
# Build one weekly return frame per ticker and stack them row-wise into a
# long-format table.
return_dfs = [
    get_weekly_returns_for_ticker(symbol, start_date, end_date)
    for symbol in all_ticker_list
]
return_long_df = pd.concat(return_dfs)

# Reshape into a (week x ticker) matrix of 'adjusted close' returns, ordered
# by date. The long table keeps its own name instead of being overwritten by
# the pivoted result, so this cell can be re-run without failing on the
# already-pivoted frame.
return_df = return_long_df.pivot_table(
    values='adjusted close',
    columns=['ticker'],
    index=return_long_df.index,
).sort_index()

In [16]:
# Compare (rows, columns) of the return matrix and the quantile-label matrix.
return_df.shape, q_label_df.shape

((135, 201), (136, 201))

In [17]:
# Dates (as YYYY-MM-DD strings) that appear in exactly one of the two indexes.
set(q_label_df.index.strftime('%Y-%m-%d')) ^ set(return_df.index.strftime('%Y-%m-%d'))

{'2017-01-06'}

### Read Fama-French factor data

In [18]:
# Load the Fama-French factors with the helper's default arguments; the
# preview in the next cell shows weekly rows with Mkt-RF, SMB and HML columns.
ff_df_wkly = get_ff_factors_with_freq()

In [19]:
# Preview the first rows of the factor table.
ff_df_wkly.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1926-07-02,0.005504,-0.005592,-0.003598
1926-07-09,-0.002426,-0.002434,0.003885
1926-07-16,0.012828,-0.00093,-0.01188
1926-07-23,-0.01584,0.001983,-0.007611
1926-07-30,0.0243,-0.014254,-0.007945


### Compare Q5 vs. Q1 portfolio
- Assume equal (dollar) weighting for now, which means each week's portfolio return is the simple average of the returns of the tickers held.

In [20]:
# Preview the weekly quantile label assigned to each ticker.
q_label_df.head()

ticker,AAL,AAPL,ABBV,ABMD,ABT,ACN,ADBE,ADSK,AGN,AIG,...,VRTX,VZ,WBA,WDC,WFC,WM,WMT,WYNN,XLNX,XOM
2017-01-06,q2,q5,q4,q1,q4,q2,q3,q1,q5,q3,...,q3,q4,q4,q2,q2,q2,q5,q5,q2,q2
2017-01-13,q4,q5,q3,q1,q4,q1,q3,q1,q5,q4,...,q2,q3,q3,q3,q3,q1,q4,q5,q1,q4
2017-01-20,q4,q5,q4,q2,q3,q1,q3,q2,q5,q3,...,q1,q3,q5,q3,q1,q1,q5,q5,q3,q5
2017-01-27,q4,q5,q4,q2,q3,q2,q4,q2,q5,q1,...,q3,q4,q4,q5,q3,q2,q1,q5,q4,q5
2017-02-03,q4,q5,q4,q2,q4,q1,q3,q2,q5,q1,...,q2,q4,q2,q3,q1,q2,q3,q4,q1,q5


In [67]:
# Quantile bucket whose portfolio is analysed in the cells that follow;
# it is compared against the labels stored in q_label_df.
portfolio_id = 'q2'

In [68]:
# Membership matrix: 1 where a ticker carries the chosen quantile label in
# that week, 0 otherwise.
portfolio_ts = (q_label_df == portfolio_id).astype(int)

# Move every row down by one period so that the holdings applied to a given
# week are the ones determined from the previous week's labels. The first
# row has no predecessor and becomes NaN.
portfolio_ts_one_time_ahead = portfolio_ts.shift(1)

In [69]:
# Preview the lagged membership matrix (first row is NaN after the shift).
portfolio_ts_one_time_ahead.head()

ticker,AAL,AAPL,ABBV,ABMD,ABT,ACN,ADBE,ADSK,AGN,AIG,...,VRTX,VZ,WBA,WDC,WFC,WM,WMT,WYNN,XLNX,XOM
2017-01-06,,,,,,,,,,,...,,,,,,,,,,
2017-01-13,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0
2017-01-20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-01-27,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-02-03,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [70]:
# Number of tickers held in each week (row-wise sum of the 0/1 membership).
n_holding_ts_one_time_ahead = portfolio_ts_one_time_ahead.sum(axis=1)

In [71]:
# Sanity check: both matrices list the same tickers in the same column order.
# Only the columns are checked here, not the row index.
assert np.all(return_df.columns == portfolio_ts_one_time_ahead.columns)

In [72]:
# Preview the weekly return matrix (one column per ticker).
return_df.head()

ticker,AAL,AAPL,ABBV,ABMD,ABT,ACN,ADBE,ADSK,AGN,AIG,...,VRTX,VZ,WBA,WDC,WFC,WM,WMT,WYNN,XLNX,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-13,0.03116,0.009584,-0.018413,0.002166,0.010162,0.005589,0.002124,0.025473,-0.01342,-0.006885,...,0.047613,-0.013332,0.008665,0.00881,0.004905,-0.005698,-0.016555,-0.020556,-0.009144,-0.024295
2017-01-20,0.007346,0.008064,-0.01355,-0.033111,-0.011484,-0.016075,0.020087,0.004304,-0.008328,0.002863,...,-0.021642,0.003236,-0.025054,0.022959,-0.004339,-0.001864,0.000745,0.003203,-0.008203,-0.005327
2017-01-27,-0.021875,0.01625,-0.018806,-0.026735,0.013345,0.002433,0.029627,0.001102,-0.005319,-0.019987,...,0.073737,-0.059181,-0.002691,0.094863,0.027601,-0.000287,-0.022626,0.134992,0.007927,-0.004425
2017-02-03,-0.058573,0.058466,0.011167,-0.035737,0.043415,-0.007456,0.010352,0.032412,0.08288,-0.004141,...,-0.007325,-0.020565,-0.012638,-0.004903,0.01889,0.000144,0.012793,-0.059759,-0.004617,-0.023038
2017-02-10,0.053243,0.027989,-0.004121,0.035061,-0.000934,0.027164,0.014587,-0.013979,0.067094,0.010317,...,0.016373,0.008233,0.035292,-0.022369,-0.007158,0.012777,0.022858,0.025615,0.002073,-0.003178


In [73]:
# Keep only the returns of tickers that are held: multiplying by the lagged
# 0/1 membership zeroes out every other ticker (aligned on date and ticker).
held_returns = return_df * portfolio_ts_one_time_ahead

# Equal-weighted portfolio return per week = sum of held returns divided by
# the number of holdings. The first row is dropped because the lagged
# membership has no values there. The intermediate frame has its own name so
# that this cell can be re-run without operating on its own output.
# NOTE(review): a held ticker whose return is NaN adds nothing to the sum but
# is still counted in the denominator -- confirm return_df has no gaps.
portfolio_returns = (held_returns.sum(axis=1) / n_holding_ts_one_time_ahead).iloc[1:]

In [75]:
# Simple (non-compounded) sum of the weekly portfolio returns.
portfolio_returns.sum()

0.20468922937746287

In [76]:
# Cumulative return over the whole period, compounding the weekly returns.
(1 + portfolio_returns).product() - 1

0.19067402902960606

In [77]:
# Put the portfolio returns next to the factor returns. Joining on the date
# index with the default inner join keeps only the weeks present in both.
all_df = pd.merge(
    portfolio_returns.to_frame('portfolio_return'),
    ff_df_wkly,
    left_index=True,
    right_index=True,
)

In [78]:
import statsmodels.formula.api as smf

In [79]:
# Preview the merged table of portfolio and factor returns.
all_df.head()

Unnamed: 0,portfolio_return,Mkt-RF,SMB,HML
2017-01-13,0.005,0.000879,0.004317,-0.009181
2017-01-20,0.000517,-0.003023,-0.011287,-0.00212
2017-01-27,0.009519,0.011592,0.002059,0.005644
2017-02-03,-0.005055,0.001544,0.003825,-0.00613
2017-02-10,0.009606,0.008802,-0.000356,-0.01097


In [80]:
# Three-factor regression of the portfolio return on Mkt-RF, SMB and HML.
# Q('Mkt-RF') quotes the column name because the hyphen is not valid in a
# formula identifier. The intercept is the estimated per-period alpha.
# NOTE(review): the dependent variable is the raw portfolio return, not the
# return in excess of the risk-free rate -- confirm whether RF should be
# subtracted before fitting.
lm = smf.ols("portfolio_return ~ Q('Mkt-RF') + SMB + HML", data=all_df).fit()

In [81]:
# Show the fitted coefficients and diagnostics.
lm.summary()

0,1,2,3
Dep. Variable:,portfolio_return,R-squared:,0.891
Model:,OLS,Adj. R-squared:,0.888
Method:,Least Squares,F-statistic:,355.9
Date:,"Mon, 04 Nov 2019",Prob (F-statistic):,9.19e-63
Time:,04:11:21,Log-Likelihood:,479.41
No. Observations:,135,AIC:,-950.8
Df Residuals:,131,BIC:,-939.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0002,0.001,-0.262,0.793,-0.001,0.001
Q('Mkt-RF'),1.0375,0.032,32.121,0.000,0.974,1.101
SMB,0.1592,0.058,2.752,0.007,0.045,0.274
HML,0.1325,0.055,2.426,0.017,0.024,0.241

0,1,2,3
Omnibus:,1.416,Durbin-Watson:,1.789
Prob(Omnibus):,0.493,Jarque-Bera (JB):,1.178
Skew:,0.228,Prob(JB):,0.555
Kurtosis:,3.047,Cond. No.,98.2


In [66]:
# Compound a hand-typed per-period rate of 0.0014 over 52 periods.
# NOTE(review): presumably a weekly figure being annualised; 0.0014 does not
# appear in any output shown in this notebook -- confirm where it came from.
(1+0.0014)**52 - 1

0.07546065654970602

In [None]:
# Annualise the mean weekly portfolio return by compounding it over 52 weeks.
# The cell originally ended in a dangling `**`, which is a SyntaxError; it is
# completed here using the same compounding formula as the cell before it.
(1 + all_df['portfolio_return'].mean()) ** 52 - 1