In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the local project modules take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
# Add the directory three levels up to the import path — presumably the project
# root that holds the `config` and `util` packages imported below (TODO confirm).
# NOTE(review): the path is relative to the kernel's working directory.
sys.path.append('../../../')

In [3]:
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from config import STOCKTWITS_TICKER_LIST
from util.file_util import (
    StockTwitsFileReader, StockDataFileReader
)
from util.plot_util import plot_twit_series_for_ticker
from util.ts_util import resample_weekly
from util.ff_util import read_ff_factors_daily, get_ff_factors_with_freq

In [4]:
# Load the ticker universe used throughout the notebook.
# NOTE(review): the filename suggests tickers filtered on a "median >= 2"
# criterion — confirm how this file was produced.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open('./ticker_at_least_2_median.pkl', 'rb') as f:
    all_ticker_list = pickle.load(f)

In [5]:
# Module-level readers shared by the helper functions defined below
# (they are referenced as globals, not passed as arguments).
twit_file_reader = StockTwitsFileReader()
stock_data_reader = StockDataFileReader()

In [6]:
# Sample window (ISO date strings) passed to both the sentiment and price readers.
start_date = '2017-01-01'
end_date = '2019-08-10'

In [7]:
def get_weekly_twits_for_ticker(ticker, start_date, end_date):
    """Return the weekly sentiment frame for a single ticker.

    Reads the daily sentiment summary for ``ticker`` between ``start_date``
    and ``end_date`` through the module-level ``twit_file_reader``, passes it
    to ``resample_weekly``, and adds two columns:

    * ``B_m_B``  -- the ``Bullish`` column minus the ``Bearish`` column.
    * ``ticker`` -- the ticker symbol, repeated on every row.

    NOTE(review): the exact aggregation done by ``resample_weekly`` is defined
    in ``util.ts_util`` and is not visible here.
    """
    daily_sentiment = twit_file_reader.read_daily_sentiment_summary_prelim(
        ticker, start_date=start_date, end_date=end_date)

    weekly_sentiment = resample_weekly(daily_sentiment)
    # Net bullishness: bullish count minus bearish count for the week.
    weekly_sentiment['B_m_B'] = (
        weekly_sentiment['Bullish'] - weekly_sentiment['Bearish'])
    # Tag rows so per-ticker frames can be stacked and pivoted later.
    weekly_sentiment['ticker'] = ticker

    return weekly_sentiment

def get_weekly_returns_for_ticker(ticker, start_date, end_date):
    """Return week-over-week percentage changes of one ticker's price data.

    Price data for ``ticker`` is read through the module-level
    ``stock_data_reader``, sampled at the last observation of each week
    ending on Friday (``'W-FRI'``), and converted to simple percentage
    changes. A ``ticker`` column holding the symbol is appended.

    The first row is NaN because it has no preceding week to compare against.
    """
    daily_prices = stock_data_reader.read_stockdata_in_range(
        ticker, start_date, end_date)

    # Last available observation in each Friday-ending week, then simple returns.
    weekly_returns = daily_prices.resample('W-FRI').last().pct_change()
    weekly_returns['ticker'] = ticker
    return weekly_returns

In [8]:
def get_weekly_returns_for_ticker_list(ticker_list, start_date, end_date):
    """Build a wide (date x ticker) frame of weekly 'adjusted close' returns.

    Calls ``get_weekly_returns_for_ticker`` for every symbol in
    ``ticker_list``, stacks the results, and pivots them so that each column
    is one ticker and each row one index value, sorted by index.
    """
    per_ticker_frames = [
        get_weekly_returns_for_ticker(symbol, start_date, end_date)
        for symbol in ticker_list
    ]
    stacked = pd.concat(per_ticker_frames)

    # Long -> wide: one column per ticker, values taken from 'adjusted close'.
    wide_returns = stacked.pivot_table(values='adjusted close',
                                       columns=['ticker'],
                                       index=stacked.index)
    return wide_returns.sort_index()

In [9]:
# Collect the weekly sentiment frame of every ticker, then stack them into one
# long frame (`df`) that carries a `ticker` column identifying each row.
dfs = []

for ticker in all_ticker_list:
    dfs.append(get_weekly_twits_for_ticker(ticker,
                                           start_date,
                                           end_date))

df = pd.concat(dfs)

In [10]:
# Long -> wide: rows keyed by the original index, one column per ticker,
# cell values are B_m_B (pivot_table aggregates duplicates with the mean by default).
weekly_signal_df = df.pivot_table(values='B_m_B', columns=['ticker'], index=df.index)

In [11]:
# Replace missing ticker/week cells with 0, i.e. treat "no data" like a
# zero net-bullish signal when tickers are ranked against each other.
weekly_signal_df = weekly_signal_df.fillna(0)

In [12]:
# Bucket labels in ascending order of the signal: 'q1' is the lowest fifth,
# 'q5' the highest. (Name kept as-is, including its spelling, because other
# cells may reference it.)
quantile_lables = ['q1', 'q2', 'q3', 'q4', 'q5']

def label_rows_by_quantile(row):
    """Assign each element of ``row`` to one of the ``quantile_lables`` buckets.

    Parameters
    ----------
    row : pandas.Series
        One cross-section of signal values (one value per ticker).

    Returns
    -------
    pandas.Series
        Categorical labels ('q1'..'q5') with the same index as ``row``.

    Notes
    -----
    ``pd.qcut(..., labels=<5 labels>, duplicates='drop')`` raises
    ``ValueError`` when tied values make two quantile edges coincide: the
    duplicate edge is dropped and the number of bins no longer matches the
    number of labels. When that happens, this function buckets the ordinal
    ranks instead (ties broken by position in ``row``), which always yields
    distinct edges. Rows whose quantile edges are already unique are
    labelled exactly as a plain value-based ``pd.qcut`` would label them.
    """
    n_bins = len(quantile_lables)

    # Probe the quantile edges without labels so collapsed edges do not raise.
    _, edges = pd.qcut(row, n_bins, labels=False, retbins=True,
                       duplicates='drop')

    if len(edges) - 1 == n_bins:
        # All edges distinct: value-based bucketing is well defined.
        return pd.qcut(row, n_bins, labels=quantile_lables)

    # Heavy ties collapsed at least one edge; rank first so every element has
    # a distinct sort key. method='first' breaks ties by order of appearance.
    return pd.qcut(row.rank(method='first'), n_bins, labels=quantile_lables)

def get_portfolio_signal_for_quantile(weekly_signal_df, quantile):
    """Return 0/1 holdings for the tickers that fell in ``quantile`` one period earlier.

    Each row of ``weekly_signal_df`` is labelled with
    ``label_rows_by_quantile``; a ticker is flagged 1 when its label equals
    ``quantile`` and 0 otherwise. The flags are then shifted down by one row
    so that the holdings at row *t* are based on the signal at row *t-1*.
    The first row of the result is therefore all NaN.
    """
    labels_by_week = weekly_signal_df.apply(label_rows_by_quantile, axis=1)
    labels_by_week = labels_by_week.sort_index()

    membership = (labels_by_week == quantile).astype(int)

    # Lag by one period: trade on the previous row's signal, not the current one.
    return membership.shift(1)

def calc_portfolio_returns(portfolio_holding_df,
                           return_df):
    """Equal-weighted portfolio return per period.

    Parameters
    ----------
    portfolio_holding_df : pandas.DataFrame
        Holdings indicator (1 = held, 0 = not held), one column per ticker.
    return_df : pandas.DataFrame
        Per-period returns with exactly the same columns, in the same order.

    Returns
    -------
    pandas.Series
        Average return of the held tickers for each period, with the first
        row dropped. Periods with no held ticker that has a valid return
        are NaN.

    Raises
    ------
    AssertionError
        If the two frames do not have identical columns.

    Notes
    -----
    A held ticker whose return is NaN is excluded from both the sum and the
    count. Counting it in the denominator while ``sum`` skips it in the
    numerator would silently treat the missing return as 0 and pull the
    average toward zero.
    """
    # Index.equals also handles differing lengths, where an element-wise
    # `==` would raise an opaque ValueError instead of failing the check.
    assert return_df.columns.equals(portfolio_holding_df.columns), (
        'return_df and portfolio_holding_df must have identical columns')

    held_returns = return_df * portfolio_holding_df

    # Count only positions that actually contribute a return this period.
    n_holding_ts = portfolio_holding_df.where(
        held_returns.notna()).sum(axis=1)

    portfolio_returns = (held_returns.sum(axis=1)
                         / n_holding_ts).iloc[1:]
    return portfolio_returns

In [13]:
# Lagged 0/1 holdings for the bottom (q1, lowest B_m_B) and top (q5, highest
# B_m_B) signal buckets.
portfolio_holding_df_q1 = get_portfolio_signal_for_quantile(weekly_signal_df,
                                                            'q1')
portfolio_holding_df_q5 = get_portfolio_signal_for_quantile(weekly_signal_df,
                                                            'q5')

In [14]:
# Wide (date x ticker) frame of weekly returns for the same ticker universe and window.
return_df = get_weekly_returns_for_ticker_list(all_ticker_list,
                                               start_date,
                                               end_date)

In [24]:
# Equal-weighted weekly return series of each bucket portfolio.
# (Despite the `_df` suffix these are Series: calc_portfolio_returns reduces over columns.)
portfolio_return_df_q1 = calc_portfolio_returns(portfolio_holding_df_q1,
                                                return_df)
portfolio_return_df_q5 = calc_portfolio_returns(portfolio_holding_df_q5,
                                                return_df)

In [25]:
# Long-short strategy: long q1 (lowest B_m_B), short q5 (highest B_m_B).
# The weekly strategy return is the q1 return minus the q5 return.
portfolio_return_df_strategy = portfolio_return_df_q1 - portfolio_return_df_q5

### Read Fama-French factor data

In [26]:
# Load the factor returns with the helper's default arguments.
# NOTE(review): the default frequency is assumed to be weekly (the rows shown
# below are spaced seven days apart) — confirm in util.ff_util.
ff_df_wkly = get_ff_factors_with_freq()

In [27]:
# Peek at the first rows to check the factor columns and the date index.
ff_df_wkly.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,Mom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-11-05,0.008617,-0.004793,0.00639,0.011976
1926-11-12,0.011203,0.00079,-0.005505,-0.008418
1926-11-19,-0.011878,0.004678,0.002586,0.011541
1926-11-26,0.015893,-0.000504,0.003499,-0.005317
1926-12-03,0.009307,-0.0015,-0.008983,-0.00222


### Compare Q5 vs. Q1 portfolio
- Assume equal (dollar) weighting for now, i.e. each portfolio's return is the simple average of its constituents' returns.

In [28]:
import statsmodels.formula.api as smf

In [40]:
def calc_portfolio_performance(portfolio_return_weekly_df,
                               ff_df_weekly):
    """Summarise a weekly return series against the four-factor model.

    Parameters
    ----------
    portfolio_return_weekly_df : pandas.Series
        Weekly portfolio returns indexed by date.
    ff_df_weekly : pandas.DataFrame
        Weekly factor returns indexed by date, with columns ``Mkt-RF``,
        ``SMB``, ``HML`` and ``Mom``.

    Returns
    -------
    dict
        ``'lm'``                -- fitted statsmodels OLS results of the
                                   portfolio return on the four factors.
        ``'annualized_return'`` -- mean weekly return compounded over 52
                                   weeks, expressed as a rate (0.10 = 10%).
        ``'annualized_alpha'``  -- regression intercept (weekly alpha)
                                   compounded over 52 weeks, as a rate.

    Notes
    -----
    * The factor frame used is the ``ff_df_weekly`` argument. The earlier
      code read the notebook global ``ff_df_wkly`` and silently ignored
      the argument.
    * Both annualized figures are net rates, ``(1 + weekly) ** 52 - 1``.
      Previously the return was a gross growth factor and the alpha was the
      un-annualized weekly intercept.
    * The merge is an inner join on the index, so only dates present in
      both inputs enter the regression.
    """
    weeks_per_year = 52

    avg_annualized_return = (
        (1 + portfolio_return_weekly_df.mean()) ** weeks_per_year - 1)

    all_df = portfolio_return_weekly_df.to_frame(
        'portfolio_return').merge(ff_df_weekly,
                                  left_index=True,
                                  right_index=True)

    # Q('Mkt-RF') quotes the column name, which is not a valid Python identifier.
    lm = smf.ols(
        "portfolio_return ~ Q('Mkt-RF') + SMB + HML + Mom", data=all_df).fit()

    # The intercept is a per-week alpha; compound it to an annual rate.
    avg_annualized_alpha = (
        (1 + lm.params['Intercept']) ** weeks_per_year - 1)

    return {
        'lm': lm,
        'annualized_return': avg_annualized_return,
        'annualized_alpha': avg_annualized_alpha,
    }

In [41]:
# Factor-model performance summary for each bucket portfolio and for the
# long-short strategy.
performance_q1 = calc_portfolio_performance(portfolio_return_df_q1,
                                            ff_df_wkly)
performance_q5 = calc_portfolio_performance(portfolio_return_df_q5,
                                            ff_df_wkly)
performance_strategy = calc_portfolio_performance(portfolio_return_df_strategy,
                                                  ff_df_wkly)

In [43]:
# Full OLS regression table for the q5 (highest B_m_B) portfolio.
performance_q5['lm'].summary()

0,1,2,3
Dep. Variable:,portfolio_return,R-squared:,0.92
Model:,OLS,Adj. R-squared:,0.918
Method:,Least Squares,F-statistic:,374.5
Date:,"Mon, 04 Nov 2019",Prob (F-statistic):,2.69e-70
Time:,05:51:38,Log-Likelihood:,495.78
No. Observations:,135,AIC:,-981.6
Df Residuals:,130,BIC:,-967.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0003,0.001,-0.461,0.646,-0.001,0.001
Q('Mkt-RF'),1.0776,0.029,37.400,0.000,1.021,1.135
SMB,0.0602,0.052,1.162,0.247,-0.042,0.163
HML,-0.2468,0.053,-4.684,0.000,-0.351,-0.143
Mom,-0.0291,0.046,-0.639,0.524,-0.119,0.061

0,1,2,3
Omnibus:,0.292,Durbin-Watson:,2.327
Prob(Omnibus):,0.864,Jarque-Bera (JB):,0.103
Skew:,0.057,Prob(JB):,0.95
Kurtosis:,3.071,Cond. No.,112.0
