# Studying Stocks' Daily Returns vs Overnight Returns

When market opens each morning, stocks go thru a **Price Discovery** period called the **opening range**. It is usually a very volatile period.

This study will examine the differences if any, between a stock's daily return vs its overnight retunrs.


### Preliminary
For this study, we will use `pandas_datareader` to get historical returns;

In [11]:
import pandas as pd
import numpy as np
from functools import reduce
import re
#from unidecode import unidecode

# our own util functions
import mkt_dt_utils as dtutils

# pandas data reader
import pandas_datareader.data as web
from datetime import datetime as dt

# plotting
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
# Required for Plot.ly Offline
init_notebook_mode(connected=True)
# Cufflinks bind plotly to pandas dataframe in IPython Notebooks
cf.set_config_file(offline = False, world_readable = True, theme = 'ggplot')

## Getting Stock Returns
for our study, we are interested in the [**FANG** stocks](https://www.investopedia.com/terms/f/fang-stocks-fb-amzn.asp): `FB`, `AMZN`, `NFLX`, [`GOOG`](https://investorplace.com/2019/01/goog-google-stock-split/)

The [relevant benchmark would then be NASDAQ](https://www.forbes.com/sites/jaysomaney/2016/12/30/comparing-facebook-amazon-netflix-and-google-aka-fang-performance-in-2016/#57a2bf8952f9), which we'll use `QQQ` as proxy for.

We'll also get `SPY` just to test our calculated **Beta** vs Yahoo! Finance. The calcuation is reference from [here](https://medium.com/python-data/capm-analysis-calculating-stock-beta-as-a-regression-in-python-c82d189db536)

In [40]:
l_symbols = ['FB','AMZN', 'NFLX', 'GOOG', 'GOOGL', 'QQQ', 'SPY']
edate = dt(2019,2,8)
sdate = dtutils.MarketDateAdj(edate, -100, 'NYSE')
yhoo_data = web.DataReader( l_symbols, 'yahoo', sdate, edate)

In [13]:
yhoo_data.head()

Attributes,High,High,High,High,High,High,Low,Low,Low,Low,...,Volume,Volume,Volume,Volume,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close
Symbols,AMZN,FB,GOOG,NFLX,QQQ,SPY,AMZN,FB,GOOG,NFLX,...,GOOG,NFLX,QQQ,SPY,AMZN,FB,GOOG,NFLX,QQQ,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-09-20,1955.0,166.449997,1189.890015,370.26001,184.970001,293.940002,1932.25,164.470001,1173.359985,363.170013,...,1210000.0,6768100.0,33295800.0,100360600.0,1944.300049,166.020004,1186.869995,365.359985,183.862152,290.560242
2018-09-21,1957.310059,167.25,1192.209961,372.220001,185.479996,293.220001,1910.5,162.809998,1166.040039,360.73999,...,4405600.0,11930600.0,38605400.0,105479700.0,1915.01001,162.929993,1166.089966,361.190002,182.856857,290.294769
2018-09-24,1936.880005,165.699997,1178.0,373.640015,183.960007,291.5,1865.0,160.880005,1146.910034,354.329987,...,1271000.0,9322500.0,34945600.0,53409600.0,1934.359985,165.410004,1173.369995,369.609985,183.365387,289.330414
2018-09-25,1975.910034,165.589996,1186.880005,371.339996,184.279999,291.649994,1938.849976,161.149994,1168.0,364.48999,...,977700.0,6799800.0,25277500.0,44370000.0,1974.550049,164.910004,1184.650024,369.429993,183.61467,289.061981
2018-09-26,1995.25,169.300003,1194.22998,382.0,185.990005,292.23999,1961.52002,164.210007,1174.765015,370.880005,...,1462300.0,13799700.0,36210600.0,79739700.0,1974.849976,166.949997,1180.48999,377.880005,183.744308,288.197052


In [14]:
def GetReturnsYHF( yhoo_data, overnight = False):
    data = yhoo_data.stack()
    data.reset_index(inplace = True)

    l_col_to_keep = ['Date', 'Symbols', 'Adj Close','Open', 'Volume']
    r_dfs = []

    # 1
    for sym in l_symbols:
        r_df = data[ data.Symbols == sym ].loc[:, l_col_to_keep]
        r_df = r_df.set_index('Date')
        
        r_df[f'r({sym})'] = r_df['Adj Close']/ r_df['Adj Close'].shift(1) - 1
        # or just do df.pct_change()
        if overnight:
            r_df[f'r({sym})'] = r_df['Open']/ r_df['Adj Close'].shift(1) - 1
        
        r_dfs.append( r_df.iloc[1:,:])

    # 2
    df = reduce( lambda x, y : pd.concat([x,y], axis =1),
                [ r_df.iloc[:,-1] for r_df in r_dfs ]
               )

    #df.sort_values(by = 'Date',ascending = False).head(3)
    
    return df

In [41]:
df_overnight_r = GetReturnsYHF(yhoo_data, overnight = True)
df_returns = GetReturnsYHF(yhoo_data)

### Here we define Beta and Alpha to Look at return vs benchmark

In [16]:
from scipy import stats
def GetBeta( r_sym , r_benchmark):
    slope, intercept, r_value, p_value, std_err = stats.linregress( r_sym, r_benchmark)
    return slope

def GetAlpha( r_sym , r_benchmark):
    beta = GetBeta( r_sym, r_benchmark)
    return r_sym - beta * r_benchmark

In [17]:
GetBeta( df_returns['r(FB)'], df_returns['r(QQQ)'])

0.5340511737328286

In [18]:
alpha_fb = GetAlpha( df_returns['r(FB)'], df_returns['r(QQQ)'])
alpha_fb.sort_index(ascending= False).head()

Date
2019-02-08    0.004662
2019-02-07   -0.016935
2019-02-06   -0.002322
2019-02-05    0.006560
2019-02-04    0.014729
dtype: float64

## Let's Compare Daily Returns vs Overnight Returns

In [19]:
dist_data = [df_returns['r(FB)'], df_overnight_r['r(FB)']]
dist_labels = [ 'daily returns', 'overnight returns']

fig = ff.create_distplot( dist_data, dist_labels, bin_size = 0.005,
                            show_rug= False
                        )
fig['layout'].update(title = 'Returns (Daily vs Overnight) Distplot')
iplot(fig)

In [20]:
r_std = np.std( dist_data[0])
r_mean = np.mean( dist_data[0])
or_std = np.std( dist_data[1])
or_mean = np.mean( dist_data[1])
print( f'{dist_labels[0]} std is {"{:.2%}".format(r_std)}\n{dist_labels[1]} std is {"{:.2%}".format(or_std)}')
print( f'{dist_labels[0]} mean is {"{:.2%}".format(r_mean)}\n{dist_labels[1]} mean is {"{:.2%}".format(or_mean)}')

daily returns std is 2.65%
overnight returns std is 1.66%
daily returns mean is 0.04%
overnight returns mean is -0.00%


## What about Daily vs Overnight Alphas?

In [21]:
d_alpha = GetAlpha( df_returns['r(FB)'], df_returns['r(QQQ)'])
o_alpha = GetAlpha( df_overnight_r['r(FB)'], df_overnight_r['r(QQQ)'])

dist_data = [d_alpha, o_alpha]
dist_labels = [ 'Alpha', 'Overnight Alpha']

fig = ff.create_distplot( dist_data, dist_labels, bin_size = 0.005,
                            show_rug= False
                        )
fig['layout'].update(title = 'Alpha (Daily vs Overnight) Distplot')
iplot(fig)

In [22]:
r_std = np.std( dist_data[0])
r_mean = np.mean( dist_data[0])
or_std = np.std( dist_data[1])
or_mean = np.mean( dist_data[1])
print( f'{dist_labels[0]} std is {"{:.2%}".format(r_std)}\n{dist_labels[1]} std is {"{:.2%}".format(or_std)}')
print( f'{dist_labels[0]} mean is {"{:.2%}".format(r_mean)}\n{dist_labels[1]} mean is {"{:.2%}".format(or_mean)}')

Alpha std is 2.00%
Overnight Alpha std is 1.42%
Alpha mean is 0.08%
Overnight Alpha mean is -0.09%


## Get Next Day's Opening Range
assume returns are [normally distributed](https://en.wikipedia.org/wiki/Normal_distribution)

In [23]:
def GetOpenRange( closePx, returns):
    '''
    returns expected open price and std * closePx
    '''
    r_mean = np.mean(returns)
    r_std = np.std(returns)
    
    e_px = np.exp(r_mean) * closePx
    e_std = (np.exp(r_mean + r_std) -1 ) * closePx
    return e_px, e_std

In [176]:
def GetOpeningRange(TradeDate, Ticker, NumDays = 100):
    edate = TradeDate
    sdate = dtutils.MarketDateAdj(edate, -NumDays, 'NYSE')
    prev_td = dtutils.MarketDateAdj(edate, -1, 'NYSE')
    data = web.DataReader( Ticker, 'yahoo', sdate, edate)

    # Exclude TradeDate's data in returns
    r_df = data[data.index.to_pydatetime() < TradeDate ]
    r_df['returns'] = r_df['Adj Close']/ r_df['Adj Close'].shift(1) - 1
    r_df['overnight'] = r_df['Open']/ r_df['Adj Close'].shift(1) - 1

    returns = r_df['overnight']
    r_mean = np.mean(returns)
    r_std = np.std(returns)
    
    closePx = data['Adj Close'][prev_td]
    openPx = data['Open'][TradeDate]
    e_px = np.exp(r_mean) * closePx
    e_std = (np.exp( r_mean + r_std) -1) * closePx
    
    return {'Expected Px': e_px, 'Std': e_std, 'Actual Open': openPx}

### Example: 'FB' Opening Range on Feb 2, 2019

In [179]:
GetOpeningRange(dt(2019,2,8), 'FB')

{'Expected Px': 166.39837474328306,
 'Std': 2.8032945954821686,
 'Actual Open': 164.47000122070312}

In [26]:
from iexfinance.stocks import Stock
fb = Stock('FB')
px_close = fb.get_price()

e_p, e_std = GetOpenRange( px_close, df_overnight_r['r(FB)'])
e_p, e_std = round(e_p,2), round(e_std,2)

print(f'FB closed at {px_close}')
print(f'--- Expected Open Range ---')
print(f'68%: {"{:.2f}".format(e_p - e_std)} - {"{:.2f}".format(e_p + e_std)}')
print(f'95%: {"{:.2f}".format(e_p - 2 * e_std)} - {"{:.2f}".format(e_p + 2 * e_std)}')
print(f'99.7%: {"{:.2f}".format(e_p - 3 * e_std)} - {"{:.2f}".format(e_p + 3 * e_std)}')

FB closed at 165.79
--- Expected Open Range ---
68%: 163.02 - 168.56
95%: 160.25 - 171.33
99.7%: 157.48 - 174.10


## Getting Sentiments

In [27]:
df_sent = pd.read_csv('dataset/nasdaq/daily_sentiment.csv', index_col = 0)
df_sent.head()

Unnamed: 0,stockcode,trade_date,sentiment_score
0,AMZN,2019-02-08,0.522779
1,AMZN,2019-02-07,0.586729
2,AMZN,2019-02-06,0.46602
3,AMZN,2019-02-05,0.30766
4,AMZN,2019-02-04,0.318894


### Let's Look at Stocks' Sentiment vs Returns

In [193]:
df_sent_returns = df_sent[ df_sent['trade_date']< '2019-02-09']
df_sent_returns['overnight_return'] = df_sent_returns.apply(
        lambda x: df_overnight_r[f'r({x["stockcode"]})'][x['trade_date']],
        axis = 1
    )


In [194]:
count_clean = len(df_sent_returns.dropna())
count_org = len(df_sent_returns)
print(f'Found {count_org - count_clean} NaN')

df_sent_returns = df_sent_returns.dropna()

Found 15 NaN


#### One Stock Example

In [204]:
ticker = 'AMZN'
idf = df_sent_returns[df_sent_returns['stockcode']== ticker]
corr = np.corrcoef(idf['sentiment_score'], idf['overnight_return'])
print(f'Sentiment vs Overnight Returns correlations for {ticker} for last {len(idf)} trade dates: {corr[1][0]}')

Sentiment vs Overnight Returns correlations for AMZN for last 59 trade dates: -0.08293926085480521


In [205]:
plotly_data = [go.Scatter(y = idf['overnight_return'], x = idf['sentiment_score'], mode = 'markers')
              ]
layout = go.Layout(
        title = f'overnight returns vs sentiments for {ticker}',
        yaxis = {'title': 'excess return'},
        xaxis = {'title': 'sentiment'}
)

fig = go.Figure( data = plotly_data, layout= layout)

iplot(fig)

#### What about Alpha??

In [206]:
idf_ = idf.dropna()
bm = 'QQQ'
idf_['overnight_bm'] = df_sent_returns.apply(
        lambda x: df_overnight_r[f'r({bm})'][x['trade_date']],
        axis = 1
    )
idf_['overnight_alpha'] = GetAlpha( idf_['overnight_return'], idf_['overnight_bm'])

corr_ = np.corrcoef(idf_['sentiment_score'], idf_['overnight_alpha'])
print(f'Sentiment vs Overnight Alpha correlations for {ticker} for last {len(idf)} trade dates: {corr_[1][0]}')


Sentiment vs Overnight Alpha correlations for AMZN for last 59 trade dates: -0.07611252915702978


### All Sentiments vs Returns

In [88]:
plotly_data = [go.Scatter(y = df_sent_returns['overnight_return'], x = df_sent_returns['sentiment_score'], mode = 'markers')
              ]
layout = go.Layout(
        title = 'overnight returns vs sentiments',
        yaxis = {'title': 'excess return'},
        xaxis = {'title': 'sentiment'}
)

fig = go.Figure( data = plotly_data, layout= layout)

iplot(fig)

### Stock Check Overnight Return is correct

In [66]:
edate = dt(2019,2,8)
sdate = dtutils.MarketDateAdj(edate, -100, 'NYSE')
y_data = web.DataReader( 'AMZN', 'yahoo', sdate, edate)
y_data.sort_index(ascending = False).head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-02-08,1588.589966,1566.76001,1586.0,1588.219971,5657500,1588.219971
2019-02-07,1625.540039,1592.910034,1625.0,1614.369995,4626600,1614.369995
2019-02-06,1672.26001,1633.339966,1670.75,1640.26001,3939900,1640.26001
2019-02-05,1665.26001,1642.5,1643.339966,1658.810059,4453100,1658.810059
2019-02-04,1649.630005,1613.5,1623.0,1633.310059,4929100,1633.310059


In [65]:
df_overnight_r[f'r({ticker})'].sort_index(ascending = False).head()

Date
2019-02-08   -0.017573
2019-02-07   -0.009303
2019-02-06    0.007198
2019-02-05    0.006141
2019-02-04   -0.001986
Name: r(AMZN), dtype: float64

## Looking at Sentiment Scores

In [87]:
ave_sent = np.mean(df_sent_returns['sentiment_score'])
std_sent = np.std( df_sent_returns['sentiment_score'])
print( f'Daily Sentiment Score has mean of {ave_sent} and std of {std_sent}')

Daily Sentiment Score has mean of 0.43707164664283904 and std of 0.1273346482754


In [85]:
dist_data = [df_sent_returns['sentiment_score']]
dist_labels = [ 'sentiment_score']

fig = ff.create_distplot( dist_data, dist_labels, bin_size = 0.005,
                            show_rug= False
                        )
fig['layout'].update(title = 'Daily Sentiment Score Distribution for FANG')
iplot(fig)

In [86]:
ticker = 'GOOGL'

df_ = df_sent_returns
dist_data = [df_[ df_['stockcode'] == ticker]['sentiment_score']]
dist_labels = [ 'sentiment_score']

fig = ff.create_distplot( dist_data, dist_labels, bin_size = 0.005,
                            show_rug= False
                        )
fig['layout'].update(title = f'Daily Sentiment Score Distribution for {ticker}')
iplot(fig)

## Get Z-Score and P-Value to see if a Sentiment Score is Positive or Negative
[reference](https://towardsdatascience.com/statistical-significance-hypothesis-testing-the-normal-curve-and-p-values-93274fa32687)

In [97]:
import scipy 

example = df_sent_returns.iloc[115]
ss_ = example['sentiment_score']
z_score = (ss_ - ave_sent) / std_sent
p_value = scipy.stats.norm.sf( abs(z_score))
print( f'{z_score} {p_value}')

1.5646044187931385 0.05883784607804491


In [222]:
def PrintSignal(zscore):
    buysell = 'Higher'
    if np.sign(zscore) == -1:
        buysell = 'Lower'
    
    conf_lvl = 'Low'
    if abs(zscore)> 1:
        conf_lvl = 'Moderate'
    if abs(zscore)> 2:
        conf_lvl = 'High'
    
    return f'Stock should open {buysell} with {conf_lvl} confidence.'

In [227]:
PrintSignal(0.5)

'Stock should open Higher with Low confidence.'

## Update CSV with Z-Score

In [114]:
df_sent = pd.read_csv('dataset/nasdaq/daily_sentiment.csv', index_col = 0)

df_ = df_sent #.dropna()
sent_mu = np.mean(df_['sentiment_score'])
sent_std = np.std( df_['sentiment_score'])

df_['z_score'] = df_['sentiment_score'].apply(
        lambda x : (x - sent_mu ) / sent_std
    )

df_.describe()

Unnamed: 0,sentiment_score,z_score
count,326.0,326.0
mean,0.435579,-3.2693680000000004e-17
std,0.128291,1.001537
min,-0.04448,-3.747697
25%,0.366718,-0.5375862
50%,0.432567,-0.02351789
75%,0.515777,0.6260795
max,0.79458,2.802624


In [115]:
csv_name = 'dataset/nasdaq/daily_sentiment.csv'
df_.to_csv(csv_name)

In [112]:
df_signif = df_[ df_['z_score'] < -2]
df_signif

Unnamed: 0,stockcode,trade_date,sentiment_score,z_score
7,AMZN,2019-01-30,0.112596,-2.521446
142,GOOGL,2018-10-11,0.0132,-3.297404
146,GOOGL,2018-10-04,0.06587,-2.886224
240,FB,2018-10-03,0.10469,-2.583166
248,FB,2019-02-11,0.13482,-2.347949
284,NFLX,2018-12-18,0.0939,-2.667401
301,NFLX,2018-11-20,-0.04448,-3.747697
320,NFLX,2018-10-24,0.16316,-2.126706
331,NFLX,2018-10-09,0.15893,-2.159729


In [111]:
df_sent_returns.iloc[df_signif.index]

Unnamed: 0,stockcode,trade_date,sentiment_score,overnight_return
49,AMZN,2018-11-27,0.7902,-0.003377
57,AMZN,2018-11-14,0.260373,0.015418
115,GOOGL,2018-11-19,0.49184,-0.004568
241,FB,2018-10-02,0.42408,-0.005294
267,NFLX,2019-01-14,0.33567,-0.009923
272,NFLX,2019-01-07,0.436389,0.015223
293,NFLX,2018-12-04,0.303138,-0.007475
