In [1]:
import pandas as pd
import numpy as np

from plotly.offline import plot 
import plotly.graph_objects as go

import warnings
warnings.filterwarnings('ignore')


In [2]:
from plotly.offline import init_notebook_mode, iplot

In [3]:
# Load the raw price history, strip surrounding whitespace from the column
# names and parse the 'date' column into datetimes.
df_full=pd.read_csv("stock_price.csv")
df_full=df_full.rename(columns=lambda x: x.strip())
df_full['date']=pd.to_datetime(df_full['date'])

# Study window: strictly after 2015-07-01, up to and including 2018-01-01.
in_window=(df_full['date']>'2015-7-1')& (df_full['date']<='2018-1-1')
# .copy() gives df its own data. Later cells add many columns to df; without
# the copy those assignments would target a slice of df_full, which pandas
# flags with SettingWithCopyWarning.
df=df_full[in_window].copy()
# For quick experiments a positional slice can be used instead:
# df=df_full.iloc[:1000].copy()

# Feature Calculation
I first calculate all the basic candlestick features, and then the features that are built on my own beliefs about price action.

In [4]:
def get_wick_length(ohlc):
    """Return the combined length of a candle's two wicks.

    Parameters
    ----------
    ohlc : iterable of numbers
        The open, high, low and close prices of one candle, in any order.
        The input is not modified.

    Returns
    -------
    number
        With the prices ranked from highest to lowest as p0..p3, the value
        ``abs((p0 - p1) + (p2 - p3))``.
    """
    # sorted() builds a new list, so the caller's sequence keeps its order
    # and any iterable (list, tuple, Series values, ...) is accepted.
    ranked = sorted(ohlc, reverse=True)
    return abs((ranked[0]-ranked[1])+(ranked[2]-ranked[3]))
def get_wick_bull(ohlc):
    """Return the length of a candle's upper wick.

    Parameters
    ----------
    ohlc : iterable of numbers
        The open, high, low and close prices of one candle, in any order.
        The input is not modified.

    Returns
    -------
    number
        The highest price minus the second-highest price.
    """
    # sorted() leaves the caller's sequence untouched and accepts any iterable.
    ranked = sorted(ohlc, reverse=True)
    return (ranked[0]-ranked[1])
def get_wick_bear(ohlc):
    """Return the length of a candle's lower wick.

    Parameters
    ----------
    ohlc : iterable of numbers
        The open, high, low and close prices of one candle, in any order.
        The input is not modified.

    Returns
    -------
    number
        The second-lowest price minus the lowest price.
    """
    # sorted() leaves the caller's sequence untouched and accepts any iterable.
    ranked = sorted(ohlc, reverse=True)
    return (ranked[2]-ranked[3])

In [5]:
ewma_span=10  # span shared by every exponentially weighted mean below
df['prev_close']=df['close'].shift(1)

# level 0 features: raw per-candle measurements
df['body_size']=np.abs(df['open']-df['close'])
df['gap']=df['open']-df['prev_close']
# Rank the four prices of every candle once (ascending within each row)
# rather than re-sorting them in three separate row-wise apply() passes.
ohlc_sorted=np.sort(df[['open','high','low','close']].to_numpy(),axis=1)
upper_wick=ohlc_sorted[:,3]-ohlc_sorted[:,2]  # highest minus second-highest
lower_wick=ohlc_sorted[:,1]-ohlc_sorted[:,0]  # second-lowest minus lowest
df['wick']=np.abs(upper_wick+lower_wick)
df['wick_bull']=upper_wick
df['wick_bear']=lower_wick

# level 1 features: exponentially weighted means of the level 0 features
df['vol_ema']=df['volume'].ewm(span=ewma_span).mean()
df['body_size_ewma']=df['body_size'].ewm(span=ewma_span).mean()
df['gap_ewma']=df['gap'].abs().ewm(span=ewma_span).mean()
df['wick_ewma']=df['wick'].ewm(span=ewma_span).mean().abs()
df['wick_bull_ewma']=df['wick_bull'].ewm(span=ewma_span).mean().abs()
df['wick_bear_ewma']=df['wick_bear'].ewm(span=ewma_span).mean().abs()

# level 2 features: relative deviation of each candle from its own EWMA
# NOTE: these are plain divisions; a zero EWMA produces inf/NaN, not an error.
df['body_dev']=np.abs(df['body_size']-df['body_size_ewma'])/df['body_size_ewma']
df['vol_dev']=np.abs(df['volume']-df['vol_ema'])/df['vol_ema']
# The magnitude of the gap is used here, so its direction is dropped; the
# sign is re-applied further down when gap_signal is built.
df['gap_dev']=(df['gap'].abs()-df['gap_ewma'])/df['gap_ewma']
df['wick_dev']=(df['wick']-df['wick_ewma'])/df['wick_ewma']
df['wick_bull_dev']=(df['wick_bull']-df['wick_bull_ewma'])/df['wick_bull_ewma']
df['wick_bear_dev']=(df['wick_bear']-df['wick_bear_ewma'])/df['wick_bear_ewma']

# level 3 features
# Volume anomaly: body deviation scaled by (1 - volume deviation). The product
# turns negative when vol_dev exceeds 1; those values are floored at zero.
df['vol_anomaly']=(df['body_dev']*(1-df['vol_dev'])).clip(lower=0)

# Wick Anomaly - only an unusually large wick matters, so below-average
# deviations are floored at zero.
df['wick_dev']=df['wick_dev'].clip(lower=0) # I dont think this feature is necessary
df['wick_bull_dev']=df['wick_bull_dev'].clip(lower=0)
df['wick_bear_dev']=df['wick_bear_dev'].clip(lower=0)

# Gap Anomaly - only an unusually large gap matters; gap_signal then carries
# the direction of the gap (positive = gap up, negative = gap down).
df['gap_dev']=df['gap_dev'].clip(lower=0)
df['gap_signal']=np.sign(df['gap'])*df['gap_dev']

In [6]:
# Candle direction flag: True when the close is strictly above the open
# (bullish), False otherwise.
df['bull']=df['close'].gt(df['open'])

In [7]:
# Two-candle divergence between body size and volume:
#   * two bullish candles in a row, body growing while volume shrinks, or
#   * two bearish candles in a row, body shrinking while volume grows.
prev_volume=df['volume'].shift(1)
prev_body=df['body_size'].shift(1)
prev_bull=df['bull'].shift(1)

cond_volume_bull=df['volume'].gt(prev_volume)
cond_volume_bear=df['volume'].lt(prev_volume)

cond_body_bull=df['body_size'].gt(prev_body)
cond_body_bear=df['body_size'].lt(prev_body)

# shift() leaves a NaN in the first row; comparing against it yields False,
# so that row can never satisfy either pattern.
same_direction=df['bull'].eq(prev_bull)
cond_same_candle_bull=same_direction & df['bull']
cond_same_candle_bear=same_direction & ~df['bull']

bull_divergence=cond_volume_bear & cond_body_bull & cond_same_candle_bull
bear_divergence=cond_volume_bull & cond_body_bear & cond_same_candle_bear
condition=bull_divergence | bear_divergence

# 1 where the pattern holds, 0 everywhere else
df['vol_anomaly_2']=condition.astype('int64')

In [8]:
# Three-candle divergence between body size and volume: the same pattern as
# the two-candle flag, but the trend must hold over three consecutive candles.
volume_1, volume_2=df['volume'].shift(1), df['volume'].shift(2)
body_1, body_2=df['body_size'].shift(1), df['body_size'].shift(2)
bull_1, bull_2=df['bull'].shift(1), df['bull'].shift(2)

cond_volume_bull=df['volume'].gt(volume_1) & volume_1.gt(volume_2)
cond_volume_bear=df['volume'].lt(volume_1) & volume_1.lt(volume_2)

cond_body_bull=df['body_size'].gt(body_1) & body_1.gt(body_2)
cond_body_bear=df['body_size'].lt(body_1) & body_1.lt(body_2)

# shift() leaves NaNs in the first rows; comparisons against them are False,
# so those rows can never satisfy either pattern.
same_direction=df['bull'].eq(bull_1) & bull_1.eq(bull_2)
cond_same_candle_bull=same_direction & df['bull']
cond_same_candle_bear=same_direction & ~df['bull']

bull_divergence=cond_volume_bear & cond_body_bull & cond_same_candle_bull
bear_divergence=cond_volume_bull & cond_body_bear & cond_same_candle_bear
condition=bull_divergence | bear_divergence

# 1 where the pattern holds, 0 everywhere else
df['vol_anomaly_3']=condition.astype('int64')

In [9]:
# Lift pandas' display limits so that frames are shown without truncating
# columns or rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [10]:
# Inspect the last five rows with every computed feature column.
df.tail(n=5)

Unnamed: 0,date,open,high,low,close,volume,prev_close,body_size,gap,wick,wick_bull,wick_bear,vol_ema,body_size_ewma,gap_ewma,wick_ewma,wick_bull_ewma,wick_bear_ewma,body_dev,vol_dev,gap_dev,wick_dev,wick_bull_dev,wick_bear_dev,vol_anomaly,gap_signal,bull,vol_anomaly_2,vol_anomaly_3
492,2017-12-26,599.5,614.9,597.5,610.95,68306748,599.5,11.45,0.0,5.95,3.95,2.0,67046340.0,6.210835,1.97375,8.898031,5.300224,3.597807,0.843552,0.018799,0.0,0.0,0.0,0.0,0.827694,0.0,True,0,0
493,2017-12-27,612.0,623.8,609.3,612.3,92925123,610.95,0.3,1.05,14.2,11.5,2.7,71751570.0,5.136138,1.805796,9.862025,6.427456,3.434569,0.94159,0.295095,0.0,0.439867,0.789199,0.0,0.663732,0.0,True,0,0
494,2017-12-28,612.3,616.5,602.5,605.05,31973127,612.3,7.25,0.0,6.75,4.2,2.55,64519130.0,5.520477,1.477469,9.296202,6.022464,3.273738,0.313292,0.50444,0.0,0.0,0.0,0.0,0.155255,0.0,False,0,0
495,2017-12-29,605.0,611.5,604.65,607.15,23288475,605.05,2.15,-0.05,4.7,4.35,0.35,57022650.0,4.907663,1.217929,8.460529,5.71838,2.74215,0.56191,0.591593,0.0,0.0,0.0,0.0,0.229488,-0.0,True,0,0
496,2018-01-01,606.0,617.5,606.0,611.75,45549601,607.15,5.75,-1.15,5.75,5.75,0.0,54936640.0,5.060815,1.205579,7.967706,5.724129,2.243577,0.136181,0.17087,0.0,0.0,0.00452,0.0,0.112911,-0.0,True,0,0


In [11]:
# Name of the df column drawn in the third panel of the chart below.
interested_feature = 'vol_anomaly'

In [12]:
# Plotting: three traces that share the DataFrame index as their x-axis.
price_trace = dict(
    type='candlestick',
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
    x=df.index,
    yaxis='y1',
    name='price',
)
volume_trace = dict(
    x=df.index,
    y=df['volume'],
    marker=dict(color='blue'),
    type='bar',
    yaxis='y2',
    name='Volume',
)
feature_trace = dict(
    x=df.index,
    y=df[interested_feature],
    marker=dict(color='red'),
    type='scatter',
    yaxis='y3',
    name=interested_feature,
)
data = [price_trace, volume_trace, feature_trace]

# Vertical stacking, bottom to top: volume (0.0-0.1), selected feature
# (0.1-0.2), price candles (0.2-1). Every axis autoranges and stays zoomable.
layout = dict(
    xaxis=dict(rangeslider=dict(visible=False), autorange=True, fixedrange=False),
    yaxis=dict(domain=[0.2, 1], autorange=True, fixedrange=False),
    yaxis2=dict(domain=[0.0, 0.1], autorange=True, fixedrange=False),
    yaxis3=dict(domain=[0.1, 0.2], autorange=True, fixedrange=False),
)

# Persist the frame, including its index, with all computed indicator columns.
df.to_csv("stock_price_indicator.csv", index=True)

fig = dict(data=data, layout=layout)

In [13]:
# Build a FigureWidget from the trace list and layout dict; later cells
# modify this object (axis ranges, trace data) after it has been created.
figure=go.FigureWidget(data=data, layout=layout)

In [14]:
def zoom(layout, x_range):
    """Fit the volume and feature y-axes to the rows visible on the x-axis.

    Meant to be registered with ``figure.layout.on_change(zoom, 'xaxis.range')``.

    Parameters
    ----------
    layout : object
        First callback argument; not used.
    x_range : object
        Second callback argument; not used, the current range is read from
        ``figure.layout.xaxis.range`` instead.
    """
    visible = figure.layout.xaxis.range
    # No explicit x-range is set (e.g. the axis is autoranging): nothing to fit.
    if visible is None:
        return
    in_view = df.loc[visible[0]:visible[1]]
    # An empty window would only produce NaN limits.
    if in_view.empty:
        return
    # The third trace is named after the df column it displays, so its name
    # identifies the column to fit the third y-axis to.
    feature = figure.data[2].name
    figure.layout.yaxis2.range = [in_view.volume.min(), in_view.volume.max()]
    figure.layout.yaxis3.range = [in_view[feature].min(), in_view[feature].max()]

In [15]:
# Register zoom as the change callback for the layout's 'xaxis.range' property.
figure.layout.on_change(zoom, 'xaxis.range')

In [16]:
# Final sizing and background colour; the call's return value is the cell's
# displayed output.
figure.update_layout(
    margin={'l': 20, 'r': 10},
    paper_bgcolor="LightSteelBlue",
    width=2200,
    height=1000,
)

FigureWidget({
    'data': [{'close': array([649.5 , 654.7 , 645.55, ..., 605.05, 607.15, 611.75]),
          …

In [660]:
from ipywidgets import interactive

def select_feature(feat):
    """Show column ``feat`` of ``df`` in the figure's third trace.

    The trace's y-values are replaced with ``df[feat]`` and the trace is
    renamed to ``feat``.
    """
    indicator_trace = figure.data[2]
    indicator_trace.y = df[feat]
    indicator_trace.name = feat

# Widget offering every column of df as a choice for select_feature's `feat`.
feature_dropdown = interactive(select_feature, feat=df.columns)
feature_dropdown

interactive(children=(Dropdown(description='feat', options=('date', 'open', 'high', 'low', 'close', 'volume', …

In [None]:
def zoom(layout, x_range, y_name=None):
    """Fit the volume and feature y-axes to the rows visible on the x-axis.

    Parameters
    ----------
    layout : object
        First callback argument; not used.
    x_range : object
        Second callback argument; not used, the current range is read from
        ``figure.layout.xaxis.range`` instead.
    y_name : optional
        Name of the df column shown on the third y-axis. Anything that is not
        the name of a df column is ignored and the name of the third trace is
        used instead.
        NOTE(review): when registered for 'yaxis2', plotly presumably passes
        that property's value here rather than a column name - confirm.
    """
    visible = figure.layout.xaxis.range
    # No explicit x-range is set (e.g. the axis is autoranging): nothing to fit.
    if visible is None:
        return
    in_view = df.loc[visible[0]:visible[1]]
    # An empty window would only produce NaN limits.
    if in_view.empty:
        return
    if isinstance(y_name, str) and y_name in df.columns:
        feature = y_name
    else:
        # The third trace is named after the df column it displays.
        feature = figure.data[2].name
    figure.layout.yaxis2.range = [in_view.volume.min(), in_view.volume.max()]
    figure.layout.yaxis3.range = [in_view[feature].min(), in_view[feature].max()]

In [None]:
# Register zoom for the 'xaxis.range' and 'yaxis2' layout properties together.
# NOTE(review): zoom's third parameter would then receive whatever plotly
# supplies for 'yaxis2'; confirm that this is the intended source of the
# column name, and whether this registration replaces or adds to the earlier
# one for 'xaxis.range' alone.
figure.layout.on_change(zoom, 'xaxis.range','yaxis2')