In [667]:
import pandas as pd
import numpy as np

from plotly.offline import plot 
import plotly.graph_objects as go

import warnings
warnings.filterwarnings('ignore')


In [668]:
from plotly.offline import init_notebook_mode, iplot

In [685]:
# Load the full price history, normalise the column names and parse the dates.
# A forward slash is used in the path: "data\stock_price.csv" relied on the invalid
# escape sequence "\s" and only resolved on Windows, while "/" works on every platform.
df_full=pd.read_csv("data/stock_price.csv")
df_full=df_full.rename(columns=lambda x: x.strip())  # headers may carry stray whitespace
df_full['date']=pd.to_datetime(df_full['date'])

# Working window: dates after 2015-07-01 up to and including 2018-01-01.
# .copy() gives an independent frame, so the feature columns added to `df` in later
# cells are not written into a slice of df_full.
# (For a quick run on a small sample use df_full.iloc[:1000].copy() instead.)
df=df_full[(df_full['date']>'2015-7-1')& (df_full['date']<='2018-1-1')].copy()

# Feature Calculation
First I compute the basic per-candle features, then the derived features that encode my own beliefs about how price and volume should behave.

In [686]:
def get_wick_length(ohlc):
    """Return the combined length of both wicks of one candle.

    Parameters
    ----------
    ohlc : iterable of four numbers
        The open, high, low and close prices, in any order.

    Returns
    -------
    number
        abs((p0 - p1) + (p2 - p3)) where p0 >= p1 >= p2 >= p3 are the four
        prices sorted in descending order. For a well-formed candle this is
        (high - top of body) + (bottom of body - low).

    The input is not modified: a sorted copy is used instead of sorting the
    caller's list in place, so tuples and other iterables are accepted too.
    """
    highest, second, third, lowest = sorted(ohlc, reverse=True)[:4]
    return abs((highest-second)+(third-lowest))
def get_wick_bull(ohlc):
    """Return the length of the upper wick of one candle.

    Parameters
    ----------
    ohlc : iterable of four numbers
        The open, high, low and close prices, in any order.

    Returns
    -------
    number
        The largest price minus the second largest; for a well-formed candle
        this is high - max(open, close).

    The input is not modified: a sorted copy is used instead of sorting the
    caller's list in place, so tuples and other iterables are accepted too.
    """
    ordered = sorted(ohlc, reverse=True)
    return (ordered[0]-ordered[1])
def get_wick_bear(ohlc):
    """Return the length of the lower wick of one candle.

    Parameters
    ----------
    ohlc : iterable of four numbers
        The open, high, low and close prices, in any order.

    Returns
    -------
    number
        The third largest price minus the smallest; for a well-formed candle
        this is min(open, close) - low.

    The input is not modified: a sorted copy is used instead of sorting the
    caller's list in place, so tuples and other iterables are accepted too.
    """
    ordered = sorted(ohlc, reverse=True)
    return (ordered[2]-ordered[3])

In [687]:
# Feature engineering on `df`, built in four levels:
#   level 0 - raw per-candle measurements
#   level 1 - exponentially weighted moving averages of those measurements
#   level 2 - relative deviation of each measurement from its moving average
#   level 3 - anomaly signals derived from the deviations
ewma_span=10  # span shared by every exponentially weighted moving average below
df['prev_close']=df['close'].shift(1)

# level 0 features
df['body_size']=np.abs(df['open']-df['close'])
df['gap']=df['open']-df['prev_close']  # signed: positive when the candle opens above the previous close

# Sort the four prices of every candle in descending order once, vectorised, instead of
# re-sorting each row three times through row-wise df.apply. The formulas are the same
# as in get_wick_length / get_wick_bull / get_wick_bear.
# Columns after sorting: [highest, second, third, lowest]; for a well-formed candle
# that is [high, top of body, bottom of body, low].
ohlc_desc=np.sort(df[['open','high','low','close']].to_numpy(dtype=float),axis=1)[:,::-1]
upper_wick=ohlc_desc[:,0]-ohlc_desc[:,1]
lower_wick=ohlc_desc[:,2]-ohlc_desc[:,3]
df['wick']=np.abs(upper_wick+lower_wick)
df['wick_bull']=upper_wick
df['wick_bear']=lower_wick

# level 1 features
df['vol_ema']=df['volume'].ewm(span=ewma_span).mean()
df['body_size_ewma']=df['body_size'].ewm(span=ewma_span).mean()
df['gap_ewma']=abs(df['gap']).ewm(span=ewma_span).mean()  # average gap *size*, direction ignored
df['wick_ewma']=abs(df['wick'].ewm(span=ewma_span).mean())
df['wick_bull_ewma']=abs(df['wick_bull'].ewm(span=ewma_span).mean())
df['wick_bear_ewma']=abs(df['wick_bear'].ewm(span=ewma_span).mean())

# level 2 features
# body_dev and vol_dev are absolute relative deviations (always >= 0);
# the gap and wick deviations keep their sign so that "below average" can be zeroed out below.
df['body_dev']=np.abs(df['body_size']-df['body_size_ewma'])/df['body_size_ewma']
df['vol_dev']=np.abs(df['volume']-df['vol_ema'])/df['vol_ema']
df['gap_dev']=(abs(df['gap'])-df['gap_ewma'])/df['gap_ewma'] # based on |gap|; the gap direction is restored in gap_signal below
df['wick_dev']=(df['wick']-df['wick_ewma'])/df['wick_ewma']
df['wick_bull_dev']=(df['wick_bull']-df['wick_bull_ewma'])/df['wick_bull_ewma']
df['wick_bear_dev']=(df['wick_bear']-df['wick_bear_ewma'])/df['wick_bear_ewma']

# level 3 features
# Volume anomaly: body deviation scaled by (1 - volume deviation)
df['vol_anomaly']=(df['body_dev']*(1-df['vol_dev']))
df.loc[df['vol_anomaly']<=0,'vol_anomaly'] = 0 # the product is negative when vol_dev > 1; those rows are forced to zero

# Wick Anomaly - I dont care if wick is low, only care about extremely high wick
df.loc[df['wick_dev']<=0,'wick_dev'] = 0 # I dont think this feature is necessary
df.loc[df['wick_bull_dev']<=0,'wick_bull_dev'] = 0
df.loc[df['wick_bear_dev']<=0,'wick_bear_dev'] = 0

# Gap Anomaly - I dont care if Gap is low, only care about extremely high Gap
df.loc[df['gap_dev']<=0,'gap_dev'] = 0
df['gap_signal']=np.sign(df['gap'])*df['gap_dev']  # re-attach the direction of the gap

In [717]:
# Candle direction: True when the close is above the open (bullish candle),
# False otherwise (including close == open).
df['bull']=df['close'].gt(df['open'])

In [689]:
# Two-candle volume/body divergence flag (vol_anomaly_2).
# A row is flagged when this candle and the previous one point the same way and either
#   - both are bullish, the body grew and the volume fell, or
#   - both are bearish, the body shrank and the volume rose.
prev_volume=df['volume'].shift(1)
prev_body=df['body_size'].shift(1)
prev_bull=df['bull'].shift(1)

cond_volume_bull=df['volume'].gt(prev_volume)
cond_volume_bear=df['volume'].lt(prev_volume)

cond_body_bull=df['body_size'].gt(prev_body)
cond_body_bear=df['body_size'].lt(prev_body)

# shift() leaves NaN in the first row; comparing against NaN is False, so the
# "same direction" test is written as an equality combined with the current direction.
same_direction=df['bull'].eq(prev_bull)
cond_same_candle_bull=same_direction & df['bull']
cond_same_candle_bear=same_direction & ~df['bull']

bullish_divergence=cond_volume_bear & cond_body_bull & cond_same_candle_bull
bearish_divergence=cond_volume_bull & cond_body_bear & cond_same_candle_bear
condition=bullish_divergence | bearish_divergence

# 1 where the pattern is present, 0 elsewhere
df['vol_anomaly_2']=condition.astype('int64')

In [690]:
# Three-candle volume/body divergence flag (vol_anomaly_3).
# Same idea as the two-candle flag, but the trend must hold over three consecutive
# candles: all three point the same way and either
#   - they are bullish, the body grew twice in a row and the volume fell twice in a row, or
#   - they are bearish, the body shrank twice in a row and the volume rose twice in a row.
volume_1, volume_2=df['volume'].shift(1), df['volume'].shift(2)
body_1, body_2=df['body_size'].shift(1), df['body_size'].shift(2)
bull_1, bull_2=df['bull'].shift(1), df['bull'].shift(2)

cond_volume_bull=df['volume'].gt(volume_1) & volume_1.gt(volume_2)
cond_volume_bear=df['volume'].lt(volume_1) & volume_1.lt(volume_2)

cond_body_bull=df['body_size'].gt(body_1) & body_1.gt(body_2)
cond_body_bear=df['body_size'].lt(body_1) & body_1.lt(body_2)

# shift() leaves NaN in the first rows; comparing against NaN is False, so the
# "same direction" test is written as equalities combined with the current direction.
same_direction=df['bull'].eq(bull_1) & bull_1.eq(bull_2)
cond_same_candle_bull=same_direction & df['bull']
cond_same_candle_bear=same_direction & ~df['bull']

bullish_divergence=cond_volume_bear & cond_body_bull & cond_same_candle_bull
bearish_divergence=cond_volume_bull & cond_body_bear & cond_same_candle_bear
condition=bullish_divergence | bearish_divergence

# 1 where the pattern is present, 0 elsewhere
df['vol_anomaly_3']=condition.astype('int64')

In [691]:
# Lift pandas' display limits: None means "no limit", so frames are shown with
# every column and every row instead of being truncated.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [692]:
# Preview the last five rows of the feature frame.
df.tail()

Unnamed: 0,date,open,high,low,close,volume,prev_close,body_size,gap,wick,wick_bull,wick_bear,vol_ema,body_size_ewma,gap_ewma,wick_ewma,wick_bull_ewma,wick_bear_ewma,body_dev,vol_dev,gap_dev,wick_dev,wick_bull_dev,wick_bear_dev,vol_anomaly,gap_signal,bull,vol_anomaly_2,vol_anomaly_3
3268,2017-12-22,58.88,59.65,56.63,56.9,1946330,59.23,1.98,-0.35,1.04,0.77,0.27,1042469.0,1.055791,0.190658,0.754578,0.406962,0.347617,0.875372,0.86704,0.835748,0.378253,0.89207,0.0,0.11639,-0.835748,False,0,0
3269,2017-12-26,57.17,57.85,56.3,56.87,765951,56.9,0.3,0.27,1.25,0.68,0.57,992192.6,0.918374,0.205084,0.844655,0.456605,0.38805,0.673336,0.228022,0.316535,0.479894,0.489252,0.468883,0.5198,0.316535,False,0,0
3270,2017-12-27,56.8,57.69,56.7,57.35,406108,56.87,0.55,-0.07,0.44,0.34,0.1,885631.8,0.851397,0.180523,0.771081,0.435404,0.335677,0.354003,0.541448,0.0,0.0,0.0,0.0,0.162329,-0.0,True,0,0
3271,2017-12-28,57.54,58.01,57.05,57.13,400590,57.35,0.41,0.19,0.55,0.47,0.08,797442.4,0.771143,0.182246,0.730885,0.441694,0.28919,0.468322,0.497656,0.042546,0.0,0.064084,0.0,0.235258,0.042546,False,0,0
3272,2017-12-29,57.25,57.3595,56.09,56.11,533376,57.13,1.14,0.12,0.1295,0.1095,0.02,749430.3,0.838208,0.170929,0.621542,0.381295,0.240247,0.360044,0.288291,0.0,0.0,0.0,0.0,0.256247,0.0,False,0,0


In [725]:
# Name of the df column drawn as the red line in the third panel of the chart below.
interested_feature='vol_anomaly'

In [726]:
# Plotting
# Three traces share the x axis (the DataFrame index), each on its own y axis:
#   y1 - candlestick price chart, y2 - volume bars, y3 - the selected feature.
price_trace = dict(
    type='candlestick',
    open=df.open,
    high=df.high,
    low=df.low,
    close=df.close,
    x=df.index,
    yaxis='y1',
    name='price',
)
volume_trace = dict(
    x=df.index,
    y=df.volume,
    marker=dict(color='blue'),
    type='bar',
    yaxis='y2',
    name='Volume',
)
feature_trace = dict(
    x=df.index,
    y=df[interested_feature],
    marker=dict(color='red'),
    type='scatter',
    yaxis='y3',
    name=interested_feature,
)
data = [price_trace, volume_trace, feature_trace]

# Vertical split of the figure: volume in the bottom 10%, the feature in the next 10%,
# price in the remaining 80%. The range slider under the x axis is hidden.
layout = {
    'xaxis': dict(rangeslider=dict(visible=False), autorange=True, fixedrange=False),
    'yaxis': dict(domain=[0.2, 1], autorange=True, fixedrange=False),
    'yaxis2': dict(domain=[0.0, 0.1], autorange=True, fixedrange=False),
    'yaxis3': dict(domain=[0.1, 0.2], autorange=True, fixedrange=False),
}

# Persist the frame with all computed columns (index included) next to the notebook.
df.to_csv("stock_price_indicator.csv", index=True)

fig = dict(data=data, layout=layout)

In [727]:
# Build the chart as a FigureWidget so later cells can update its traces and layout
# in place and register callbacks on layout changes.
figure=go.FigureWidget(data=data, layout=layout)

In [728]:
def zoom(layout, x_range):
    """Rescale the volume and feature y-axes to the rows inside the visible x range.

    Written to be registered as a layout change callback for 'xaxis.range'.

    Parameters
    ----------
    layout : object
        Layout object handed over by the callback machinery; not used.
    x_range : sequence of two index labels, or None
        Lower and upper bound of the visible x-axis window. When None, the
        range currently stored on the figure is used instead.

    Nothing is changed when no range is known yet or when no row of df falls
    inside the window.
    """
    visible = x_range if x_range is not None else figure.layout.xaxis.range
    if visible is None:
        return
    in_view = df.loc[visible[0]:visible[1]]
    if in_view.empty:
        return
    # The third trace is the feature line; its name is the df column it plots
    # (set when the trace is created and again by select_feature).
    feature = figure.data[2].name
    figure.layout.yaxis2.range = [in_view.volume.min(), in_view.volume.max()]
    figure.layout.yaxis3.range = [in_view[feature].min(), in_view[feature].max()]

In [729]:
# Call zoom every time the x-axis range of the figure changes.
figure.layout.on_change(zoom, 'xaxis.range')

In [730]:
# Final cosmetics: margins, background colour and a fixed size in pixels.
# As the last expression of the cell, the returned figure is also what gets displayed.
figure.update_layout(margin=dict(l=20, r=10),paper_bgcolor="LightSteelBlue",width=2200, height=1000)

FigureWidget({
    'data': [{'close': array([76.09, 76.39, 76.07, ..., 57.35, 57.13, 56.11]),
              'h…

In [660]:
from ipywidgets import interactive

def select_feature(feat):
    """Show column `feat` of df in the third trace of the figure and rename the trace to match."""
    feature_trace = figure.data[2]
    feature_trace.y = df[feat]
    feature_trace.name = feat

# Dropdown offering the columns of df; picking one calls select_feature with that column name.
feature_dropdown = interactive(select_feature, feat=df.columns)
feature_dropdown

interactive(children=(Dropdown(description='feat', options=('date', 'open', 'high', 'low', 'close', 'volume', …

In [None]:
def zoom(layout, x_range, y_name=None):
    """Rescale the volume and feature y-axes to the rows inside the visible x range.

    Variant of the zoom callback that can also be told which column to scale
    the feature axis (yaxis3) by.

    Parameters
    ----------
    layout : object
        Layout object handed over by the callback machinery; not used.
    x_range : sequence of two index labels, or None
        Lower and upper bound of the visible x-axis window. When None, the
        range currently stored on the figure is used instead.
    y_name : str or object with a ``name`` attribute, optional
        Column of df to scale yaxis3 by, given either as the label itself or
        as an object exposing the label as ``.name``. When it does not resolve
        to a column of df, the name of the third trace is used.

    Nothing is changed when no range is known yet or when no row of df falls
    inside the window.
    """
    visible = x_range if x_range is not None else figure.layout.xaxis.range
    if visible is None:
        return
    in_view = df.loc[visible[0]:visible[1]]
    if in_view.empty:
        return
    # The original printed y_name.name but then indexed df with y_name itself;
    # resolve one column label up front and use it consistently.
    feature = getattr(y_name, 'name', y_name)
    if not isinstance(feature, str) or feature not in df.columns:
        # Fall back to the column the feature trace currently shows.
        feature = figure.data[2].name
    figure.layout.yaxis2.range = [in_view.volume.min(), in_view.volume.max()]
    figure.layout.yaxis3.range = [in_view[feature].min(), in_view[feature].max()]

In [None]:
# Register zoom for two layout properties, so it is called with a value for each of them.
# NOTE(review): the value passed for 'yaxis2' is presumably the y-axis layout object rather
# than a df column name - confirm that this is what zoom should receive as its third argument.
figure.layout.on_change(zoom, 'xaxis.range','yaxis2')