In [1]:
#%matplotlib inline
import time
import datetime
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick_ohlc, candlestick2_ohlc
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
%matplotlib


Using matplotlib backend: TkAgg




In [2]:
tick_data = pd.read_feather('../data/processed/ES_tick.feather')
tick_data.head()

Unnamed: 0,date,last,bid,ask,volume
0,2016-09-15 00:00:00.292000-04:00,2112.25,2112.25,2112.5,8
1,2016-09-15 00:00:01.425000-04:00,2112.5,2112.5,2112.75,1
2,2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1
3,2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1
4,2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1


In [3]:
#Create Index from date column
tick_data.index = tick_data['date']
tick_data.drop(labels=['date'],axis=1,inplace=True)
tick_data.head()

Unnamed: 0_level_0,last,bid,ask,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-15 00:00:00.292000-04:00,2112.25,2112.25,2112.5,8
2016-09-15 00:00:01.425000-04:00,2112.5,2112.5,2112.75,1
2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1
2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1
2016-09-15 00:00:05.201000-04:00,2112.25,2112.25,2112.5,1


In [5]:
#Resample to get 5min bars
five_min_data = pd.DataFrame(
    tick_data['last'].resample('5Min', loffset=datetime.timedelta(minutes=5)).ohlc())

In [6]:
#We hack the NYSE Calendar extending the close until 4:15
class CMERTHCalendar(mcal.exchange_calendar_nyse.NYSEExchangeCalendar):
    @property
    def close_time(self):
        return datetime.time(16, 15)

nyse = CMERTHCalendar()
schedule = nyse.schedule(start_date=five_min_data.index.min(), 
                         end_date=five_min_data.index.max())

In [7]:
#Filter out those bars that occur during RTH
five_min_data['dates'] = pd.to_datetime(five_min_data.index.to_datetime().date)
five_min_data['valid_date'] = five_min_data['dates'].isin(schedule.index)
five_min_data['valid_time'] = False
during_rth = five_min_data['valid_date'] & \
            (five_min_data.index > schedule.loc[five_min_data['dates'],'market_open']) & \
            (five_min_data.index <= schedule.loc[five_min_data['dates'],'market_close'])
five_min_data.loc[during_rth, 'valid_time'] = True
five_min_data = five_min_data[five_min_data['valid_time'] == True]
five_min_data.drop(['dates','valid_date','valid_time'], axis=1, inplace=True)

#Only use data from 7/29 onward -- Avoid null values -- bad tick data
five_min_data = five_min_data[five_min_data.index > '2017-07-29']

#Reset index
five_min_data.reset_index(inplace=True)

five_min_data.head()

Unnamed: 0,date,open,high,low,close
0,2017-07-31 09:35:00-04:00,2474.75,2475.75,2474.0,2475.5
1,2017-07-31 09:40:00-04:00,2475.25,2476.0,2473.75,2475.5
2,2017-07-31 09:45:00-04:00,2475.75,2475.75,2474.5,2474.75
3,2017-07-31 09:50:00-04:00,2474.5,2475.0,2473.5,2473.75
4,2017-07-31 09:55:00-04:00,2474.0,2474.25,2472.75,2472.75


In [5]:
%load_ext line_profiler

In [22]:
%prun??

In [39]:
%%time
fig, ax = plt.subplots(figsize=(10,5))
for i in range(0, 100):
    graph_data = five_min_data.iloc[i:i+100]
    x = graph_data['date'].tolist()
    y = graph_data['close'].tolist()
    if i == 0:
        line, = ax.plot(x,y, color='blue')
        ax.set_xlim(x[0],x[-1])
    else:
        #line.set_data(x,y)
        line, = ax.plot(x,y, color='blue')
        ax.set_xlim(x[0],x[-1])
    #plt.show()
    #print(ax.get_xlim())
    plt.pause(.01)

CPU times: user 4 s, sys: 96 ms, total: 4.09 s
Wall time: 5.3 s


In [28]:
five_min_data['x'] = five_min_data.index
five_min_data['ema'] = five_min_data['close'].ewm(span=20, min_periods=20).mean()
five_min_data.tail()

Unnamed: 0,date,open,high,low,close,x,ema
10000,2018-01-25 15:55:00-05:00,2838.5,2840.5,2838.25,2838.5,10000,2838.062309
10001,2018-01-25 16:00:00-05:00,2838.75,2840.75,2838.25,2839.75,10001,2838.223042
10002,2018-01-25 16:05:00-05:00,2840.0,2841.0,2839.5,2840.0,10002,2838.392276
10003,2018-01-25 16:10:00-05:00,2840.25,2840.75,2840.0,2840.75,10003,2838.616821
10004,2018-01-25 16:15:00-05:00,2840.75,2841.75,2840.5,2841.0,10004,2838.84379


In [31]:
%%time
fig, ax = plt.subplots(figsize=(16,8))
#ax.patch.set(facecolor='w', edgecolor='k', linewidth=1.0)

def format_hour(x, pos=None):
    thisind = np.clip(int(x + 0.5), 0, len(five_min_data.index) - 1)
    return five_min_data['date'][thisind].strftime('%b %-d %I:%M')

ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_hour))

for i in range(0, 500):
    gd = five_min_data.iloc[i]
    subset = gd[['x','open','high','low','close']]
    tuples = [tuple(subset.values)]
    candlestick_ohlc(ax, tuples, width=.5, colorup='g', colordown='r', alpha=1)
    
    # Adjust axes
    i_min = max(0,i-100)
    i_max = i + 1 if i < 100 else i_min + 101
    ax.set_xlim(i_min - 0.5, i_max + 0.5)
    y_max = five_min_data.iloc[i_min:i_max]['high'].max()
    y_min = five_min_data.iloc[i_min:i_max]['low'].min()
    ax.set_ylim(y_min - 1, y_max + 1)
    
    # Plot vertical lines indicating new trading day
    ts = five_min_data.iloc[i]['date']
    if (ts.hour == 9) and (ts.minute == 35):
        ax.axvline(i - 0.5, color='black', lw=0.5)
        
    # Plot ema
    ax.plot(five_min_data.iloc[i_min:i_max]['x'].tolist(), 
            five_min_data.iloc[i_min:i_max]['ema'].tolist(), 
            color='blue', lw=0.5) 
        
    plt.pause(.01)

CPU times: user 56.5 s, sys: 616 ms, total: 57.1 s
Wall time: 1min 3s


In [16]:
five_min_data.iloc[0:1]['high'].max()

2475.75

In [25]:
fig, ax = plt.subplots(figsize=(16,8))
gd = five_min_data.iloc[0:100]
subset = gd[['x','open','high','low','close']]
tuples = [tuple(x) for x in subset.values]
candlestick_ohlc(ax, tuples, width=.5, colorup='g', colordown='r', alpha=1)
ax.set_xlim(gd.iloc[0]['x'] - 5,gd.iloc[-1]['x'] + 2)

#Custom formatter for x axis
def format_hour(x, pos=None):
    thisind = np.clip(int(x + 0.5), 0, len(gd.index) - 1)
    print(thisind)
    return gd['date'][thisind].strftime('%I:%M')

ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_hour))
xticklabels = gd['x'].iloc[5::6].tolist()
ax.xaxis.set_major_locator(ticker.FixedLocator(xticklabels))

5
11
17
23
29
35
41
47
53
59
65
71
77
83
89
95
0
4
9
14
16
20
23
24
26
26
28
28
29
29
29
29
29
29
29
28
27
26
25
23
21
19
17
14
12
9
6
4
1
0
0
0
0
36
36
36
36
36
36
36
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
35
34
34
33
33
31
30
29
27
25
23
20
17
14
11
9
6
5
3
2
0
0
0


In [32]:
%prun candlestick_ohlc(ax, tuples, width=.5, colorup='g', colordown='r', alpha=1)

 