In [2]:
import sys
import simulator as sim
import numpy as np
import datetime as dt
import pandas as pd

# Time unit multipliers. MIN is the spacing between book snapshots.
# NOTE(review): times presumably are microseconds (they come from
# sim.string_to_micro) -- confirm against the simulator.
SEC = 10 ** 6
MIN = SEC * 60
# Raw book prices are divided by DOLLAR before being stored.
DOLLAR = 10 ** 6

###############################################################################
# Store daily book data in a .txt file
###############################################################################

# Prefix (trailing slash included) used to build the book-data, OBP and
# signal file names.
DATA_PATH = "data/"

class StoreDayData(object):
    """Simulator strategy that snapshots the order book once per minute.

    On construction the strategy subscribes to the ticker and registers one
    timer per whole minute in ``[start_time, end_time)``.  Every timer
    callback fills one row of ``book_data``; ``end`` writes the rows that were
    actually filled to ``<DATA_PATH><ticker>_<date>_bookdata.txt``.

    Row layout of ``book_data``, with ``n = num_levels``:
        0            : time passed to the timer callback
        1            : mid price of the first bid/ask level, divided by DOLLAR
                       (used later to derive the SVM output signal)
        2 .. n+1     : queue sizes of ask levels 1..n
        n+2 .. 2n+1  : queue sizes of bid levels 1..n

    Parameters:
        session: simulator session; must provide subscribe_ticker_all_feeds,
            add_timer and get_book_levels.
        date: only used to build the output file name.
        ticker: indexable collection of symbols; only ``ticker[0]`` is used.
        num_levels: number of book levels recorded on each side.
        start_time, end_time: sampling window, in the same units as MIN
            (presumably microseconds, as returned by sim.string_to_micro --
            TODO confirm).
    """

    def __init__(self, session, date, ticker,
                 num_levels = 5,
                 start_time = sim.string_to_micro("09:30"),
                 end_time = sim.string_to_micro("15:30")):
        self.session = session
        self.date = date
        self.ticker = ticker[0]
        self.start_time = start_time
        self.end_time = end_time

        # Floor division plus int() keeps the row count an integer on both
        # Python 2 and Python 3 (np.empty and range reject floats).
        self.num = int((self.end_time - self.start_time) // MIN)
        self.num_levels = num_levels
        # Index of the next row of book_data to fill.
        self.time_index = 0

        # Uninitialised buffer: only rows [0, time_index) hold real data.
        self.book_data = np.empty(shape=[self.num, 2 * self.num_levels + 2])

        self.session.subscribe_ticker_all_feeds(self.ticker)
        for i in range(self.num):
            self.session.add_timer(self.start_time + i * MIN,
                                   self.timer_getdata_callback)

    def timer_getdata_callback(self, time):
        """Record one book snapshot into the next free row of book_data."""
        book = self.session.get_book_levels(self.ticker,
                                            nlevels=self.num_levels)
        bids = book["bids"]
        asks = book["asks"]

        # Basic indexing returns a view, so writes land in book_data.
        row = self.book_data[self.time_index]
        row[0] = time
        row[1] = (bids[0]["price"] + asks[0]["price"]) / 2.0 / DOLLAR

        # Ask sizes start right after the time/price columns; bid sizes
        # follow the asks.  Deriving the bid offset from num_levels (instead
        # of a literal 7) keeps the layout valid for any number of levels.
        ask_start = 2
        bid_start = ask_start + self.num_levels
        for i in range(self.num_levels):
            row[ask_start + i] = asks[i]["size"]
            row[bid_start + i] = bids[i]["size"]

        self.time_index += 1

    def end(self):
        """Write the collected snapshots to the day's book-data text file."""
        filename = "{0}{1}_{2}_bookdata.txt".format(DATA_PATH, self.ticker,
                                                    self.date)
        # Save only the filled rows: anything past time_index is
        # uninitialised memory from np.empty.
        np.savetxt(filename, self.book_data[:self.time_index], newline="\n")
        print("Saved to {0}".format(filename))
    
###############################################################################
# Read from book data file and convert it to feedable SVM data
###############################################################################
    
    
def treat_data(ticker, date, threshold=0.01, data_path=None):
    """Convert a day's book-data file into SVM input and output files.

    Reads ``<data_path><ticker>_<date>_bookdata.txt`` whose columns are
    time, price, ask sizes for levels 1..n, then bid sizes for levels 1..n
    (n is derived from the column count).  Writes two files next to it:

    * ``..._sig.txt``: one label per row, -1 / 0 / +1 depending on whether
      the price change from the previous row is below ``-threshold``,
      within the threshold, or above ``threshold``.
    * ``..._obp.txt``: one row of ``6 * n`` features, where
      ``OBP(t, l) = obp_data[:, t * n + l]`` is the ratio of summed bid
      sizes to summed ask sizes over levels 1..l+1 and over the rows
      ``i - buildup[t] .. i`` (inclusive).

    The first ``max(buildup)`` rows of the input only serve as history, so
    both outputs have that many fewer rows than the input.

    Parameters:
        ticker, date: used to build the file names.
        threshold: absolute price change separating the 0 label from +/-1.
        data_path: file name prefix; defaults to the module-level DATA_PATH.

    Raises:
        ValueError: if the file has fewer rows than the longest window.
    """
    if data_path is None:
        data_path = DATA_PATH

    raw_filename = "{0}{1}_{2}_bookdata.txt".format(data_path, ticker, date)
    # ndmin=2 keeps a single-row file two-dimensional.
    raw_data = np.loadtxt(raw_filename, ndmin=2)

    buildup = [1, 2, 3, 5, 10, 15]
    lookback = max(buildup)

    num_data = raw_data.shape[0]
    if num_data < lookback:
        raise ValueError(
            "{0} has {1} rows; at least {2} are needed".format(
                raw_filename, num_data, lookback))

    # Floor division keeps the level count an integer on Python 3 as well.
    num_lvls = (raw_data.shape[1] - 2) // 2
    ask_start = 2
    # Bid columns follow the ask columns, wherever those end.
    bid_start = ask_start + num_lvls
    num_out = num_data - lookback

    # sig_data is the output signal for the SVM, as a (n, 1) ndarray.
    price_change = (raw_data[lookback:, 1]
                    - raw_data[lookback - 1:num_data - 1, 1])
    sig_data = np.zeros(shape=[num_out, 1], dtype=np.int16)
    sig_data[price_change > threshold, 0] = 1
    # Assigned last so that "below -threshold" wins, like the original
    # if/elif ordering.
    sig_data[price_change < -threshold, 0] = -1

    # obp_data is the SVM input as a (n, 6 * num_lvls) ndarray.
    obp_data = np.empty(shape=[num_out, len(buildup) * num_lvls])
    for i in range(lookback, num_data):
        out_row = i - lookback
        for t, span in enumerate(buildup):
            window = raw_data[i - span:i + 1]
            for j in range(num_lvls):
                bid_depth = np.sum(window[:, bid_start:bid_start + j + 1])
                ask_depth = np.sum(window[:, ask_start:ask_start + j + 1])
                # A zero ask_depth yields inf/nan (numpy warns, no raise).
                obp_data[out_row, t * num_lvls + j] = bid_depth / ask_depth

    obp_filename = "{0}{1}_{2}_obp.txt".format(data_path, ticker, date)
    sig_filename = "{0}{1}_{2}_sig.txt".format(data_path, ticker, date)
    np.savetxt(obp_filename, obp_data, newline="\n")
    np.savetxt(sig_filename, sig_data, fmt="%d", newline="\n")
    print("Saved to {0}".format(obp_filename))
    
# Batch driver: for each business day in the window, record the book with
# StoreDayData, then derive the SVM files from the saved book data.
# (single-day alternative: date = "20150121")
start = dt.date(2015, 1, 21)
end = dt.date(2015, 2, 12)
# The window opens 10 calendar days before `start`.
days = pd.bdate_range(start - dt.timedelta(days=10), end)
# Dates the loop skips; bdate_range itself only leaves out weekends.
# NOTE(review): presumably market holidays -- confirm.
to_remove = [
    "20150119", "20150216", "20150403", "20150525", "20150703", "20150824",
    "20151126", "20151225",
    "20160101", "20160118", "20160215", "20160325", "20160530", "20160704",
    "20160905", "20161124", "20161226",
    "20170102", "20170116", "20170220", "20170414", "20170518",
]
symbols = ["IVV"]
# NOTE: start_time / end_time are not passed to run() in the loop below, so
# the loop uses StoreDayData's default window.  They were only used by a
# single-day run of the form:
#     sim.Simulator(StoreDayData).run("20150120", symbols, num_levels = 5,
#                                     start_time = start_time,
#                                     end_time = end_time)
start_time = sim.string_to_micro("9:30")
end_time = sim.string_to_micro("10:00")

for day in days:
    training_date = '{:%Y%m%d}'.format(day.date())
    # Printed for every day, including the ones that are skipped.
    print(training_date)
    if training_date not in to_remove:
        simul_storedata = sim.Simulator(StoreDayData)
        simul_storedata.run(training_date, symbols, num_levels=5)
        for sym_it in symbols:
            treat_data(sym_it, training_date, threshold=0.05)

20150112
Saved to data/IVV_20150112_bookdata.txt
Saved to data/IVV_20150112_obp.txt
20150113
Saved to data/IVV_20150113_bookdata.txt
Saved to data/IVV_20150113_obp.txt
20150114
Saved to data/IVV_20150114_bookdata.txt
Saved to data/IVV_20150114_obp.txt
20150115
Saved to data/IVV_20150115_bookdata.txt
Saved to data/IVV_20150115_obp.txt
20150116
Saved to data/IVV_20150116_bookdata.txt
Saved to data/IVV_20150116_obp.txt
20150119
20150120
Saved to data/IVV_20150120_bookdata.txt
Saved to data/IVV_20150120_obp.txt
20150121
Saved to data/IVV_20150121_bookdata.txt
Saved to data/IVV_20150121_obp.txt
20150122
Saved to data/IVV_20150122_bookdata.txt
Saved to data/IVV_20150122_obp.txt
20150123
Saved to data/IVV_20150123_bookdata.txt
Saved to data/IVV_20150123_obp.txt
20150126
Saved to data/IVV_20150126_bookdata.txt
Saved to data/IVV_20150126_obp.txt
20150127
Saved to data/IVV_20150127_bookdata.txt
Saved to data/IVV_20150127_obp.txt
20150128
Saved to data/IVV_20150128_bookdata.txt
Saved to data/IVV_