In [1]:
import sys
import simulator as sim
import numpy as np
import datetime as dt
import pandas as pd

# Time constants in the simulator's clock unit (presumably microseconds, since
# the timestamps in this file come from sim.string_to_micro -- TODO confirm).
SEC = 10 ** 6
MIN = SEC * 60
# Book prices are divided by DOLLAR before being stored, so this is the number
# of raw price units per dollar.
DOLLAR = 10 ** 6

###############################################################################
# Store daily book data in a .txt file
###############################################################################

# Directory prefix (trailing slash included) for every file written or read here.
DATA_PATH = "data/"

class StoreDayData(object):
    """Simulator strategy that snapshots the order book once per minute.

    One row per symbol is recorded at ``start_time + i * MIN`` for
    ``i = 0 .. num - 1`` (so ``end_time`` itself is excluded). ``end`` then
    writes each symbol's table to ``<DATA_PATH><symbol>_<date>_bookdata.txt``.

    Row layout, with ``n = num_levels``:
        column 0            : time passed to the timer callback
        column 1            : mid price, (best bid + best ask) / 2 / DOLLAR
                              (used later to build the SVM output signal)
        columns 2 .. n+1    : queue size of ask levels 1..n
        columns n+2 .. 2n+1 : queue size of bid levels 1..n
    """

    def __init__(self, session, date, ticker,
                 num_levels = 5,
                 start_time = sim.string_to_micro("09:30"),
                 end_time = sim.string_to_micro("15:30")):
        """Subscribe to every symbol and schedule the per-minute snapshots.

        Args:
            session: simulator session; must provide subscribe_ticker_all_feeds,
                add_timer, get_book_levels and current_time.
            date: date label used only in the output file names.
            ticker: iterable of symbols to record.
            num_levels: number of book levels stored per side.
            start_time: time of the first snapshot (simulator clock units).
            end_time: end of the recording window (exclusive).
        """
        self.session = session
        self.date = date
        self.ticker = ticker
        self.start_time = start_time
        self.end_time = end_time

        # Floor division keeps the snapshot count an int on Python 3 as well;
        # np.empty and range both reject a float.
        self.num = int((self.end_time - self.start_time) // MIN)
        self.num_levels = num_levels
        # Index of the next row to fill; also the number of rows recorded so far.
        self.time_index = 0

        # book_data maps symbol -> ndarray laid out as described on the class.
        self.book_data = {}

        num_columns = 2 * self.num_levels + 2
        for sym_it in self.ticker:
            self.session.subscribe_ticker_all_feeds(sym_it)
            table = np.empty(shape = [self.num, num_columns])
            # Start from NaN so a row that is never filled is recognisable
            # instead of holding uninitialised memory.
            table.fill(np.nan)
            self.book_data[sym_it] = table
        for i in range(self.num):
            self.session.add_timer(self.start_time + i * MIN, self.timer_getdata_callback)

    def timer_getdata_callback(self, time):
        """Record one snapshot row for every symbol at simulator time ``time``."""
        # Bid sizes start right after the ask block, so the offset follows
        # num_levels (a fixed offset is only correct for five levels).
        ask_offset = 2
        bid_offset = 2 + self.num_levels

        for sym_it in self.ticker:
            book = self.session.get_book_levels(sym_it, nlevels = self.num_levels)
            bids = book["bids"]
            asks = book["asks"]

            # A basic-index row is a view, so writes land in book_data.
            row = self.book_data[sym_it][self.time_index]
            row[0] = time

            # NOTE(review): assumes bids/asks are sequences supporting len();
            # a side with fewer levels than requested gets size 0 for the
            # missing levels, and an empty side leaves the mid price as NaN.
            if len(bids) > 0 and len(asks) > 0:
                row[1] = (bids[0]["price"] + asks[0]["price"]) / 2.0 / DOLLAR
            else:
                row[1] = np.nan

            for i in range(self.num_levels):
                row[ask_offset + i] = asks[i]["size"] if i < len(asks) else 0
                row[bid_offset + i] = bids[i]["size"] if i < len(bids) else 0
        self.time_index += 1

    def end(self):
        """Write every symbol's recorded rows to its book-data text file."""
        time = self.session.current_time()
        print("DONDONDON: it's now {0}".format(sim.micro_to_time(time)))
        for sym_it in self.ticker:
            filename = "{0}{1}_{2}_bookdata.txt".format(DATA_PATH, sym_it, self.date)
            # Save only the rows that were actually recorded.
            recorded = self.book_data[sym_it][:self.time_index]
            np.savetxt(filename, recorded, newline = "\n")
            print("Saved to {0}".format(filename))
    
###############################################################################
# Read from book data file and convert it to feedable SVM data
###############################################################################
    
    
def treat_data(ticker, date, threshold = 0.01, buildup = (1, 2, 3, 5, 10, 15)):
    """Convert one saved book-data file into SVM inputs (OBP) and labels.

    Reads ``<DATA_PATH><ticker>_<date>_bookdata.txt`` whose columns are
    ``time, price, ask sizes (levels 1..n), bid sizes (levels 1..n)``; ``n`` is
    inferred from the column count. The first ``max(buildup)`` rows serve only
    as history; every later row ``i`` yields one sample:

    * signal: +1 if ``price[i] - price[i-1] > threshold``, -1 if it is below
      ``-threshold``, else 0 (absolute price difference, not a relative return).
    * OBP(t, l): total bid size of levels 1..l+1 divided by total ask size of
      the same levels, both summed over rows ``i - buildup[t] .. i`` inclusive.
      It is stored in column ``t * n + l`` of the OBP table.

    Results are written to ``<DATA_PATH><ticker>_<date>_obp.txt`` and
    ``<DATA_PATH><ticker>_<date>_sig.txt``.

    Args:
        ticker: symbol used in the file names.
        date: date label used in the file names.
        threshold: minimum absolute price move that counts as up/down.
        buildup: look-back lengths (in rows, each >= 1) for the OBP features.

    Raises:
        ValueError: if ``buildup`` is empty or holds a value below 1, or the
            file has fewer than ``max(buildup)`` rows.
    """
    raw_filename = "{0}{1}_{2}_bookdata.txt".format(DATA_PATH, ticker, date)
    # ndmin=2 keeps a single-row file two-dimensional.
    raw_data = np.loadtxt(raw_filename, ndmin = 2)
    num_data = raw_data.shape[0]
    # Floor division: the level count must stay an int on Python 3 too.
    num_lvls = (raw_data.shape[1] - 2) // 2

    buildup = [int(b) for b in buildup]
    if not buildup or min(buildup) < 1:
        raise ValueError("buildup must contain positive row counts")
    max_window = max(buildup)
    if num_data < max_window:
        raise ValueError("{0} has {1} rows, need at least {2}".format(
            raw_filename, num_data, max_window))
    num_samples = num_data - max_window

    # Column blocks follow the inferred depth instead of assuming five levels.
    ask_lo = 2
    bid_lo = 2 + num_lvls
    bid_hi = bid_lo + num_lvls

    # sig_data is the output signal for the SVM, as a (n, 1) ndarray.
    sig_data = np.zeros(shape = [num_samples, 1], dtype = np.int16)
    price_change = raw_data[max_window:, 1] - raw_data[max_window - 1:num_data - 1, 1]
    sig_data[price_change > threshold, 0] = 1
    sig_data[price_change < -threshold, 0] = -1

    # obp_data is the SVM input as a (n, len(buildup) * num_lvls) ndarray where
    # OBP(t, l) = obp_data[:, t * num_lvls + l].
    obp_data = np.empty(shape = [num_samples, len(buildup) * num_lvls])
    for sample, i in enumerate(range(max_window, num_data)):
        for t, lookback in enumerate(buildup):
            window = raw_data[i - lookback:i + 1]
            # Per-level totals over the window, then accumulated across levels
            # so entry l covers levels 1..l+1.
            ask_cum = np.cumsum(window[:, ask_lo:bid_lo].sum(axis = 0))
            bid_cum = np.cumsum(window[:, bid_lo:bid_hi].sum(axis = 0))
            # A zero ask total yields inf/nan through numpy's float division.
            obp_data[sample, t * num_lvls:(t + 1) * num_lvls] = bid_cum / ask_cum

    obp_filename = "{0}{1}_{2}_obp.txt".format(DATA_PATH, ticker, date)
    sig_filename = "{0}{1}_{2}_sig.txt".format(DATA_PATH, ticker, date)
    np.savetxt(obp_filename, obp_data, newline = "\n")
    np.savetxt(sig_filename, sig_data, fmt="%d", newline = "\n")
    print("Saved to {0}".format(obp_filename))
    
#date = "20150121"
# Replay window. The range starts ten calendar days before `start`; only
# business days (Mon-Fri) are generated.
start = dt.date( 2015, 1, 21 )
end = dt.date( 2015, 4, 12 )
days = pd.bdate_range(start - dt.timedelta(days=10), end)
# Weekday dates to skip (presumably market holidays -- TODO confirm source).
to_remove = ["20150119", "20150216", "20150403", "20150525", "20150703", "20150824", "20151126", "20151225",
             "20160101","20160118","20160215","20160325","20160530","20160704","20160905","20161124","20161226",
             "20170102","20170116","20170220","20170414","20170518"]

# Symbol universe, grouped by sector and size bucket.
market = ["IVV"]
basic_large = ["PG", "UN", "UL", "BHP", "DOW"]
basic_mid = ["STLD", "FBHS", "SQM", "WLK", "SEE"]
tech_large = ["AAPL", "GOOG", "MSFT", "FB", "ORCL"]
tech_mid = ["IPG", "CDNS", "DOX", "VRSN", "FLEX"]
finance_large = ["JPM", "WFC", "BAC", "HSBC", "C"]
finance_mid = ["ALLY", "CBOE", "WF", "ETFC", "AVAL"]
health_large = ["JNJ", "NVS", "PFE", "MRK", "UNH"]
health_mid = ["SGEN", "VAR", "TFX", "ALKS", "JAZZ"]
energy_large = ["XOM", "GE", "CVX", "TOT", "PTR"]
energy_mid = ["XEC", "TSO", "YPF", "EQT", "PE"]
# Only the large-cap groups plus the market ETF are recorded in this run.
symbols = basic_large + tech_large + finance_large + health_large + energy_large + market
print(symbols)

# Recording window, passed to every run so it is stated in one place rather
# than relying on StoreDayData's defaults.
start_time = sim.string_to_micro("9:30")
end_time = sim.string_to_micro("15:30")

# One simulator run per date; each run saves <symbol>_<date>_bookdata.txt.
for day in days:
    training_date = day.strftime("%Y%m%d")
    print(training_date)
    if training_date in to_remove:
        continue
    simul_storedata = sim.Simulator(StoreDayData)
    simul_storedata.run(training_date, symbols, num_levels = 5,
                        start_time = start_time, end_time = end_time)

# To build the SVM inputs afterwards, call
# treat_data(symbol, date, threshold = 0.04) for each symbol and recorded date.

['PG', 'UN', 'UL', 'BHP', 'DOW', 'AAPL', 'GOOG', 'MSFT', 'FB', 'ORCL', 'JPM', 'WFC', 'BAC', 'HSBC', 'C', 'JNJ', 'NVS', 'PFE', 'MRK', 'UNH', 'XOM', 'GE', 'CVX', 'TOT', 'PTR', 'IVV']
20150112
DONDONDON: it's now 18:04:59.753213
Saved to data/PG_20150112_bookdata.txt
Saved to data/UN_20150112_bookdata.txt
Saved to data/UL_20150112_bookdata.txt
Saved to data/BHP_20150112_bookdata.txt
Saved to data/DOW_20150112_bookdata.txt
Saved to data/AAPL_20150112_bookdata.txt
Saved to data/GOOG_20150112_bookdata.txt
Saved to data/MSFT_20150112_bookdata.txt
Saved to data/FB_20150112_bookdata.txt
Saved to data/ORCL_20150112_bookdata.txt
Saved to data/JPM_20150112_bookdata.txt
Saved to data/WFC_20150112_bookdata.txt
Saved to data/BAC_20150112_bookdata.txt
Saved to data/HSBC_20150112_bookdata.txt
Saved to data/C_20150112_bookdata.txt
Saved to data/JNJ_20150112_bookdata.txt
Saved to data/NVS_20150112_bookdata.txt
Saved to data/PFE_20150112_bookdata.txt
Saved to data/MRK_20150112_bookdata.txt
Saved to data/U

In [2]:
# Full symbol universe: the market ETF plus large- and mid-cap names for each
# sector. This redefines the sector lists from the data-collection cell with a
# different mid-cap selection for basic materials and finance.
market = ["IVV"]
basic_large = ["PG", "UN", "UL", "BHP", "DOW"]
basic_mid = ["TECK", "FBHS", "SQM", "WPM", "SEE"]
tech_large = ["AAPL", "GOOG", "MSFT", "FB", "ORCL"]
tech_mid = ["IPG", "CDNS", "DOX", "VRSN", "FLEX"]
finance_large = ["JPM", "WFC", "BAC", "HSBC", "C"]
finance_mid = ["ATH", "CBOE", "WF", "ETFC", "AVAL"]
health_large = ["JNJ", "NVS", "PFE", "MRK", "UNH"]
health_mid = ["SGEN", "VAR", "TFX", "ALKS", "JAZZ"]
energy_large = ["XOM", "GE", "CVX", "TOT", "PTR"]
energy_mid = ["XEC", "TSO", "YPF", "EQT", "PE"]
# Order matters for anything indexing into `symbols`: market first, then each
# sector as large-cap followed by mid-cap.
symbols = (
    market
    + basic_large + basic_mid
    + tech_large + tech_mid
    + finance_large + finance_mid
    + health_large + health_mid
    + energy_large + energy_mid
)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(symbols)

['IVV', 'PG', 'UN', 'UL', 'BHP', 'DOW', 'TECK', 'FBHS', 'SQM', 'WPM', 'SEE', 'AAPL', 'GOOG', 'MSFT', 'FB', 'ORCL', 'IPG', 'CDNS', 'DOX', 'VRSN', 'FLEX', 'JPM', 'WFC', 'BAC', 'HSBC', 'C', 'ATH', 'CBOE', 'WF', 'ETFC', 'AVAL', 'JNJ', 'NVS', 'PFE', 'MRK', 'UNH', 'SGEN', 'VAR', 'TFX', 'ALKS', 'JAZZ', 'XOM', 'GE', 'CVX', 'TOT', 'PTR', 'XEC', 'TSO', 'YPF', 'EQT', 'PE']
