In [1]:
import pandas as pd
import json
import numpy as np

with open('data.json') as f:
    data = json.load(f)

%matplotlib qt


In [2]:
# Reading to dataframe
# Creating additional columns
gen = ((k, v['assetA']['bid'], v['assetA']['ask'], v['assetB']['bid'], v['assetB']['ask']) for k, v in data.items())
data_list = [x for x in gen]
df = pd.DataFrame(data_list, columns=['timestamp', 'assetA_bid', 'assetA_ask', 'assetB_bid', 'assetB_ask'])
df['delta_t'] = df['timestamp'].astype('uint64').diff()
df['trade_freq'] = df.apply (lambda row: 1/row['delta_t'], axis=1)
df['trade_freq_avg'] = df['trade_freq'].rolling(window=15).mean()
df = df.set_index('timestamp')

In [20]:
# Lets chart some analytics - trade frequency, tick frequency
import matplotlib.pyplot as plt

fig0, (ax00, ax01) = plt.subplots(2, 1)
ax00.set_title('Tick time delta histogram (up to 2 seconds)')
ax01.set_title('Tick time delta averaged in time')
df['delta_t'].hist(ax = ax00, bins = np.arange(10,2000,100))
df['delta_t'].rolling(window=30).mean().rolling(window=30).mean().rolling(window=30).mean().plot(ax = ax01)

# Conclutions:
# 1. the samples are highlty uneaven with rare time deltas up to several minutes. Most time deltas between the samples however are between 50 and 500 ms
# 2. Due to (1) several approaches can be taked to fight non uniformity of samples
#   a. upsample missing timepoints
#   b. craete new more uniform ticks by special transformation (time, volume, cumulative price ticks or entropy based ticks)
#   c. train predictor in such a way that accounts for timme deltas between samples
#   d. assume low impact of non uniformity and treat as uniform (probably a bad idea but will do for start)

<matplotlib.axes._subplots.AxesSubplot at 0x24eb8418c10>

In [38]:
# Plot the ask/bid data for analysis
# Get the timeline data 
start = df.iloc[[0]].index.astype('uint64')
end = df.iloc[[-1]].index.astype('uint64')
timedelta = end-start
print(f'The time period is: {timedelta/1000/60/60/24} days')
print(f'Maximum possible trades in this period is: {timedelta/1000/30} trades')

# Setting up the grids
delta_t = end.to_numpy()-start.to_numpy()
number_of_ticks = len(df)
minor_grid_min = 0.5
ticksPerHour = number_of_ticks/(delta_t/1000/60/60)
ticksPerMinorGrid = number_of_ticks/(delta_t/1000/60)*minor_grid_min

hour_grid = np.arange(-0.2*number_of_ticks, 1.2*number_of_ticks, ticksPerHour)
minor_grid = np.arange(-0.2*number_of_ticks, 1.2*number_of_ticks, ticksPerMinorGrid)

# Plot the next graphs:
# Subplot 1
# a. A spread
# b. B spread
# c. A ask and bid
# c. B ask and bid

SpreadA = df['assetA_ask'].sub(df['assetA_bid'])
SpreadB = df['assetB_ask'].sub(df['assetB_bid'])
bidA = df['assetA_bid']
askA = df['assetA_ask']
bidB = df['assetB_bid']
askB = df['assetB_ask']

fig1, (ax10, ax11, ax12) = plt.subplots(3, 1)
#ax10.set_xticks(hour_grid)
#ax10.set_xticks(minor_grid, minor=True)
#ax10.grid(which='both')

for sig in [bidA, askA, bidB, askB]:
    sig_scaled = sig-7050
    sig_scaled.plot(ax = ax10)
    sig_scaled.rolling(window=30).mean().plot(ax = ax10, style=':')

i = -20
for sig in [SpreadA, SpreadB]:
    i=i+20
    sig_scaled = sig*5+i # Just a scaling to fit on one axes
    sig_scaled.plot(ax = ax10)
    sig_scaled.rolling(window=30).mean().plot(ax = ax10, style=':')

ax10.legend(['bidA', 'bidA avg', 'askA', 'askA avg', 'bidB', 'bidB avg', 'askB', 'askB avg', 'SpreadA', 'SpreadA avg', 'SpreadB', 'SpreadB avg'])
ax10.set_title('A and B ask/bid and spread')

# Subplot 2
# a. A ask and bid zoomin

zoomin_range = np.arange(4000,5000,1)

for sig in [bidA, askA]:
    sig.iloc[zoomin_range].plot(ax = ax11)
    sig_mov = sig.rolling(window=30).mean()
    sig_mov.iloc[zoomin_range].plot(ax = ax11, style=':')
    sig.iloc[zoomin_range].plot(ax = ax11, style = '.')

ax11.legend(['bidA', 'bidA avg', 'bidA scatter', 'askA', 'askA avg', 'askA scatter'])
ax11.set_title('A Signals zoomin')

# Subplot 3
# a. B ask and bid zoomin 

for sig in [bidB, askB]:
    sig.iloc[zoomin_range].plot(ax = ax12)
    sig_mov = sig.rolling(window=30).mean()
    sig_mov.iloc[zoomin_range].plot(ax = ax12, style=':')
    sig.iloc[zoomin_range].plot(ax = ax12, style = '.')

ax12.legend(['bidB', 'bidB avg', 'bidB scatter', 'askB', 'askB avg', 'askB scatter'])
ax12.set_title('B Signals zoomin')

# Conclutions:
# 1. Zoomout: 
#   a. The trading dataframe is of one day aproximately
#   b. A and B are hightly correlated at least at low resolution
#   c. There are negative spreads in the data up to tens of seconds. Three of possible explanations are 
#       i. Bullish of Bearish trades beyond optimal opposite price point 
#       ii. missing data points/lags in an order book that creates opposite price optimization lag
#       iii. Lagging price correction 

# 2. Zoomin
#   a. Uneven sampling can be seen
#   b. A and B have different trading dynamics
#   c. A and B have different spread
#   d. A and B have different trading patterns
#   e. As a result of (b), (c), and (d) we can deduct different number of traders, hightly different volumes traded for assets A ndd B 

The time period is: Float64Index([1.083294814814815], dtype='float64', name='timestamp') days
Maximum possible trades in this period is: Float64Index([3119.889066666667], dtype='float64', name='timestamp') trades


Text(0.5, 1.0, 'B Signals zoomin')

In [28]:
# Plot the ask/bid data analitics

fig2, (ax20, ax21, ax22) = plt.subplots(3, 1)

# Subplot 1
# a. A trailing window var
# b. B trailing window var

ax20.set_title('Rolling window signal variance')

i=0
for sig in [bidA, askA, bidB, askB]:
    i=i+10
    sig_mov = sig.rolling(window=30).var()+i
    sig_mov.plot(ax = ax20)

ax20.legend(['bidA var', 'askA var','bidB var', 'askB var'])
ax20.get_xaxis().set_visible(False)

# Subplot 2
# a. ask/bid trailing window cov

ax21.set_title('Rolling window ask/bid covariance')
cov_A = bidA.rolling(window=30).cov(other = askA)+10
cov_B = bidB.rolling(window=30).cov(other = askB)-10
cov_A.plot(ax = ax21)
cov_B.plot(ax = ax21)
ax21.legend(['A bid/ask cov', 'B bid/ask cov'])
ax21.get_xaxis().set_visible(False)

# Subplot 3
# a. AB trailing window cov

ax22.set_title('Rolling window A/B covariance')
cov_bid = bidA.rolling(window=30).cov(other = bidB)+10
cov_ask = askA.rolling(window=30).cov(other = askB)-10
cov_bid.plot(ax = ax22)
cov_ask.plot(ax = ax22)
ax22.legend(['A/B bid cov', 'A/B ask cov'])
ax22.get_xaxis().set_visible(False)

# Conclutions:
#   a. Hmmmm... Not sure what can be seen here at low resolution in regard of var/covar between signals. 
#   b. Peaks in A/B and bid/ask covariance correlates to occations of magor trading (volume, price change) likely (?)
#   c. var and covar graphs for all the signals look alike which probably reflect the fact that assets are closely related. Especially it shows at major trading periods (spikes in var/covar) 


In [24]:
class silding_window():
# base class for sliding window functions 
    def __init__(self, size):
        self.size = size
        self.arr = []
        self.val = None
        self.last = None

    def update(self, val):
        if val == None:
            val = self.last
        self.last = val
        if len(self.arr) == self.size:
            self.arr.pop(0)
            self.arr.append(val)
            self.val = self.func()
            return

        self.arr.append(val)

    def getval(self):
        return self.val

    def next_tick(self, val):
        self.update(val)
        return self.getval

    def func(self):
    # This function is to be rewritten in downstream classees
        return None

class avg_silding_window(silding_window):
# mooving average class
    def func(self):
        return np.mean(self.arr)

def lookAheadTradeReturnPredictorFunc(pred, active_asset):
    # Input:
    #   Array of [K x 2N] (2 * trading delay) prices predictions for K assets
    # Output:
    #   [K x N] Array of minimal return predictions for each of next N ticks (1*trading delay delta) for each asset
    # Note: later to each element of future return predictions in non active asset the delta of active asset for the preceeding period should be added to account for a price of not taking a trading action 
    # Note: Each of the points in the array answers the question "what would be my return after minimal trading delay if I choose to go into asset k at timepoint 0<=n<N". Why so many optional point n instead of just a single time point in the present? Because by being gridy we are risking missing the "good" trade point in the nearest future because of the introduced trading delay   
    # A1 [1   2 3 1  3 1 3  4 4 23 32  32 34 ]
    # A2 [-1 -2 3 1 -3 1 3 -4 4 2  12 -12 14 ]
    # Note: How the trade return is calculated? At tick 0<=n<N asset price (buy - active asset is an exception) is taken and substracted from the price (sell) at point n'=n+N (Next nearest possible trade)
    pass

def lookAheadTradeDescitionMakerFunc(return_pred, greediness_w ):
    # Input:
    # [K x N] trading return predictions array
    # [N] Greediness vector. This is a vector that favors the earlier trades by multiplying the predicted returns vector for each asset. This is to caount for time value lost in delayed trades and rising uncertanty for farther predictions
    # Output:
    # (asset_idx, min_return) Returns index of asset with best predicted return and the predicted return itself 
    pass
class idealPredictor():
    def __init__(self, df, lookahead):
        self.df = df
        self.lookahead = lookahead

    def predict(self, ts):
        print(df.loc[ts])
        self

class tradingBot():
    # Note: 
    # Simple bot scheme
    # bare_signals -> features -> price_prediction  -> trade_orders  
    #                                     strategy  ->
    def __init__(self, predictor, qty = 100, trading_delay = 30):
        self.last_trade = None
        self.trading_delay = trading_delay 
        self.orders = []
        self.last_order = {}
        self.cur_asset_id = 0
        self.cur_qty = qty
        self.cur_ts = 0
        self.predictor = predictor

        
    def compute_tick(self, ts, data):
        # this handles new data arriving to the bot
        # done
        self.append_data(data)
        self.pred = self.predictor.update(data)
        self.cur_ts = ts
        if (ts-self.last_trade) > self.trading_delay*1000:
            self.pred = self.predictor.get_pred()
            new_asset_id = self.compute_trade()
            self.update_state(new_asset_id)

    def append_data(self, data):
        # TODO
        pass

    def compute_trade(self):
        # this computes trade based on the current market
        return_pred = self.doReturnCalculation(self.pred)
        (asset_id, ret) = self.makeDescition(return_pred)
        return asset_id

    def update_state(self, tgt_asset_id):
        # this updates bot state based on target asset_id
        # done
        if tgt_asset_id != self.cur_asset_id:
            if self.cur_asset_id == 0:
                self.issue_trades([('Buy', tgt_asset_id)])
            elif tgt_asset_id == 0:
                self.issue_trades([('Sell', self.cur_asset_id)])
            else:
                self.issue_trades([('Sell', self.cur_asset_id), ('Buy', tgt_asset_id)])
    
    def issue_trades(orders):
        # done
        actions = []
        for (order_type, asset_id) in orders:
            # update the quantities and asset stocks
            if order_type == 'Sell':
                self.qty = self.qty*get_sell_price(asset_id)
                self.asset_id = 0
            elif order_type == 'Buy':
                self.asset_id = asset_id
                self.qty = self.qty/self.get_buy_price(asset_id)
            # update the orders
            actions.append(order_type+str(asset_id))
            
        # update an order book
        self.orders.append({
                "time": self.cur_ts,
                "actions": actions
                })

    def get_sell_price(self, asset_id):
        # DONOTFORGET
        return 1
    def get_buy_price(self, asset_id):
        # DONOTFORGET
        return 1


            


In [29]:
p = idealPredictor(df, 50)
#print(df.head(3))
p.predict(1577836806371)

KeyError: '1577836806372'

In [2]:
def read_tick(df):
    for i, row in df.iterrows():
        tick = {    'timestamp':'next_tick',
                    'assets':{
                                'assetA':{
                                            'ask':row['assetA_ask'],
                                            'bid':row['assetA_bid']
                                        }, 
                                'assetB':{
                                            'ask':row['assetB_ask'],
                                            'bid':row['assetB_bid']
                                        }
                    }
                }
        yield tick

for t in read_tick(df):
    pass
    #print(t['assets']['assetA']['ask'])
    

NameError: name 'df' is not defined