In [1]:
import sys
sys.path.append('/Users/rkam/projs/algotrade')
import numpy as np
import pandas as pd
import scipy as sp
import itertools
from sklearn.neighbors import NearestNeighbors
from typing import Set, Dict, Tuple, NewType
from tqdm import tqdm
from datetime import timedelta, datetime
from src.dbmanager import DBManager
from src.markov.agent import Agent
from src.markov.environment import Environment

In [2]:
# Locations of the two database files: `ob.db` is queried for the `ob` table
# and `tr.db` for the `tr` table in the data-loading cell.
ob_db_path = '/Users/rkam/projs/algotrade/data/db/ob.db'
tr_db_path = '/Users/rkam/projs/algotrade/data/db/tr.db'

# One DBManager handle per database file.
dbob = DBManager(path=ob_db_path)
dbtr = DBManager(path=tr_db_path)

## Data

In [3]:
# Column names are assigned by position after `select *`, so both renames below
# rely on the column order of the underlying tables.
ob_columns = ['ts', 'bprice', 'bquantity', 'aprice', 'aquantity']
tr_columns = ['ts', 'direction', 'price', 'quantity']

# --- order book -------------------------------------------------------------
ob = dbob.read(
    """
    select
        *
    from
        ob
    """
)
ob.columns = ob_columns
# Parse the timestamps and drop any timezone so they are tz-naive.
ob_ts = pd.to_datetime(ob['ts'])
ob['ts'] = ob_ts.dt.tz_localize(None)
# Work on the first 500000 order-book rows only.
ob = ob.head(500000)

# --- trades -----------------------------------------------------------------
tr = dbtr.read(
    """
    select
        *
    from
        tr
    """
)
tr.columns = tr_columns
# Same timestamp treatment as the order book, so the two can be compared.
tr_ts = pd.to_datetime(tr['ts'])
tr['ts'] = tr_ts.dt.tz_localize(None)

## Building the sets (Формируем множества)

In [4]:
# Action grid: every combination of (side, first numeric level, second numeric level).
# NOTE(review): the meaning of the two numeric components is defined by
# Agent/Environment, which are not shown here — confirm before renaming.
#
# The grids are built from integer ranges and then scaled. A float-step
# np.arange accumulates rounding error (it produces 0.30000000000000004 rather
# than 0.3) and its length near the stop value is not reliable, so a tuple such
# as ('buy', 0.3, 0.05) would not be found in the resulting set.
first_levels_buy = np.arange(2, 5) / 10      # 0.2, 0.3, 0.4
first_levels_sell = np.arange(-5, -2) / 10   # -0.5, -0.4, -0.3
second_levels = np.arange(1, 11) / 100       # 0.01, 0.02, ..., 0.10

buy_actions = list(itertools.product(['buy'], first_levels_buy, second_levels))
sell_actions = list(itertools.product(['sell'], first_levels_sell, second_levels))
actions = set(buy_actions + sell_actions)

In [5]:
# Index the order book by timestamp so each snapshot can be fetched with
# `.loc[ts]` inside the loop below.
ob_groupped = ob.groupby('ts', group_keys=True).apply(lambda df_: df_)

depth_ob = 5      # order-book rows kept per snapshot
last_trades = 5   # most recent trades kept per snapshot
trade_cols = ['direction', 'price', 'quantity']
book_cols = ['bprice', 'bquantity', 'aprice', 'aquantity']

states = set()
timestamps = ob['ts'].unique()  # computed once; also gives the progress-bar total
for ts in tqdm(timestamps, total=len(timestamps)):
    # The `last_trades` most recent trades at or before this snapshot.
    # `.loc[mask, cols]` returns a new frame, so nothing is written back into `tr`.
    trades = tr.loc[tr['ts'] <= ts, trade_cols].iloc[-last_trades:]
    # Skip snapshots without enough trade history *before* doing any work on them.
    if trades.shape[0] != last_trades:
        continue
    # Scale every trade column by its most recent value.
    embd_tr = (trades / trades.iloc[-1]).values.flatten()

    # Top `depth_ob` rows of this snapshot, each column scaled by its maximum.
    orderbook = ob_groupped.loc[ts][book_cols].iloc[:depth_ob]
    embd_ob = (orderbook / orderbook.max()).values.flatten()

    # A state is the order-book embedding followed by the trade embedding;
    # a tuple is used so it can be stored in a set.
    state = tuple(np.concatenate([embd_ob, embd_tr]))
    states.add(state)

if not states:
    raise ValueError(
        'no states were built: no order-book timestamp has at least '
        f'{last_trades} trades at or before it'
    )

# Keep only embeddings of the most common length; snapshots with fewer than
# `depth_ob` order-book rows produce shorter vectors and are dropped here.
embd_length = pd.Series([len(x) for x in states]).value_counts().index[0]
states = [x for x in states if len(x) == embd_length]

100%|██████████| 9993/9993 [00:16<00:00, 599.07it/s]


In [6]:
# Discrete reward grid: 41 values from -2.0 to 2.0 in steps of 0.1.
# Built from an integer range and then scaled, because the float-step form
# np.arange(-2, 2.1, 0.1) accumulates rounding error: the midpoint comes out as
# a tiny non-zero number instead of 0.0, and the other values carry similar noise.
rewards = np.arange(-20, 21) / 10

In [7]:
# Build the agent over the discrete state, action and reward collections
# prepared in the cells above.
# NOTE(review): the meaning of `n_init_counts` and `eps` is defined inside
# Agent, which is not shown in this notebook — confirm there before tuning.
agent = Agent(
    states=states,
    actions=actions,
    rewards=rewards,
    n_init_counts=10,
    eps=0.05,
)

init counts...
init probs...


In [8]:
# The environment receives the same depth / trade-window settings and the same
# embedding length that were used to build `states`, plus the raw trade and
# order-book frames.
environment = Environment(
    depth_ob=depth_ob,
    last_trades=last_trades,
    trades=tr,
    orderbooks=ob,
    state_length=embd_length,
)

In [None]:
# NOTE(review): `idx` is drawn here but never used in this cell.
idx = np.random.choice(a=range(0, len(actions)))

# Placeholder action passed to the environment until the agent picks a real one.
action = ('none', 0, 0)
for ts in tqdm(ob['ts'].unique(), total=ob['ts'].nunique()):
    # Ask the environment for the (state, reward) pair at this timestamp, given
    # the previous action and the agent's current internal state.
    state_reward = environment.get_state_reward(ts, action, agent.agent_state)
    # A None result means there is nothing to act on for this timestamp; the
    # previous action is then carried over to the next iteration.
    if state_reward is not None:
        state, reward = state_reward
        # Let the agent choose the next action from the observed state and reward.
        action = agent.get_action(state, reward)

In [4]:
# Scratch check: 10000 evenly spaced values descending from 1 to 0.001.
# NOTE(review): `np` is already imported in the first cell and this cell's
# execution count is out of sequence — looks like leftover exploration; confirm
# whether it is still needed.
import numpy as np
np.linspace(1, 0.001, 10000)

array([1.        , 0.99990009, 0.99980018, ..., 0.00119982, 0.00109991,
       0.001     ])

In [5]:
# Scratch check: with a float step, np.arange stops before the end value here
# (the recorded output is [0.01, 0.02]).
np.arange(0.01, 0.03, 0.01)

array([0.01, 0.02])