In [1]:
import pandas as pd
import data_util
from tqdm import tqdm_notebook as tqdm
from data_generator import DataGenerator
from tec_an import TecAn
import numpy as np
from data_util import *
from sklearn.model_selection import train_test_split

In [31]:
import pandas as pd
from ta.trend import *
from ta.momentum import *
from ta.volume import *
from ta.volatility import *
from ta import add_all_ta_features, add_trend_ta, add_volume_ta, add_volatility_ta, add_momentum_ta, add_others_ta
import matplotlib.pyplot as plt
import multiprocessing
import threading
import numpy as np
from tec_an import TecAn


class TacProcess():
    def __init__(self):
        self.old_price = -1
        
    def add_tacs(self, list, index, result):
        list = []
        #print(len(result[-1]))
        #print(index)
        for tec in result:
            list.append(tec.iloc[index])
        return list

    def add_tacs_realtime(self, list, price, amount, tec):
        list = []
        list.extend(tec.add_ta(price, amount))
        self.old_price = price
        #print("{} {} {}".format(price, amount, list))
        return list
    

TIMESTAMP_KEY = "timestamp"
MICROTIMESTAMP_KEY = "microtimestamp"
ASKS_KEY = "asks"
BIDS_KEY = "bids"
PRICE_KEY = "price"
AMOUNT_KEY = "amount"
CLOSE = 'close'

class DataAgent():
    
    def __init__(self,
                 resample = '2Min',
                 taProc = TacProcess(), 
                 tec = TecAn(windows = 20, windows_limit = 100),
                 on_new_data = lambda x: print("{}".format(x)),
                 on_new_order = lambda buy, sell: buy,
                 on_closed_price = lambda price: price
                 ):
        self.taProc = taProc
        self.tec = tec
        self.final_x = []
        self.list = []
        self.resample = resample
        self.raw_limit = 10000
        self.last_price = -1 
        self.last_amount = -1
        self.last_ohlc_count = 1
        self.on_new_data = on_new_data
        self.on_new_order = on_new_order
        self.on_closed_price = on_closed_price
        self.on_new_data_count = 0
        
    def on_new_data(self, x):
        self.on_new_data(x)
        
    def on_new_raw_data(self, raw):
        price = raw[PRICE_KEY]
        amount = raw[AMOUNT_KEY]
        
        self.on_new_order(raw[BIDS_KEY], raw[ASKS_KEY])
        
        # Only consider when prices changes
        if (self.last_price == price and self.last_amount == amount):
            return
        
        self.last_price = price 
        self.last_amount = amount
        
        timestamp = raw[TIMESTAMP_KEY]
        timestamp = pd.to_datetime(timestamp, unit='s')
        self.list.append([timestamp, price])
        
        if (len(self.list) > self.raw_limit):
            self.list.pop(0)
        
        DATE = 'Date'
        df = pd.DataFrame(self.list, columns = [DATE, CLOSE])
        df = df.set_index(pd.DatetimeIndex(df[DATE]))
                
        time = df[CLOSE].resample(self.resample)
        ohlc = time.ohlc()
        
        #print("{} {}".format(timestamp, len(ohlc)))
        
        ohlc_count = len(ohlc)
        if (ohlc_count < 2 or self.last_ohlc_count == ohlc_count):
            return
        
        self.last_ohlc_count = ohlc_count
        
        print("{} - {}".format(self.last_ohlc_count, ohlc_count))
        
        del ohlc['open']
        del ohlc['high']
        del ohlc['low']
        
        price = ohlc.iloc[-2][CLOSE]
        
        self.on_closed_price(price)
        
        self.on_new_data_count = self.on_new_data_count + 1

        x = self.taProc.add_tacs_realtime([], price, 0.0, self.tec)
        self.on_new_data(x)
        
        self.ohlc = ohlc


In [32]:
def get_full_database(resample, raw_dir, base_dir = "data/"):

    full_data = base_dir + raw_dir + "/"
    data_gen = DataGenerator(random = False, base_dir = full_data)
    data_gen.rewind()
    data_count = (data_gen.steps - 100)
    data_count = 20000

    final_x = []

    closed_prices = []

    on_new_data = lambda x: final_x.append(x)
    on_closed_price = lambda price: closed_prices.append(price)
    
    print(resample)

    agent = DataAgent(
        resample = resample,
        on_new_data = on_new_data,
        on_closed_price = on_closed_price
    )

    print("Processing {}".format(raw_dir))

    for i in tqdm(range(data_count)):
        agent.on_new_raw_data(data_gen.next())


    closes = pd.DataFrame(closed_prices, columns = ['Close'])

    final_y = closes
    
    
    return final_x, final_y, agent

In [36]:
x, y, agent = get_full_database('20Min', "btceur", base_dir = "data/")


20Min
Processing btceur


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(data_count)):


  0%|          | 0/20000 [00:00<?, ?it/s]

2 - 2
3 - 3
4 - 4


In [37]:
print(len(x))

3


[[0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  1.0,
  -9.413234072004471,
  -9.413234072004471,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0,
  0,
  0,
  0,
  0,
  0],
 [-10.873891737894155,
  0.0,
  0.8708919878243762,
  0.0,
  0.0,
  0.0,
  1.0017448230805308,
  -8.716603319970393,
  -9.409750500530338,
  0.0,
  0.0,
  -100.0,
  0.0,
  -0.0277479752166299,
  -0.00554959504332598,
  -0.0277479752166299,
  0.0,
  0.0,
  0.0,
  0.0,
  0,
  0,
  0,
  0,
  0,
  0],
 [-5.293462851765798,
  0.5389065248389331,
  0.12743136883804915,
  0.0,
  0.0,
  0.0,
  0.9986435355645321,
  -8.720383392076073,
  -8.31491828396791,
  0.0,
  0.0,
  -97.61701338013742,
  0.0,
  -0.019056660549778334,
  -0.008251008144616451,
  -0.019056660549778334,
  0.0,
  0.0,
  0.0,
  0.0,
  0,
  0,
  0,
  0,
  0,
  0]]