In [8]:
buy_decision_model = 'dummy'
price_decision_model = 'dummy'
start_simulation = '2018-10-01' #YYYY-MM-DD
end_simulation = '2018-12-31' #YYYY-MM-DD
symbol = 'ETHBTC'
max_batch_size = 1000 # purely for lambda memory considerations
funds = (1,0) # in pair order

In [2]:
import psycopg2
import pandas as pd
import numpy as np
import re

# models
import buy_decision
import price_decision

In [3]:
# local only
import hidden
sql_string = hidden.psycopg2(hidden.secrets())
print('PostgreSQL connection data taken from hidden.py')

# Make the connection and cursor
conn = psycopg2.connect(sql_string, connect_timeout=3)

PostgreSQL connection data taken from hidden.py


In [9]:
import datetime
def validate(date_text):
    try:
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Incorrect data format, should be YYYY-MM-DD")
        
# validate start / end string format input to protect against injection
validate(start_simulation)
validate(end_simulation)

# validate symbol
assert re.match('^[A-Z]{2,}$', symbol), f"{symbol} is invalid symbol"

# validate starting funds
assert funds[0] > 0, "Starting funds for coin 1 must be > 0, since we always buy first"

# validate batch size
assert (isinstance(max_batch_size, int) and max_batch_size > 0), f"{max_batch_size} is not a valid batch size"

# validate model names too!
# TODO: validate model names

In [10]:
def get_batch_data(pair_symbol, start_time, end_time, batch_size):
    # TODO: this needs to be expanded to include engineered features
    sql = f"""
    select * from candlestick_15m cm 
    inner join pairs p on cm.pair_id = p.id 
    where
        p.symbol = '{pair_symbol}'
        and cm.close_time notnull
        and cm.open_time between '{start_time}' and '{end_time}'
    order by cm.open_time limit {batch_size}
    """
    df = pd.read_sql_query(sql, conn)
    return df

In [11]:
# initialization

buy_model = buy_decision.__dict__[buy_decision_model]
price_model = price_decision.__dict__[price_decision_model]
buy_columns = buy_model.columns()
price_columns = price_model.columns()

In [12]:
current_time = start_simulation
all_output = []
while True:
    # this batch data should also include feature engineering
    batch_data = get_batch_data(symbol, current_time, end_simulation, max_batch_size)
    
    if len(batch_data) == 0:
        break
    
    batch_buy_decision = buy_decision.__dict__[buy_decision_model].make_decision(batch_data[buy_columns].values)
    batch_price_decision = price_decision.__dict__[price_decision_model].stop_loss_target_range(batch_data[price_columns].values)

    output = batch_data[['open_time']].copy()
    output['buy_decision'] = batch_buy_decision
    output[['stop-loss_price', 'target_price']] = batch_price_decision
    
    all_output.append(output)
    current_time = batch_data['close_time'].max()

results = pd.concat(all_output)

In [13]:
results

Unnamed: 0,open_time,buy_decision,stop-loss_price,target_price
0,2018-10-01 00:00:00,False,0.033416,0.036934
1,2018-10-01 00:15:00,False,0.033453,0.036975
2,2018-10-01 00:30:00,False,0.033450,0.036972
3,2018-10-01 00:45:00,False,0.033468,0.036991
4,2018-10-01 01:00:00,False,0.033375,0.036889
...,...,...,...,...
690,2018-12-30 23:00:00,False,0.034324,0.037938
691,2018-12-30 23:15:00,False,0.034379,0.037997
692,2018-12-30 23:30:00,False,0.034350,0.037966
693,2018-12-30 23:45:00,False,0.034341,0.037955
