In [1]:
from sklearn.preprocessing import maxabs_scale
import numpy as np
import pandas as pd
import os
from joblib import load

def preprocessing(input, output):
    """Turn the raw input/output frames into arrays for the predictors.

    Columns of ``input`` that contain any NaN are discarded, ``'open'`` is
    moved into the index (so it is not part of the returned matrix) and the
    ``'Unnamed: 0'`` column is removed when present.  The caller's frames are
    left untouched, so the cell can be re-run safely.

    Parameters
    ----------
    input : pandas.DataFrame
        Feature frame; must contain an ``'open'`` column.
    output : pandas.DataFrame
        Target frame with ``'highest_in_5_days'`` and ``'lowest_in_5_days'``.

    Returns
    -------
    tuple
        ``(features, output_l, output_h)`` where ``features`` is the
        unscaled 2-D value array, ``output_l`` the ``'lowest_in_5_days'``
        values and ``output_h`` the ``'highest_in_5_days'`` values.

    Raises
    ------
    KeyError
        If ``'open'`` is missing (or was dropped because it contained NaN),
        or if either target column is missing.
    """
    # TODO this is for the non optimal dataset stored with unnamed: 0 index
    # dropna() without inplace returns a new frame instead of mutating the
    # caller's; errors='ignore' tolerates datasets saved without the stray
    # index column.
    features_df = (
        input.dropna(axis=1)
        .set_index('open')
        .drop(columns=['Unnamed: 0'], errors='ignore')
    )
    print(features_df.columns)
    print(features_df.shape)
    print(features_df.head(1))
    features = features_df.values
    if len(features):
        # Guarded so an empty dataset does not raise IndexError here.
        print(features[0])
    # NOTE(review): a max-abs scaled copy used to be computed here but was
    # never returned; the dead computation is removed and the raw values are
    # still what callers receive.  Confirm the saved predictors expect
    # unscaled features.
    output_h = output['highest_in_5_days'].values
    output_l = output['lowest_in_5_days'].values
    return features, output_l, output_h


def load_model(sym='AAPL',
               model_type='DecisionTreeRegressor',
               output_type='lows',
               dataset_dir=r'D:\repo\stonks\data\dataset'):
    """Load a saved predictor bundle for one symbol.

    The file is looked up at
    ``<dataset_dir>/<sym>/predictor_<output_type>_<model_type>.joblib``.

    Parameters
    ----------
    sym : str
        Ticker symbol; also the name of the sub-directory holding the file.
    model_type : str
        Model name embedded in the file name.
    output_type : str
        Target name embedded in the file name (the notebook uses ``'lows'``
        and ``'highs'``).
    dataset_dir : str
        Root directory of the datasets.  Defaults to the location that was
        previously hard-coded, so existing calls resolve to the same file.

    Returns
    -------
    object
        Whatever ``joblib.load`` returns for that file.
    """
    path = os.path.join(dataset_dir, sym,
                        f'predictor_{output_type}_{model_type}.joblib')
    print(path)
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code -- only load predictor files from a trusted source.
    predictor_info = load(path)
    return predictor_info


# Root directory that holds the per-symbol dataset files.
DATASET_DIR = r'D:\repo\stonks\data\dataset'

def file_path(sym, type, file_type = 'csv', dir = DATASET_DIR):
    """Return the path of ``<sym>_<type>.<file_type>`` inside ``dir``."""
    file_name = '{}_{}.{}'.format(sym, type, file_type)
    return os.path.join(dir, file_name)

# Read the feature/target CSVs for one symbol, build the model inputs and
# restore the saved low/high predictors.
# NOTE: the name `input` shadows the builtin; it is kept because the
# back-test cell further down reads it.
sym = 'AAPL'
input_df = pd.read_csv(file_path(sym, type='inputs'))
output_df = pd.read_csv(file_path(sym, type='outputs'))
input, output_l, output_h = preprocessing(input=input_df, output=output_df)
predictor_l = load_model(sym=sym,
                         model_type='DecisionTreeRegressor',
                         output_type='lows')
predictor_h = load_model(sym=sym,
                         model_type='DecisionTreeRegressor',
                         output_type='highs')


Index(['high', 'low', 'close', 'volume', 'trade_count', 'vwap', 'volume_adi',
       'volume_obv', 'volume_cmf', 'volume_fi', 'volume_em', 'volume_sma_em',
       'volume_vpt', 'volume_vwap', 'volume_nvi', 'volatility_bbm',
       'volatility_bbh', 'volatility_bbl', 'volatility_bbw', 'volatility_bbhi',
       'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl',
       'volatility_kcw', 'volatility_kchi', 'volatility_kcli',
       'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcw',
       'volatility_atr', 'volatility_ui', 'trend_macd', 'trend_macd_signal',
       'trend_macd_diff', 'trend_sma_fast', 'trend_sma_slow', 'trend_ema_fast',
       'trend_ema_slow', 'trend_trix', 'trend_mass_index', 'trend_dpo',
       'trend_kst', 'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
       'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b',
       'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
       'trend_visual_ichimoku_b', 'trend_ar

In [3]:
def _predict_ratios(predictor, rows, chunk_rows=100_000):
    """Predict every row of ``rows`` in chunks and scale the result by 1/100."""
    if len(rows) == 0:
        return np.empty(0)
    model = predictor['model']
    # Chunking keeps any temporary copy the model makes of its input bounded.
    chunks = [model.predict(rows[start:start + chunk_rows])
              for start in range(0, len(rows), chunk_rows)]
    return np.concatenate(chunks) / 100


def run_back_test(X,
                  predictor_h,
                  predictor_l,
                  gain_ratio=1.5,
                  min_gain_ratio=1.1,
                  max_loss_ratio=0.9,
                  buy_threshold=0):
    '''
    Back-test the high/low predictors with a simple bracket-order strategy.

    At most one position is open at a time.  On every bar where no position
    is held, the predicted low and high (model output / 100) are checked
    against the thresholds; when they pass, a position is opened at the bar's
    close with the predicted low as stop-loss factor and the predicted high
    as take-profit factor.  While a position is held only the close is
    compared with those two levels; the position is closed at the close
    price, and the same bar is then immediately evaluated as a new buy
    candidate.

    X: 2-D numpy array, one row per bar.  Column 2 is read as the close
       price (with the matrix built by preprocessing(), 'open' is the index,
       so the columns are 0=high, 1=low, 2=close, 3=volume, ...).
    predictor_h / predictor_l: mappings whose 'model' entry has a
       predict(rows) method.  Outputs are divided by 100 and used as
       multiples of the purchase price -- presumably the models predict
       percentages; TODO confirm.
    gain_ratio: minimum predicted high/low ratio (rounded to 2 decimals)
       required to buy
    min_gain_ratio: minimum predicted high factor required to buy
    max_loss_ratio: minimum predicted low factor required to buy
    buy_threshold: minimum number of evaluated flat bars since the last
       purchase before buying again

    Returns (bought_prices, sold_prices, bought_index, sold_index,
    bought_margins, gains, total_gain).  gains holds sell/buy price per
    closed trade and total_gain is their product (1.0 when there are no
    closed trades).  A position still open after the last bar appears in the
    bought_* lists only.
    '''
    bars = np.asarray(X)
    # Predictions depend only on the row, so compute them in bulk instead of
    # issuing one single-row predict() call per bar.
    low_ratios = _predict_ratios(predictor_l, bars)
    high_ratios = _predict_ratios(predictor_h, bars)

    holding = False
    buy_counter = 0
    bought_prices = []
    bought_index = []
    sold_prices = []
    sold_index = []
    bought_margins = []
    gains = []

    for index, row in enumerate(bars):
        close_price = row[2]
        if holding:
            entry_price = bought_prices[-1]
            stop_ratio, target_ratio = bought_margins[-1]
            stop_loss = close_price <= entry_price * stop_ratio
            take_profit = close_price >= entry_price * target_ratio
            if not (stop_loss or take_profit):
                continue
            sold_prices.append(close_price)
            sold_index.append(index)
            gains.append(close_price / entry_price)
            holding = False

        # Reaching this point means no position is open (never held, or just
        # sold on this bar), so the bar is a buy candidate.
        low_prediction = low_ratios[index]
        high_prediction = high_ratios[index]
        predicted_gain = round(high_prediction / low_prediction, 2)
        buy_counter += 1
        should_buy = (predicted_gain >= gain_ratio
                      and high_prediction >= min_gain_ratio
                      and low_prediction >= max_loss_ratio
                      and buy_counter >= buy_threshold)
        if should_buy:
            holding = True
            bought_prices.append(close_price)
            bought_index.append(index)
            bought_margins.append([low_prediction, high_prediction])
            buy_counter = 0

    total_gain = np.prod(gains)
    print('gain:', total_gain, 'gain_ratio:', gain_ratio, 'min_gain_ratio:', min_gain_ratio, 'max_loss_ratio:', max_loss_ratio)
    return bought_prices, sold_prices, bought_index, sold_index, bought_margins, gains, total_gain

# Grid search over the buy thresholds: one back test per parameter
# combination, each stored as a dict so the results can be tabulated later.
backtests = []
for gain_ratio in [1]:  # wider sweep that was considered: np.arange(1, 3, 0.1)
    for min_gain_ratio in np.arange(1, 2, 0.03):
        for max_loss_ratio in np.arange(0.9, 1.1, 0.03):
            (brought_prices, sold_prices,
             brought_index, sold_index,
             brought_margins, gains, gain) = run_back_test(
                input,
                predictor_h,
                predictor_l,
                gain_ratio=gain_ratio,
                min_gain_ratio=min_gain_ratio,
                max_loss_ratio=max_loss_ratio,
                buy_threshold=0,
            )
            score = dict(
                gain_ratio=gain_ratio,
                min_gain_ratio=min_gain_ratio,
                max_loss_ratio=max_loss_ratio,
                gain=gain,
                brought_prices=brought_prices,
                sold_prices=sold_prices,
                brought_index=brought_index,
                sold_index=sold_index,
                brought_margins=brought_margins,
                gains=gains,
            )
            backtests.append(score)

# Timing notes from earlier runs:
# - the triple for loop takes about 1500 seconds
# - estimate for a 50 x 50 grid: 1 min/loop * 50^2 loops = 2500 min = 41.6 hours
# - measured: 375 min for 238 loops = 1.57 min/loop

gain: 1.3181345582053077 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 0.9
gain: 1.3883038049550909 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 0.93
gain: 1.6429296218537326 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 0.9600000000000001
gain: 1.3770932140242778 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 0.9900000000000001
gain: 1.4895209471376523 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 1.02
gain: 1.7016513031544087 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 1.0500000000000003
gain: 1.6339678656978163 gain_ratio: 1 min_gain_ratio: 1.0 max_loss_ratio: 1.08
gain: 1.1047942934568427 gain_ratio: 1 min_gain_ratio: 1.03 max_loss_ratio: 0.9
gain: 1.1955581336702463 gain_ratio: 1 min_gain_ratio: 1.03 max_loss_ratio: 0.93
gain: 1.5214741754099232 gain_ratio: 1 min_gain_ratio: 1.03 max_loss_ratio: 0.9600000000000001
gain: 1.3893929395308573 gain_ratio: 1 min_gain_ratio: 1.03 max_loss_ratio: 0.9900000000000001
gain: 1.4895209471376523 gain_ratio: 1 min_gain_

In [43]:
# Rank every parameter combination by total gain and inspect the best one.
print('num of scores:', len(backtests))
backtests_df = pd.DataFrame(backtests).sort_values(by='gain', ascending=False)
# To persist the ranking, uncomment:
# backtests_df.to_csv(rf'D:\repo\stonks\data\dataset\{sym}\backtests.csv', index=False)
summary_columns = ['min_gain_ratio', 'max_loss_ratio', 'gain']
print(backtests_df[summary_columns].head(5))
# Per-trade gain factors of the top-ranked run.
best_gain = backtests_df['gains'].iloc[0]
print(type(best_gain), len(best_gain))
print(np.mean(best_gain))
print(np.prod(best_gain))

num of scores: 238
    min_gain_ratio  max_loss_ratio      gain
33            1.12            1.05  1.800181
12            1.03            1.05  1.701651
5             1.00            1.05  1.701651
19            1.06            1.05  1.701651
39            1.15            1.02  1.701032
<class 'list'> 1158706
1.0000013792073355
1.8001813914605214
