In [1]:
import pandas as pd
import numpy as np

# Raise pandas' `display.max_columns` option to 500 for DataFrame output in this notebook.
pd.set_option('display.max_columns', 500)

# Simulator Code

In [2]:
def get_target_stoploss(df, threshold_ratio=(0.04,0.02), use_atr=True, atr_ratio=(2,1), reverse=False):
    """Compute a take-profit target and a stop-loss level for every row of ``df``.

    Args:
        df: Frame exposing ``close``; with ``use_atr`` it must also expose
            ``atr`` plus ``low`` (normal mode) or ``high`` (reverse mode).
        threshold_ratio: ``(target, stop)`` fractions of ``close``; only read
            when ``use_atr`` is false.
        use_atr: When true, offsets are multiples of ``atr``; otherwise they
            are fractions of ``close``.
        atr_ratio: ``(target, stop)`` multiples of ``atr``; only read when
            ``use_atr`` is true.
        reverse: When false the target lies above ``close`` and the stop
            below; when true the two sides are mirrored.

    Returns:
        Tuple ``(targets, stop_losses)``.
    """
    if use_atr:
        # ATR mode: the stop is anchored on the candle extreme (low/high),
        # the target on the close.
        target_offset = df.atr*atr_ratio[0]
        stop_offset = df.atr*atr_ratio[1]
        if reverse:
            return df.close-target_offset, df.high+stop_offset
        return df.close+target_offset, df.low-stop_offset

    # Percentage mode: both levels are anchored on the close.
    target_offset = df.close*threshold_ratio[0]
    stop_offset = df.close*threshold_ratio[1]
    if reverse:
        return df.close-target_offset, df.close+stop_offset
    return df.close+target_offset, df.close-stop_offset

def get_decisions_and_prices(x_data, pred, info_dict):
    """Turn model predictions into alternating entry/exit decisions and prices.

    Starting flat, the first row whose prediction equals 1 opens a position at
    that row's ``close``. The position is closed on the first later row whose
    high/low range touches the target or the stop-loss computed for the entry
    row; after that the search for the next entry resumes on the following row.

    Args:
        x_data: Frame with ``close``, ``low`` and ``high`` columns plus
            whatever ``get_target_stoploss`` reads. Its index is ignored; rows
            are processed by position.
        pred: Per-row predictions (array, list, Series or DataFrame). Entries
            beyond ``len(x_data)`` are ignored.
        info_dict: Mapping with keys ``'model_use_atr'``, ``'model_ratio'``
            and ``'model_reverse'``.

    Returns:
        Tuple ``(decision, execution_price)`` of Series on a fresh
        ``RangeIndex``. ``decision`` holds 1 (entry), -1 (exit) or 0 (hold);
        ``execution_price`` holds the fill price on decision rows and 0.0
        elsewhere.
    """
    # Always work positionally: a RangeIndex that does not start at 0 (or any
    # other label index) must not leak into the positional bookkeeping below.
    x_data = x_data.reset_index(drop=True)
    n_rows = len(x_data)

    # Accept any array-like; extra predictions past the last row are unusable.
    pred = np.asarray(pred).ravel()[:n_rows]

    use_atr = info_dict['model_use_atr']
    # The same ratio pair is passed for both modes; get_target_stoploss only
    # reads the one selected by use_atr.
    model_ratio = info_dict['model_ratio']
    reverse = info_dict['model_reverse']

    targets, stop_losses = get_target_stoploss(x_data,
                                               use_atr=use_atr,
                                               atr_ratio=model_ratio,
                                               threshold_ratio=model_ratio,
                                               reverse=reverse)
    targets = np.asarray(targets)
    stop_losses = np.asarray(stop_losses)
    close_prices = x_data['close'].to_numpy()
    low_prices = x_data['low'].to_numpy()
    high_prices = x_data['high'].to_numpy()

    # Decisions:
    # 1 = buy
    # 0 = hold (default)
    # -1 = sell
    decision = np.zeros(n_rows, dtype=np.int64)
    execution_price = np.zeros(n_rows, dtype=np.float64)

    pos = 0
    in_position = False
    target = stoploss = None
    while pos < n_rows:
        if not in_position:
            # Find next buy opportunity
            hits = np.where(pred[pos:] == 1)[0]
            if hits.size == 0:
                # No more buy opportunities
                break
            idx = pos + hits[0]
            target = targets[idx]
            stoploss = stop_losses[idx]
            decision[idx] = 1
            execution_price[idx] = close_prices[idx]
            in_position = True
        else:
            # Find next sell opportunity
            if not reverse:
                exit_mask = (high_prices[pos:] >= target) | (low_prices[pos:] <= stoploss)
            else:
                exit_mask = (low_prices[pos:] <= target) | (high_prices[pos:] >= stoploss)
            hits = np.where(exit_mask)[0]
            if hits.size == 0:
                # No more sell opportunities; the position stays open
                break
            idx = pos + hits[0]
            # If the target lies inside the candle range the fill is at the
            # target, even when the stop-loss was also inside that candle.
            if low_prices[idx] <= target <= high_prices[idx]:
                execution_price[idx] = target
            else:
                execution_price[idx] = stoploss
            decision[idx] = -1
            in_position = False
        pos = idx + 1

    return (pd.Series(decision, index=x_data.index),
            pd.Series(execution_price, index=x_data.index))

def simulate(in_df, starting_value, trading_fees_percent, trading_fees_buy, trading_fees_sell):
    """Replay a sequence of trades and record the running value after each one.

    Args:
        in_df: Frame with ``decision`` (1 or -1) and ``price`` columns, one
            row per trade in execution order. It is not modified.
        starting_value: Value held before the first trade.
        trading_fees_percent: Percentage fee applied to the result of every trade.
        trading_fees_buy: Flat amount subtracted before a ``decision == 1`` trade.
        trading_fees_sell: Flat amount subtracted before a ``decision == -1`` trade.

    Returns:
        The ``value`` column: the running value after each processed row.
        Rows that were never processed keep 0.0, because the replay stops at
        the first row with a non-positive incoming value, an unknown decision,
        or a negative resulting value.
    """
    ledger = in_df.copy()
    ledger['value'] = 0.0
    keep_fraction = 1.0 - trading_fees_percent / 100
    balance = starting_value

    for label, trade in ledger.iterrows():
        side = trade.decision
        if not balance > 0 or side not in (1, -1):
            break # nothing left to trade with, or not a trade row

        # NOTE(review): decision 1 multiplies by price and decision -1 divides
        # by it -- confirm this matches the intended position accounting.
        if side == 1:
            balance = ((balance-trading_fees_buy) * trade.price) * keep_fraction
        else:
            balance = ((balance-trading_fees_sell) / trade.price) * keep_fraction

        if balance < 0:
            break
        ledger.loc[label,'value'] = balance
    return ledger.value

def run_simulator(X, y, model_use_atr, model_ratio, model_reverse,
                  starting_value=1, trading_fees_percent=0.1,
                  trading_fees_buy=0, trading_fees_sell=0):
    """Simulate trading on ``X`` driven by predictions ``y`` and return the final value.

    Args:
        X: Price/feature frame passed to ``get_decisions_and_prices``; not modified.
        y: Per-row predictions.
        model_use_atr, model_ratio, model_reverse: Forwarded to
            ``get_decisions_and_prices`` under the keys ``'model_use_atr'``,
            ``'model_ratio'`` and ``'model_reverse'``.
        starting_value: Value held before the first trade.
        trading_fees_percent, trading_fees_buy, trading_fees_sell: Forwarded
            to ``simulate``.

    Returns:
        The simulated value after the last exit (``decision == -1``) row, or
        ``starting_value`` when no trade was completed (no decisions at all,
        or a position that was opened but never closed).
    """
    df = X.copy()
    d = dict(model_use_atr=model_use_atr, model_ratio=model_ratio, model_reverse=model_reverse)

    decision, execution_price = get_decisions_and_prices(X, y, d)

    # Assign by position: the decision series carries its own RangeIndex.
    df['decision'] = decision.values
    df['price'] = execution_price.values

    sim_df = df.loc[df['decision'] != 0, ['decision', 'price']].copy()
    if sim_df.empty:
        return starting_value

    sim_df['value'] = simulate(sim_df, starting_value, trading_fees_percent, trading_fees_buy, trading_fees_sell)
    closed_values = sim_df.loc[sim_df['decision'] == -1, 'value'].to_numpy()
    if closed_values.size == 0:
        # A position was opened but never closed: nothing has been realised.
        return starting_value
    return closed_values[-1]

# Custom Scaler

In [3]:
from trade_models.n1_custom_scalers import CustomScaler1

# Load Data

In [4]:
def train_test_split(X, y, train_idx=None, test_idx=None):
    """Select the train and test rows of ``X`` and ``y`` by index label.

    Args:
        X: Feature frame.
        y: Labels sharing ``X``'s index.
        train_idx: Labels passed to ``.loc`` for the training portion.
        test_idx: Labels passed to ``.loc`` for the test portion.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    train_part = (X.loc[train_idx], y.loc[train_idx])
    test_part = (X.loc[test_idx], y.loc[test_idx])
    return train_part + test_part

def load_split_data(suffix, split=False):
    """Load the pickled feature/label pair identified by ``suffix``.

    Reads ``data/X_{suffix}.pkl`` and ``data/y_{suffix}.pkl``.

    Args:
        suffix: Snapshot identifier embedded in both file names.
        split: When true, rows selected by the label slice ``'2018':'2020'``
            become the training set and rows from ``'2021'`` onward the test
            set (assumes the feature index supports such date-string
            slicing -- TODO confirm it is a DatetimeIndex).

    Returns:
        ``(X, y)`` when ``split`` is false, otherwise
        ``(X_train, y_train, X_test, y_test)``.
    """
    # NOTE: unpickling executes code from the file; only load trusted data.
    features = pd.read_pickle(f'data/X_{suffix}.pkl')
    labels = pd.read_pickle(f'data/y_{suffix}.pkl')
    if not split:
        return features, labels

    train_rows = features.loc['2018':'2020'].index
    test_rows = features.loc['2021':].index
    X_train, y_train, X_test, y_test = train_test_split(features, labels, train_rows, test_rows)
    return X_train, y_train, X_test, y_test

# Load the '20210806i' snapshot, split into train and test portions by load_split_data.
X_train, y_train, X_test, y_test = load_split_data(
    '20210806i',
    split=True,
)

# Build Model Pipeline

In [5]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# Pipeline: project-local CustomScaler1 followed by logistic regression,
# fitted on the `buy` column of the training labels.
clf = make_pipeline(
    CustomScaler1(),
    LogisticRegression(random_state=42, max_iter=10000),
).fit(X_train, y_train.buy)

# Evaluate

In [6]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Predict on the held-out test rows.
pred = clf.predict(X_test)

# Score against the same `buy` column the classifier was fitted on, rather
# than the whole label object, so the metrics stay valid if the label frame
# carries additional columns.
prec = precision_score(y_test.buy, pred)
recall = recall_score(y_test.buy, pred)
# Trading simulation on the test rows using ATR-based levels with ratio (2, 1), reverse mode.
profit = run_simulator(X_test, pred, model_use_atr=True, model_ratio=(2,1), model_reverse=True)

print("Precision:", prec)
print("Recall:", recall)
print("Profit:", profit)

Precision: 0.5623529411764706
Recall: 0.05825716026812919
Profit: 1.1390253129535695


# Save Model

In [7]:
import pickle
import datetime

# Timestamp (yymmddHHMM) makes each saved model file name unique per minute.
timestamp = datetime.datetime.now().strftime('%y%m%d%H%M')
filename = f"models/nm_lrc_scaled_{timestamp}.pkl"
print(f'Saving model to {filename}...')
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

Saving model to models/nm_lrc_scaled_2108171514.pkl...
