# Data prep

In [None]:
import os

# Walk up from the current working directory until the directory containing
# config.yaml becomes the working directory, so that config.yaml and the
# relative paths used later in this notebook (e.g. ./ml/...) resolve no
# matter where the notebook was started from.
while not os.path.isfile('config.yaml'):
    previous_dir = os.getcwd()
    os.chdir("../")
    if os.getcwd() == previous_dir:
        # At the filesystem root "../" is a no-op; without this check the
        # loop would spin forever when config.yaml does not exist.
        raise FileNotFoundError(
            "config.yaml not found in the starting directory or any of its parents"
        )
import pandas as pd
import yaml

# Load the project configuration into a plain Python object.
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

from api.api import Api
# Create the project API object from the loaded configuration.
api = Api.create_api(config)

# Breakout model

In [None]:
from ml.paralel import driver_func, features_b


def _rows_to_frame(batches):
    """Flatten an iterable of row batches into one DataFrame (one row per element)."""
    rows = []
    for batch in batches:
        rows.extend(batch)
    return pd.DataFrame(rows)


# Train split first, then test split (driver_func is called in that order).
train_results = driver_func()
train_data = _rows_to_frame(train_results)

test_results = driver_func(test=True)
test_data = _rows_to_frame(test_results)

# The 'target' column is used exactly as produced by driver_func; no label
# shift is applied for the breakout model.
X_train, y_train = train_data[features_b], train_data['target']
X_test, y_test = test_data[features_b], test_data['target']


In [None]:
# Label distribution of the breakout test set.
counts = y_test.value_counts()
# Share of test rows labelled 1. Numerator and denominator both come from the
# test split; .get() avoids a KeyError when class 1 does not occur at all.
counts, counts.get(1, 0) / len(y_test)

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import collections
from sklearn.utils.class_weight import compute_sample_weight
import optuna

# Per-row weights computed from y_train in 'balanced' mode; they are handed to
# XGBClassifier.fit(..., sample_weight=...) for every breakout model fit below.
sample_weights = compute_sample_weight('balanced', y_train)

def _winrate_and_traderate(y_true, y_pred):
    """Score only the rows where the model predicted class 1 (a "trade").

    Args:
        y_true: pandas Series of true labels, aligned with ``y_pred`` by
            position (its index labels are ignored on purpose).
        y_pred: array of predicted labels as returned by ``model.predict``.

    Returns:
        Tuple ``(winrate, traderate)``. ``winrate`` is the accuracy on the
        rows predicted as 1, or 0.0 when no row was predicted as 1.
        ``traderate`` is the share of rows predicted as 1.
    """
    # Positional access: indexing the Series with [i] would look up index
    # labels, which only matches positions for a default 0..n-1 index.
    true_labels = y_true.to_numpy()
    taken = y_pred == 1
    n_taken = int(taken.sum())
    traderate = n_taken / len(y_pred) if len(y_pred) else 0.0
    if n_taken == 0:
        # Nothing was predicted as 1: there is no selection to score, so
        # report the worst winrate instead of scoring an empty sample.
        return 0.0, traderate
    winrate = float(accuracy_score(true_labels[taken], y_pred[taken]))
    return winrate, traderate


def objective(trial):
    """Optuna objective for the breakout model.

    Samples one XGBoost hyperparameter set from ``trial``, fits a classifier
    on X_train/y_train with the balanced sample weights, and returns the
    winrate (accuracy on the test rows predicted as class 1).

    NOTE(review): the score is computed on X_test/y_test, the same split the
    final model is reported on below, so the reported winrate is optimistic.
    Consider tuning on a separate validation split.
    """
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 25),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'eta': trial.suggest_float('eta', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }

    model = XGBClassifier(**param)
    model.fit(X_train, y_train, sample_weight=sample_weights)

    acc, traderate = _winrate_and_traderate(y_test, model.predict(X_test))
    print(f'winrate: {acc}, traderate: {traderate}')
    # traderate carries zero weight, so the study maximizes winrate alone;
    # raise the factor to also reward models that trade more often.
    return 0.0*traderate + acc


# Seeded sampler so the hyperparameter search is repeatable across runs.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=20)
best_params = study.best_params

# Refit with the best hyperparameters and report the same metrics as the objective.
print("Best parameters:", best_params)
model = XGBClassifier(**best_params)
model.fit(X_train, y_train, sample_weight=sample_weights)

y_pred = model.predict(X_test)
acc, traderate = _winrate_and_traderate(y_test, y_pred)

model.save_model('./ml/breakout_model.json')
print(f'winrate: {acc}', f'traderate: {traderate}')

from xgboost import plot_importance
plot_importance(model)
    

# Direction model

In [None]:
from ml.paralel import driver_func, features_d

# Labels kept for the direction model, in the shifted (+1) encoding.
classes = [2, 0]

# Train split: flatten the batches, shift targets by +1, keep only `classes`.
train_results = driver_func(direction=True)
train_data = pd.DataFrame([row for batch in train_results for row in batch])
train_data = train_data.assign(target=train_data['target'] + 1)
train_data = train_data.loc[train_data['target'].isin(classes)]

# Test split: same treatment.
test_results = driver_func(test=True, direction=True)
test_data = pd.DataFrame([row for batch in test_results for row in batch])
test_data = test_data.assign(target=test_data['target'] + 1)
test_data = test_data.loc[test_data['target'].isin(classes)]

X_train = train_data[features_d]
X_test = test_data[features_d]

# Integer-divide by 2 so the two remaining labels become binary: 0 -> 0, 2 -> 1.
y_train = train_data['target'].floordiv(2)
y_test = test_data['target'].floordiv(2)


In [None]:
# Label distribution of the direction test set (labels are 0/1 after the
# filter to classes [2, 0] and the // 2 remap in the previous cell).
counts = y_test.value_counts()
counts

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import collections
from sklearn.utils.class_weight import compute_sample_weight
import optuna

def objective(trial):
    """Optuna objective for the direction model.

    Samples one XGBoost hyperparameter set from ``trial``, fits a classifier
    on X_train/y_train and returns its plain accuracy on X_test/y_test.
    """
    int_space = (
        ('n_estimators', 10, 200),
        ('max_depth', 3, 25),
        ('min_child_weight', 1, 10),
    )
    float_space = (
        ('eta', 0.01, 0.3),
        ('subsample', 0.5, 1.0),
        ('colsample_bytree', 0.5, 1.0),
    )

    # Parameters are sampled in listing order: the integers first, then the floats.
    hyperparams = {name: trial.suggest_int(name, low, high) for name, low, high in int_space}
    hyperparams.update(
        {name: trial.suggest_float(name, low, high) for name, low, high in float_space}
    )

    classifier = XGBClassifier(**hyperparams)
    classifier.fit(X_train, y_train)
    return accuracy_score(y_test, classifier.predict(X_test))



# Seeded sampler so the hyperparameter search is repeatable across runs.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)
best_params = study.best_params

# Refit with the best hyperparameters and report accuracy on the test split.
# NOTE(review): the objective tunes on this same test split, so the number
# printed below is optimistic; consider a separate validation split.
print("Best parameters:", best_params)
model = XGBClassifier(**best_params)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Same argument order as in the objective: true labels first, predictions second.
acc = accuracy_score(y_test, y_pred)

model.save_model('./ml/direction_model.json')
print(f'winrate: {acc}')

from xgboost import plot_importance
plot_importance(model)