In [212]:
import pandas as pd
import talib
import numpy as np

In [213]:
df = pd.read_excel('gbp_usd_1h.xlsx')

In [214]:
def set_candle_type(row):
    ratio = row['tr'] / row['atr']

    if ratio < 0.8:
        return 'spinning'
    if 1.2 > ratio >= 0.8:
        return 'standard'
    if 2.4 > ratio >= 1.2:
        return 'long'
    if ratio >= 2.4:
        return 'spike'


def get_candle_type_id(candle_type):
    if candle_type == 'spinning':
        return 0
    if candle_type == 'standard':
        return 1
    if candle_type == 'long':
        return 2
    if candle_type == 'spike':
        return 3


def candle_color(r):
    if r['open'] > r['close']:
        return 'r'
    if r['open'] < r['close']:
        return 'g'

    return 'y'


def set_ma_status(row):
    if row['close'] > row['ma']:
        return 'upper'
    if row['close'] < row['ma']:
        return 'under'
    return 'eq'


def get_candle_color(row):
    open = row['open']
    close = row['close']

    if close > open:
        return 2
    if close == open:
        return 1

    return 0


def get_trade_from_pattern(pattern: np.array):
    timestamp = pattern[-1, 0].copy()
    entry = pattern[-1, 4].copy()
    tp_buy = pattern[-1, 7].copy()
    tp_sell = pattern[-1, 8].copy()

    return timestamp, entry, tp_buy, tp_sell


def evaluate_trade_result(chart: np.array, trade: tuple):
    timestamp, entry, tp_buy, tp_sell = trade

    backtest_chart = chart[chart[:, 0] >= timestamp]

    for i, element in enumerate(backtest_chart):
        price_high = element[2]
        price_low = element[3]

        if price_high > tp_buy:
            return "buy"

        if price_low < tp_sell:
            return "sell"

    return "None"

In [215]:
df['atr'] = talib.ATR(
    df['high'],
    df['low'],
    df['close'],
    timeperiod=24
)
df['ma'] = talib.KAMA(
    df['close'],
    timeperiod=14
)

df['adx'] = talib.ADX(
    df['high'],
    df['low'],
    df['close'],
    timeperiod=24
)

df['rsi'] = talib.RSI(
    df['close'],
    timeperiod=14
)

df['tr'] = talib.TRANGE(
    df['high'],
    df['low'],
    df['close'],
)


# df['macd'] = talib.MA(
#     df['close'],
#     timeperiod=14
# )


In [216]:
df_backup = df.copy()
df = df_backup.iloc[47:]
df = df.reset_index(drop=True)

In [217]:
df['ma_status'] = df.apply(set_ma_status, axis=1)



In [218]:
df['candle_type'] = df.apply(set_candle_type, axis=1)


In [219]:
df['candle_type_id'] = df['candle_type'].apply(get_candle_type_id)


In [220]:
df['candle_color'] = df.apply(get_candle_color, axis=1)

In [221]:
df['entry'] = df['close']
df['tp_buy'] = df['close'] + (df['atr'] * 2.4)
df['tp_sell'] = df['close'] - (df['atr'] * 2.4)

In [222]:
df['datetime'] = pd.to_datetime(df['datetime'])
df['timestamp'] = df['datetime'].apply(lambda d: int(d.timestamp()))

In [223]:
df.columns

Index(['Unnamed: 0', 'datetime', 'open', 'high', 'low', 'close', 'timeframe',
       'atr', 'ma', 'adx', 'rsi', 'tr', 'ma_status', 'candle_type',
       'candle_type_id', 'candle_color', 'entry', 'tp_buy', 'tp_sell',
       'timestamp'],
      dtype='object')

In [224]:
array = np.array(df[['timestamp', 'open', 'high', 'low', 'close', 'atr', 'adx', 'tp_buy', 'tp_sell', 'candle_type_id',
                     'candle_color', ]])

In [225]:
patterns = []
array_size = len(array)

for i, element in enumerate(array):
    if i + 24 == array_size:
        break

    pat = array[i:i + 24, :]
    patterns.append(pat)

In [226]:
trade_result = []
for pat in patterns:
    trade = get_trade_from_pattern(pat)
    result = evaluate_trade_result(array, trade)
    trade_result.append((trade[0], result))

In [227]:
patterns_for_train = []
for pat in patterns:
    id = pat[-1, 0]
    closes = pat[:, 4]
    atr = pat[-1, 5]
    adx = pat[-1, 6]
    min_low = min(pat[:, 3])
    candle_colors = pat[:, 10]
    candle_types = pat[:, 9]
    result = np.array([id, atr, adx, min_low])
    result = np.hstack((result, closes, candle_types, candle_colors))
    patterns_for_train.append(result)


In [228]:
patterns_for_train_df = pd.DataFrame(patterns_for_train,
                                     columns=[
                                                 'timestamp',
                                                 'atr',
                                                 'adx',
                                                 'min_low'
                                             ] + [f'close_{i}' for i in range(1, 25)] + [
                                                 f'candle_type_{i}' for i in range(1, 25)] + [
                                                 f'candle_color_{i}' for i in range(1, 25)]
                                     )

In [229]:
trade_result_df = pd.DataFrame(trade_result, columns=['timestamp', 'result'])

In [230]:
patterns_for_train_df = pd.merge(patterns_for_train_df, trade_result_df, on='timestamp')

In [231]:
df_ma_statuses = df[['timestamp', 'ma_status', 'rsi']]

In [232]:
final_patterns_for_training = pd.merge(patterns_for_train_df, df_ma_statuses, on='timestamp')

In [233]:
final_patterns_for_training = final_patterns_for_training[
    final_patterns_for_training['result'] != 'None'
    ]

In [234]:
final_patterns_for_training['result'] = (
    final_patterns_for_training['result'].apply(lambda x: 1 if x == 'buy' else 0)
)

In [235]:
final_patterns_for_training['ma_status'] = (
    final_patterns_for_training['ma_status'].apply(lambda x: 1 if x == 'upper' else 0)
)

In [236]:
final_patterns_for_training.columns

Index(['timestamp', 'atr', 'adx', 'min_low', 'close_1', 'close_2', 'close_3',
       'close_4', 'close_5', 'close_6', 'close_7', 'close_8', 'close_9',
       'close_10', 'close_11', 'close_12', 'close_13', 'close_14', 'close_15',
       'close_16', 'close_17', 'close_18', 'close_19', 'close_20', 'close_21',
       'close_22', 'close_23', 'close_24', 'candle_type_1', 'candle_type_2',
       'candle_type_3', 'candle_type_4', 'candle_type_5', 'candle_type_6',
       'candle_type_7', 'candle_type_8', 'candle_type_9', 'candle_type_10',
       'candle_type_11', 'candle_type_12', 'candle_type_13', 'candle_type_14',
       'candle_type_15', 'candle_type_16', 'candle_type_17', 'candle_type_18',
       'candle_type_19', 'candle_type_20', 'candle_type_21', 'candle_type_22',
       'candle_type_23', 'candle_type_24', 'candle_color_1', 'candle_color_2',
       'candle_color_3', 'candle_color_4', 'candle_color_5', 'candle_color_6',
       'candle_color_7', 'candle_color_8', 'candle_color_9', 'candle

In [301]:
data_set = final_patterns_for_training[
    [
        'atr', 'adx', 'ma_status', 'rsi',
        'min_low',
        'close_1', 'close_2', 'close_3',
     'close_4', 'close_5', 'close_6', 'close_7', 'close_8', 'close_9',
     'close_10', 'close_11', 'close_12', 'close_13', 'close_14', 'close_15',
     'close_16', 'close_17', 'close_18', 'close_19', 'close_20', 'close_21',
     'close_22', 'close_23', 'close_24', 'candle_type_1', 'candle_type_2',
     'candle_type_3', 'candle_type_4', 'candle_type_5', 'candle_type_6',
     'candle_type_7', 'candle_type_8', 'candle_type_9', 'candle_type_10',
     'candle_type_11', 'candle_type_12', 'candle_type_13', 'candle_type_14',
     'candle_type_15', 'candle_type_16', 'candle_type_17', 'candle_type_18',
     'candle_type_19', 'candle_type_20', 'candle_type_21', 'candle_type_22',
     'candle_type_23', 'candle_type_24', 'candle_color_1', 'candle_color_2',
     'candle_color_3', 'candle_color_4', 'candle_color_5', 'candle_color_6',
     'candle_color_7', 'candle_color_8', 'candle_color_9', 'candle_color_10',
     'candle_color_11', 'candle_color_12', 'candle_color_13',
     'candle_color_14', 'candle_color_15', 'candle_color_16',
     'candle_color_17', 'candle_color_18', 'candle_color_19',
     'candle_color_20', 'candle_color_21', 'candle_color_22',
     'candle_color_23', 'candle_color_24','result']].copy()

In [302]:
for close_name in [f'close_{i}' for i in range(1, 25)]:

    data_set[close_name] = data_set[close_name] - data_set['min_low']

In [303]:
for close_name in [f'close_{i}' for i in range(1, 25)]:
    data_set[close_name] = data_set[close_name] / data_set['atr']

In [304]:
data_set = data_set.drop(columns=['min_low'])
data_set_array = np.array(data_set)
X , y = data_set_array[:, :-1], data_set_array[:, -1]

In [305]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, train_size=0.8,random_state=42)

In [306]:
from lightgbm import LGBMClassifier

lgbm_classifier = LGBMClassifier(
        # Core settings
    objective="binary",           # or "multiclass" if more than 2 labels
    boosting_type="dart",         # could try "dart" for more robustness
    n_estimators=800,             # enough trees for stable learning
    learning_rate=0.03,           # small learning rate = smoother fit

    # Tree structure
    num_leaves=31,                # typical good start, can tune 15–63
    max_depth=-1,                 # let it grow naturally first
    min_data_in_leaf=50,          # prevents overfitting with low correlations
    min_child_weight=1e-3,        # use slightly higher if data is noisy

    # Regularization
    reg_alpha=0.2,                # L1
    reg_lambda=0.8,               # L2
    bagging_fraction=0.8,         # row subsampling
    bagging_freq=1,               # every iteration
    feature_fraction=0.8,         # column subsampling
    subsample_freq=1,

    # Randomness
    random_state=42,
    n_jobs=-1,

    # Extra tricks
    verbose=-1,
    boosting="gbdt"
)

In [307]:
lgbm_classifier.fit(X_train, y_train)

0,1,2
,boosting_type,'dart'
,num_leaves,31
,max_depth,-1
,learning_rate,0.03
,n_estimators,800
,subsample_for_bin,200000
,objective,'binary'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [308]:
from sklearn.model_selection import cross_val_predict
pred_lgbm_clf =  cross_val_predict(lgbm_classifier, X_train, y_train, cv = 3)



In [309]:
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score

print(f"lgbm_clf f1 score is: {f1_score(y_train, pred_lgbm_clf)}")
print(f"lgbm_clf precision is: {precision_score(y_train, pred_lgbm_clf)}")
print(f"lgbm_clf recall score is: {recall_score(y_train, pred_lgbm_clf)}")

lgbm_clf f1 score is: 0.5915682967959528
lgbm_clf precision is: 0.5740655887936114
lgbm_clf recall score is: 0.6101718499965213


In [310]:
from sklearn.neighbors import KNeighborsClassifier

knn_clf = KNeighborsClassifier(n_neighbors=5)

In [311]:
knn_clf.fit(X_train, y_train)

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [314]:
pred_knn_clf =  cross_val_predict(knn_clf, X_train, y_train, cv = 3)

print(f"knn_clf f1 score is: {f1_score(y_train, pred_knn_clf)}")
print(f"knn_clf precision is: {precision_score(y_train, pred_knn_clf)}")
print(f"knn_clf recall score is: {recall_score(y_train, pred_knn_clf)}")

knn_clf f1 score is: 0.5765943682113793
knn_clf precision is: 0.5646172312616697
knn_clf recall score is: 0.5890906560912823


In [319]:
X

array([[2.04102709e-03, 2.81153000e+01, 1.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 2.00000000e+00],
       [1.99098430e-03, 2.79688394e+01, 1.00000000e+00, ...,
        0.00000000e+00, 2.00000000e+00, 2.00000000e+00],
       [1.94094328e-03, 2.79065122e+01, 1.00000000e+00, ...,
        2.00000000e+00, 2.00000000e+00, 2.00000000e+00],
       ...,
       [1.43261125e-03, 2.29742591e+01, 0.00000000e+00, ...,
        2.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.31725952e-03, 1.70676996e+01, 0.00000000e+00, ...,
        2.00000000e+00, 2.00000000e+00, 0.00000000e+00],
       [1.31237370e-03, 1.65276669e+01, 0.00000000e+00, ...,
        2.00000000e+00, 0.00000000e+00, 0.00000000e+00]],
      shape=(35484, 76))