In [17]:
import numpy as np
import pandas as pd
import talib
import numpy
import datetime as dt
# pd.set_option('display.max_rows', None)  # show all rows
# pd.set_option('display.max_columns', None)  # show all columns
# pd.set_option('display.width', None)  # don't wrap lines
# pd.set_option('display.max_colwidth', None)  # show full text in cells

df = pd.read_excel(r'gbp_usd_2015.xlsx')
df['atr'] = talib.ATR(
    df['high'],
    df['low'],
    df['close'],
    timeperiod=24
)
df['ma'] = talib.KAMA(
    df['close'],
    timeperiod=14
)

df['adx'] = talib.ADX(
    df['high'],
    df['low'],
    df['close'],
    timeperiod=24
)

df['rsi'] = talib.RSI(
    df['close'],
    timeperiod=14
)



In [18]:
df = df.iloc[47:].reset_index(drop=True)

In [19]:
df = df.drop(columns=['Unnamed: 0'])

In [20]:
def set_ma_status(row):
    if row['close'] > row['ma']:
        return 'upper'
    if row['close'] < row['ma']:
        return 'under'
    return 'eq'


df['ma_status'] = df.apply(set_ma_status, axis=1)

In [21]:
df['id'] = df.index

In [22]:
import numpy as np

In [23]:
df['entry'] = df['close']
df['tp_buy'] = df['close'] + (df['atr'] * 2.4)
df['tp_sell'] = df['close'] - (df['atr'] * 2.4)

In [24]:
df['datetime'] = pd.to_datetime(df['datetime'])
df['timestamp'] = df['datetime'].apply(lambda d: int(d.timestamp()))

In [25]:
array = np.array(df[['timestamp', 'open', 'high', 'low', 'close', 'atr', 'adx', 'tp_buy', 'tp_sell']])

In [26]:
patterns = []
array_size = len(array)

for i, element in enumerate(array):
    if i + 24 == array_size:
        break

    pat = array[i:i + 24, :]
    patterns.append(pat)

In [27]:
def get_trade_from_pattern(pattern: np.array):
    timestamp = pattern[-1, 0].copy()
    entry = pattern[-1, 4].copy()
    tp_buy = pattern[-1, 7].copy()
    tp_sell = pattern[-1, 8].copy()

    return timestamp, entry, tp_buy, tp_sell


any_trade = get_trade_from_pattern(patterns[10])

In [28]:
def evaluate_trade_result(chart: np.array, trade: tuple):
    timestamp, entry, tp_buy, tp_sell = trade

    backtest_chart = chart[chart[:, 0] >= timestamp]

    for i, element in enumerate(backtest_chart):
        price_high = element[2]
        price_low = element[3]

        # print(f'entry: {entry} price_high: {price_high} price_low: {price_low}')
        # print(f'tp_buy: {tp_buy} tp_sell: {tp_sell}')

        if price_high > tp_buy:
            return "buy"

        if price_low < tp_sell:
            return "sell"

    return "None"

In [29]:
trade_result = []
for pat in patterns:
    trade = get_trade_from_pattern(pat)
    result = evaluate_trade_result(array, trade)
    trade_result.append((trade[0], result))

In [30]:
patterns_for_train = []
for pat in patterns:
    id = pat[-1, 0]
    closes = pat[:, 4]
    atr = pat[-1, 5]
    adx = pat[-1, 6]
    result = np.array([id, atr, adx])
    result = np.hstack((result, closes))
    patterns_for_train.append(result)

In [31]:
patterns_for_train_df = pd.DataFrame(patterns_for_train, columns=[
    'timestamp',
    'atr',
    'adx',
    'close_1',
    'close_2',
    'close_3',
    'close_4',
    'close_5',
    'close_6',
    'close_7',
    'close_8',
    'close_9',
    'close_10',
    'close_11',
    'close_12',
    'close_13',
    'close_14',
    'close_15',
    'close_16',
    'close_17',
    'close_18',
    'close_19',
    'close_20',
    'close_21',
    'close_22',
    'close_23',
    'close_24',
])

In [32]:
trade_result_df = pd.DataFrame(trade_result, columns=['timestamp', 'result'])

In [33]:
patterns_for_train_df = pd.merge(patterns_for_train_df, trade_result_df, on='timestamp')

In [34]:
df_ma_statuses = df[['timestamp', 'ma_status', 'rsi']]

In [35]:
final_patterns_for_training = pd.merge(patterns_for_train_df, df_ma_statuses, on='timestamp')

In [36]:
final_patterns_for_training = final_patterns_for_training[
    final_patterns_for_training['result'] != 'None'
    ]

In [37]:
final_patterns_for_training['result'] = (
    final_patterns_for_training['result'].apply(lambda x: 1 if x == 'buy' else 0)
)

In [38]:
final_patterns_for_training['ma_status'] = (
    final_patterns_for_training['ma_status'].apply(lambda x: 1 if x == 'upper' else 0)
)

In [39]:
final_patterns_for_training.columns

Index(['timestamp', 'atr', 'adx', 'close_1', 'close_2', 'close_3', 'close_4',
       'close_5', 'close_6', 'close_7', 'close_8', 'close_9', 'close_10',
       'close_11', 'close_12', 'close_13', 'close_14', 'close_15', 'close_16',
       'close_17', 'close_18', 'close_19', 'close_20', 'close_21', 'close_22',
       'close_23', 'close_24', 'result', 'ma_status', 'rsi'],
      dtype='object')

In [40]:
data_set = final_patterns_for_training[
    ['atr', 'adx', 'ma_status', 'rsi', 'close_1', 'close_2', 'close_3', 'close_4',
       'close_5', 'close_6', 'close_7', 'close_8', 'close_9', 'close_10',
       'close_11', 'close_12', 'close_13', 'close_14', 'close_15', 'close_16',
       'close_17', 'close_18', 'close_19', 'close_20', 'close_21', 'close_22',
       'close_23', 'close_24', 'result']].copy()

In [41]:
data_set_array = np.array(data_set)
np.random.shuffle(data_set_array)

In [42]:
data_set_array.shape

(66719, 29)

In [43]:
X , y = data_set_array[:, :-1], data_set_array[:, -1]

In [44]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, train_size=0.8,random_state=42)

In [45]:
from lightgbm import LGBMClassifier

lgbm_classifier = LGBMClassifier(
        # Core settings
    objective="binary",           # or "multiclass" if more than 2 labels
    boosting_type="gbdt",         # could try "dart" for more robustness
    n_estimators=800,             # enough trees for stable learning
    learning_rate=0.03,           # small learning rate = smoother fit

    # Tree structure
    num_leaves=31,                # typical good start, can tune 15–63
    max_depth=-1,                 # let it grow naturally first
    min_data_in_leaf=50,          # prevents overfitting with low correlations
    min_child_weight=1e-3,        # use slightly higher if data is noisy

    # Regularization
    reg_alpha=0.2,                # L1
    reg_lambda=0.8,               # L2
    bagging_fraction=0.8,         # row subsampling
    bagging_freq=1,               # every iteration
    feature_fraction=0.8,         # column subsampling
    subsample_freq=1,

    # Randomness
    random_state=42,
    n_jobs=-1,

    # Extra tricks
    verbose=-1,
    boosting="gbdt"
)

In [46]:
lgbm_classifier.fit(X_train, y_train)

0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.03
,n_estimators,800
,subsample_for_bin,200000
,objective,'binary'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [47]:
from sklearn.model_selection import cross_val_predict
pred_lgbm_clf =  cross_val_predict(lgbm_classifier, X_train, y_train, cv = 3)




In [48]:
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score

print(f"lgbm_clf f1 score is: {f1_score(y_train, pred_lgbm_clf)}")
print(f"lgbm_clf precision is: {precision_score(y_train, pred_lgbm_clf)}")
print(f"lgbm_clf recall score is: {recall_score(y_train, pred_lgbm_clf)}")

lgbm_clf f1 score is: 0.6957190182477075
lgbm_clf precision is: 0.6840135319945296
lgbm_clf recall score is: 0.7078321105359204


In [49]:
y_pred_test = lgbm_classifier.predict(X_test)



In [50]:
result = y_pred_test == y_test

In [51]:
result_df = pd.DataFrame(result, columns=['result'])

In [52]:
len(result_df[result_df['result'] == True]) / len(y_test)

0.7013639088729017

In [53]:
len(result_df[result_df['result'] == False])

3985

In [54]:
final_model = LGBMClassifier(
        # Core settings
    objective="binary",           # or "multiclass" if more than 2 labels
    boosting_type="gbdt",         # could try "dart" for more robustness
    n_estimators=800,             # enough trees for stable learning
    learning_rate=0.03,           # small learning rate = smoother fit

    # Tree structure
    num_leaves=31,                # typical good start, can tune 15–63
    max_depth=-1,                 # let it grow naturally first
    min_data_in_leaf=50,          # prevents overfitting with low correlations
    min_child_weight=1e-3,        # use slightly higher if data is noisy

    # Regularization
    reg_alpha=0.2,                # L1
    reg_lambda=0.8,               # L2
    bagging_fraction=0.8,         # row subsampling
    bagging_freq=1,               # every iteration
    feature_fraction=0.8,         # column subsampling
    subsample_freq=1,

    # Randomness
    random_state=42,
    n_jobs=-1,

    # Extra tricks
    verbose=-1,
    boosting="gbdt"
)

In [55]:
final_model.fit(X, y)

0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,-1
,learning_rate,0.03
,n_estimators,800
,subsample_for_bin,200000
,objective,'binary'
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [56]:
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score

pred_final_lgbm_clf =  cross_val_predict(final_model, X, y, cv = 3)

print(f"lgbm_clf f1 score is: {f1_score(y, pred_final_lgbm_clf)}")
print(f"lgbm_clf precision is: {precision_score(y, pred_final_lgbm_clf)}")
print(f"lgbm_clf recall score is: {recall_score(y, pred_final_lgbm_clf)}")



lgbm_clf f1 score is: 0.7024705569822014
lgbm_clf precision is: 0.6943950048142853
lgbm_clf recall score is: 0.710736150515156


In [57]:
import joblib

joblib.dump(final_model, f"predictor0_0.0.1.pkl")


['predictor0_0.0.1.pkl']

In [58]:
y_test_df = pd.DataFrame(y_test, columns=['y'])

In [59]:
len(y_test_df[y_test_df['y'] == 0])

6710

In [60]:
df

Unnamed: 0,datetime,open,high,low,close,timeframe,atr,ma,adx,rsi,ma_status,id,entry,tp_buy,tp_sell,timestamp
0,2015-01-06 08:00:00+00:00,1.52601,1.52698,1.52550,1.52595,h1,0.002160,1.525969,66.238067,40.017309,under,0,1.52595,1.531134,1.520766,1420531200
1,2015-01-06 09:00:00+00:00,1.52594,1.52594,1.52291,1.52308,h1,0.002197,1.525939,65.605745,31.374558,under,1,1.52308,1.528352,1.517808,1420534800
2,2015-01-06 10:00:00+00:00,1.52303,1.52471,1.52138,1.52426,h1,0.002244,1.525921,65.112615,37.364334,under,2,1.52426,1.529645,1.518875,1420538400
3,2015-01-06 11:00:00+00:00,1.52427,1.52459,1.51874,1.52015,h1,0.002394,1.525290,64.814575,28.148651,under,3,1.52015,1.525896,1.514404,1420542000
4,2015-01-06 12:00:00+00:00,1.52015,1.52029,1.51756,1.51861,h1,0.002408,1.524393,64.599982,25.600729,under,4,1.51861,1.524389,1.512831,1420545600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66742,2025-10-15 16:00:00+00:00,1.33456,1.33528,1.33333,1.33492,h1,0.001664,1.334804,11.591788,56.047021,upper,66742,1.33492,1.338912,1.330928,1760544000
66743,2025-10-15 17:00:00+00:00,1.33493,1.33667,1.33462,1.33650,h1,0.001680,1.334917,11.526248,62.076104,upper,66743,1.33650,1.340531,1.332469,1760547600
66744,2025-10-15 18:00:00+00:00,1.33649,1.34028,1.33620,1.33937,h1,0.001780,1.335354,12.132205,70.099394,upper,66744,1.33937,1.343641,1.335099,1760551200
66745,2025-10-15 19:00:00+00:00,1.33937,1.33943,1.33844,1.33905,h1,0.001747,1.335612,12.712913,68.362746,upper,66745,1.33905,1.343242,1.334858,1760554800
