In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import datetime as dt
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [None]:
# Load the raw CSV into `df` and print its column/dtype summary.
# NOTE(review): the path is absolute and machine-specific, so the notebook
# will not run on another machine without editing it.
df = pd.read_csv('/Users/data/VsCodeProjects/DataScience/data/raw/eurusd_hour.csv')
# NOTE(review): with default notebook settings only the last expression of a
# cell is rendered, so this head() result is computed and then discarded.
df.head(5)
df.info()

In [None]:
from datetime import datetime, date, time

# Convert a date string into a general-purpose datetime object.
def str_to_datetime(s: str):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime`` at midnight.

    Args:
        s: Date text whose first three ``-``-separated fields are the year,
            month and day.

    Returns:
        A ``datetime`` with the parsed year, month and day; the time part is
        left at its default of 00:00:00.

    Raises:
        IndexError: If ``s`` has fewer than three ``-``-separated fields.
        ValueError: If a field is not an integer or the date is invalid.
    """
    splited = s.split('-')
    year, month, day = int(splited[0]), int(splited[1]), int(splited[2])
    # `datetime` is the class itself here (the cell does
    # `from datetime import datetime`), so it is called directly;
    # `datetime.datetime(...)` would raise AttributeError.
    return datetime(year=year, month=month, day=day)




Преобразование строки времени (ЧЧ:ММ) в объект datetime.time

In [None]:
def str_hours_to_datetime(t: str):
    """Parse an ``HH:MM`` string into a ``datetime.time`` object.

    Args:
        t: Time text whose first two ``:``-separated fields are the hour and
            the minute. Any further fields (e.g. seconds) are ignored.

    Returns:
        A ``time`` with the parsed hour and minute; seconds are left at 0.

    Raises:
        IndexError: If ``t`` has fewer than two ``:``-separated fields.
        ValueError: If a field is not an integer or is out of range.
    """
    splited = t.split(':')
    hour, minute = int(splited[0]), int(splited[1])
    # Use the `time` class imported via `from datetime import ... time`.
    # `datetime.time` on the datetime *class* is the instance method that
    # extracts a time from an existing datetime, so calling it with
    # hour/minute keywords raised TypeError.
    return time(hour=hour, minute=minute)

In [None]:
# Print column dtypes and non-null counts for the frame as loaded.
df.info()

In [None]:
# Parse both text columns first, then keep only the calendar-date part of
# 'Date' and the time-of-day part of 'Time'.
parsed_dates = pd.to_datetime(df['Date'])
parsed_times = pd.to_datetime(df['Time'])
df['Date'] = parsed_dates.dt.date
df['Time'] = parsed_times.dt.time

In [None]:
def _combine_row(row):
    """Join one row's 'Date' and 'Time' values into a single datetime."""
    return datetime.combine(date=row['Date'], time=row['Time'])


# Row-wise merge of the separate date and time columns into 'Datetime'.
df['Datetime'] = df.apply(_combine_row, axis=1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Display the combined 'Datetime' column to check the merge result.
df['Datetime']

In [None]:
# Plot the raw 'BC' column against the combined timestamp.
plt.plot(df['Datetime'], df['BC'])


In [None]:
# Promote 'Datetime' to the frame's index and remove it from the columns.
datetime_index = pd.to_datetime(df['Datetime'])
df = df.drop(columns=['Datetime'])
df.index = datetime_index

In [None]:
# List the remaining column names.
df.columns

In [None]:
# 'Date' is already part of the datetime index, so the column is removed.
df = df.drop('Date', axis=1)

In [None]:
# 'Time' is already part of the datetime index, so the column is removed.
df = df.drop('Time', axis=1)

In [None]:
# Display the frame after the Date/Time columns were dropped.
df

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Price_open is the row-wise mean of the 'BO' and 'AO' columns.
open_quotes = df.loc[:, ['BO', 'AO']]
df['Price_open'] = open_quotes.mean(axis='columns')

In [None]:
# Remove column 'AO' (already averaged into Price_open); pop() returns the
# removed Series, which becomes the cell output.
df.pop('AO')

In [None]:
# Remove column 'BO' (already averaged into Price_open); pop() returns the
# removed Series, which becomes the cell output.
df.pop('BO')

In [None]:
# Highest / Lowest are the row-wise means of the 'BH'/'AH' and 'BL'/'AL' pairs.
high_quotes = df.loc[:, ['BH', 'AH']]
low_quotes = df.loc[:, ['BL', 'AL']]
df['Highest'] = high_quotes.mean(axis='columns')
df['Lowest'] = low_quotes.mean(axis='columns')

In [None]:
# Remove the four source columns now folded into Highest / Lowest.
# Only the final pop() is rendered as the cell output, so the first three
# columns are simply deleted.
for spent_column in ('BH', 'AH', 'AL'):
    del df[spent_column]
df.pop('BL')

In [None]:
# Display the frame after the Highest / Lowest aggregation.
df

In [None]:
# Price_close is the row-wise mean of the 'BC' and 'AC' columns.
close_quotes = df.loc[:, ['BC', 'AC']]
df['Price_close'] = close_quotes.mean(axis='columns')

In [None]:
# Remove the source columns folded into Price_close; the last pop() is the
# cell output.
del df['BC']
df.pop('AC')

In [None]:
# 'change' is the row-wise mean of 'BCh' and 'ACh'; both sources are then
# removed, with the last pop() rendered as the cell output.
change_quotes = df.loc[:, ['BCh', 'ACh']]
df['change'] = change_quotes.mean(axis='columns')
del df['BCh']
df.pop('ACh')

In [None]:
# Encode the hour of day as a point on the unit circle (sin/cos pair).
# 'Hour' is only a scratch column: it is added and removed again here.
hour_of_day = df.index.hour
df['Hour'] = hour_of_day
hour_angle = 2*np.pi * hour_of_day/24
df['hour_sin'] = np.sin(hour_angle)
df['hour_cos'] = np.cos(hour_angle)
del df['Hour']
df

In [None]:
# Log of the close/open ratio of each bar.
close_over_open = df['Price_close'].div(df['Price_open'])
df['log_ret_body'] = np.log(close_over_open)
df

In [None]:
# High-low span of the bar, expressed relative to the open price.
high_low_span = df['Highest'].sub(df['Lowest'])
df['range'] = high_low_span.div(df['Price_open'])
df

In [None]:
# Distance from the larger of open/close up to the high, relative to the open.
body_top = df[['Price_open', 'Price_close']].max(axis=1)
df['upper_wick'] = (df['Highest'] - body_top) / df['Price_open']
df

In [None]:
# Distance from the low up to the smaller of open/close, relative to the open.
body_bottom = df[['Price_open', 'Price_close']].min(axis=1)
df['lower_wick'] = (body_bottom - df['Lowest']) / df['Price_open']
df

In [None]:
# Position of the close inside the bar's low-high span.
# NOTE(review): when Highest == Lowest the denominator is zero and the result
# is non-finite (NaN for 0/0); NaN rows are removed later by the dropna() on
# the feature columns -- confirm that dropping such bars is intended.
close_above_low = df['Price_close'] - df['Lowest']
bar_span = df['Highest'] - df['Lowest']
df['close_pos'] = close_above_low / bar_span
df

In [None]:
# Session open/close hours, for UTC-3 winter time.
asia_open = 2
asia_close = 10

frankfurt_open = 10
frankfurt_close = 11

london_open = 11
london_close = 19

# The NY close hour is smaller than its open hour: the session wraps past midnight.
ny_open = 16
ny_close = 0

In [None]:
# 0/1 flags marking which session(s) each bar's hour falls into.
# NOTE(review): both bounds are inclusive, so a bar stamped exactly at a
# session's close hour is still flagged as inside it -- confirm this matches
# the bar timestamp convention of the data.
bar_hour = df.index.hour

df['is_asia'] = ((asia_open <= bar_hour) & (bar_hour <= asia_close)).astype(int)

df['is_frankfurt'] = ((frankfurt_open <= bar_hour) & (bar_hour <= frankfurt_close)).astype(int)

df['is_london'] = ((london_open <= bar_hour) & (bar_hour <= london_close)).astype(int)

# The NY session wraps past midnight, so the two bounds are OR-ed, not AND-ed.
df['is_ny'] = ((ny_open <= bar_hour) | (bar_hour <= ny_close)).astype(int)



In [None]:
# Boolean masks that are True on rows where a session flag differs from the
# previous row. The first row's diff is NaN and therefore compares False.
f_change = df['is_frankfurt'].diff().abs().gt(0)
l_change = df['is_london'].diff().abs().gt(0)
n_change = df['is_ny'].diff().abs().gt(0)


In [None]:
# 1 on rows where the Frankfurt, London or NY flag switched, else 0.
# NOTE(review): the Asia flag is not part of this union -- confirm intended.
any_session_switch = f_change | l_change | n_change
df['if_change'] = any_session_switch.astype(int)
df

In [None]:
# List the column names after the session features were added.
df.columns

In [None]:
# Rolling-window lengths (in rows) used for each family of rolling features.
momentum, volatility, pressure = [3, 6, 12], [3, 6, 12], [3, 6]


In [None]:
# Rolling features over the trailing `span` rows:
#   mom_*      -> sum of log_ret_body
#   vol_*      -> standard deviation of log_ret_body
#   pressure_* -> mean of close_pos
body_returns = df['log_ret_body']
close_position = df['close_pos']

for span in momentum:
    df[f'mom_{span}'] = body_returns.rolling(span).sum()

for span in volatility:
    df[f'vol_{span}'] = body_returns.rolling(span).std()

for span in pressure:
    df[f'pressure_{span}'] = close_position.rolling(span).mean()

In [None]:
# target_delta on row t is log(Price_close[t+1] / Price_close[t]), i.e. it
# already looks one row ahead; the last row has no successor and gets NaN.
next_close = df['Price_close'].shift(-1)
df['target_delta'] = np.log(next_close / df['Price_close'])
# 1 when the next-row log return is strictly positive, else 0 (NaN -> 0).
df['target_dir'] = df['target_delta'].gt(0).astype(int)

In [None]:
# Display the frame with all features and both target columns.
df

In [None]:
# Columns kept out of the model input: the averaged price columns, 'change',
# and both target columns.
ex = [
    'Price_open',
    'Price_close',
    'Highest',
    'Lowest',
    'change',
    'target_delta',
    'target_dir',
]

# Every remaining column, in frame order, is a model feature.
features = [column for column in df.columns if column not in ex]

In [None]:
# Keep only rows where every feature and both targets are present; copy() so
# later column assignments act on an independent frame.
required_columns = features + ['target_delta', 'target_dir']
df = df.dropna(subset=required_columns).copy()



In [None]:
# Guard: no NaN may remain in the feature or target columns after cleanup.
checked_columns = features + ['target_delta', 'target_dir']
assert df[checked_columns].notna().all().all(), 'NaN остались после очистки'



In [None]:
# Report how many rows remain after dropping incomplete rows.
print(f'Rows after cleanup: {len(df)}')



In [None]:
# Pull the feature matrix and both per-row target vectors out as NumPy arrays.
X_all = df[features].to_numpy()
y_delta = df['target_delta'].to_numpy()
y_dir = df['target_dir'].to_numpy()

In [None]:
# Build fixed-length sliding windows over the feature matrix and pair each
# window with the target stored on the window's LAST row.
#
# target_delta / target_dir already look one bar ahead (they are computed from
# Price_close.shift(-1)), so the target on row t describes the move from bar t
# to bar t+1. Indexing the targets at i + window_len (one row past the window)
# shifted the label a second time: the model had to predict the move after a
# bar it never saw. It also dropped the last complete window.
window_len = 72

# Read the per-row targets from the frame rather than from y_delta / y_dir:
# those names are rebound to the windowed arrays below, so re-running this
# cell would otherwise index the already-windowed arrays and fail.
row_target_delta = df['target_delta'].to_numpy()
row_target_dir = df['target_dir'].to_numpy()

X, y_delta_l, y_dir_l = [], [], []

# +1 so the final complete window (the one ending on the last row) is kept.
for i in range(len(df) - window_len + 1):
    last_row = i + window_len - 1
    X.append(X_all[i:i + window_len])
    y_delta_l.append(row_target_delta[last_row])
    y_dir_l.append(row_target_dir[last_row])

X = np.array(X)
y_delta = np.array(y_delta_l).reshape(-1, 1)
y_dir = np.array(y_dir_l).reshape(-1, 1)

# Chronological 70% / 15% / remainder split (no shuffling).
train_size = int(len(X) * 0.7)
val_size = int(len(X) * 0.15)
val_end = train_size + val_size

X_train, y_train_delta, y_train_dir = X[:train_size], y_delta[:train_size], y_dir[:train_size]
X_val, y_val_delta, y_val_dir = X[train_size:val_end], y_delta[train_size:val_end], y_dir[train_size:val_end]
X_test, y_test_delta, y_test_dir = X[val_end:], y_delta[val_end:], y_dir[val_end:]

In [None]:
from sklearn.preprocessing import RobustScaler

# Fit the scaler on training windows only, flattened to (rows, n_features),
# so validation/test statistics never influence the scaling.
X_train_2d = X_train.reshape(-1, X_train.shape[-1])
scaler = RobustScaler().fit(X_train_2d)

def scale_windows(X):
    """Apply the module-level fitted ``scaler`` to an array of windows.

    The array is flattened so its last axis (features) becomes the column
    axis, transformed, and reshaped back to the input's shape.

    Args:
        X: Array whose last axis holds the features.

    Returns:
        The transformed values, with the same shape as ``X``.
    """
    feature_count = X.shape[-1]
    flat_scaled = scaler.transform(X.reshape(-1, feature_count))
    return flat_scaled.reshape(X.shape)

# Scale all three splits with the scaler fitted on the training windows.
# NOTE(review): the inputs are overwritten, so re-running this cell alone
# scales already-scaled data.
X_train, X_val, X_test = (
    scale_windows(split) for split in (X_train, X_val, X_test)
)



In [None]:
# Sanity check of the final tensors: print the shape of each split.
print('Train/Val/Test:', X_train.shape, X_val.shape, X_test.shape)



In [None]:
# Guard: every scaled input split must be free of NaN and infinities.
assert all(np.isfinite(split).all() for split in (X_train, X_val, X_test)), 'В X есть NaN/inf'



In [None]:
# Guard: both target arrays must be finite in every split.
assert all(np.isfinite(part).all() for part in (y_train_delta, y_val_delta, y_test_delta)), 'В y_delta есть NaN/inf'
assert all(np.isfinite(part).all() for part in (y_train_dir, y_val_dir, y_test_dir)), 'В y_dir есть NaN/inf'

# Balance the direction classes (in case they are imbalanced): positive
# samples are weighted by neg/pos, negatives keep weight 1.
n_pos = int(np.count_nonzero(y_train_dir == 1))
n_neg = int(np.count_nonzero(y_train_dir == 0))
# max(..., 1) avoids a division by zero when there are no positive samples.
pos_weight = n_neg / max(n_pos, 1)

is_positive = y_train_dir.ravel() == 1
dir_sample_weight = np.where(is_positive, pos_weight, 1.0).astype(np.float32)
# The regression head gets uniform weights.
delta_sample_weight = np.ones(y_train_delta.shape[0], dtype=np.float32)

print(f'Direction class balance train -> pos: {n_pos}, neg: {n_neg}, pos_weight: {pos_weight:.3f}')



In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Seed before any layer is created; the statement order below is kept as-is.
tf.keras.utils.set_random_seed(42)

# Input dimensions are taken from the training tensor: axis 1 is the window
# length, axis 2 the number of features.
seq_len = X_train.shape[1]
n_features = X_train.shape[2]

inputs = Input(shape=(seq_len, n_features))

# Recurrent trunk: a bidirectional LSTM that returns the full sequence,
# followed by a second LSTM that returns only its final output; dropout
# after each.
x = Bidirectional(LSTM(96, return_sequences=True))(inputs)
x = Dropout(0.2)(x)
x = LSTM(64, return_sequences=False)(x)
x = Dropout(0.2)(x)

# Dense representation shared by both output heads.
shared = Dense(96, activation='relu')(x)

# Two heads: an unbounded scalar named 'delta' and a sigmoid scalar named
# 'direction'. The names are the keys used in compile() and fit().
delta_output = Dense(1, name='delta')(shared)
direction_output = Dense(1, activation='sigmoid', name='direction')(shared)

model = Model(inputs=inputs, outputs=[delta_output, direction_output])

# Per-head losses and metrics, keyed by output layer name. The total loss is
# 0.4 * delta loss + 0.6 * direction loss.
# NOTE(review): Huber is quadratic for errors below its delta (0.5 here); if
# the log-return targets are much smaller than that, it behaves like a
# squared-error loss -- confirm the delta choice.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={
        'delta': tf.keras.losses.Huber(delta=0.5),
        'direction': 'binary_crossentropy'
    },
    loss_weights={
        'delta': 0.4,
        'direction': 0.6
    },
    metrics={
        'delta': ['mse'],
        'direction': ['accuracy', tf.keras.metrics.AUC(name='auc')]
    }
)

# Both callbacks watch validation accuracy of the 'direction' head (higher is
# better): training stops after 8 epochs without improvement and restores the
# best weights; the learning rate is halved after 3 such epochs, down to 1e-5.
callbacks = [
    EarlyStopping(
        monitor='val_direction_accuracy',
        mode='max',
        patience=8,
        restore_best_weights=True
    ),
    ReduceLROnPlateau(
        monitor='val_direction_accuracy',
        mode='max',
        factor=0.5,
        patience=3,
        min_lr=1e-5
    )
]

# Print the layer-by-layer summary before training.
model.summary()

# Train both heads together. Targets and sample weights are passed as dicts
# keyed by output name; shuffle=False keeps the samples in their array order.
history = model.fit(
    X_train,
    {'delta': y_train_delta, 'direction': y_train_dir},
    validation_data=(X_val, {'delta': y_val_delta, 'direction': y_val_dir}),
    sample_weight={'delta': delta_sample_weight, 'direction': dir_sample_weight},
    epochs=80,
    batch_size=64,
    shuffle=False,
    callbacks=callbacks,
    verbose=1
)



In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score

# Pick the direction threshold on the validation split instead of using a
# fixed 0.5: scan 61 evenly spaced candidates in [0.35, 0.65] and keep the
# first one with the highest validation accuracy.
val_pred_delta, val_pred_dir = model.predict(X_val, verbose=0)
thresholds = np.linspace(0.35, 0.65, 61)

val_acc_by_t = []
for candidate in thresholds:
    val_labels = (val_pred_dir >= candidate).astype(int)
    val_acc_by_t.append(accuracy_score(y_val_dir, val_labels))

best_index = int(np.argmax(val_acc_by_t))
best_threshold = float(thresholds[best_index])

# Score the test split with the threshold chosen above.
pred_delta, pred_dir = model.predict(X_test, verbose=0)
pred_dir_label = (pred_dir >= best_threshold).astype(int)
pred_dir_label_at_05 = (pred_dir >= 0.5).astype(int)

mse = mean_squared_error(y_test_delta, pred_delta)
mae = mean_absolute_error(y_test_delta, pred_delta)

acc_at_05 = accuracy_score(y_test_dir, pred_dir_label_at_05)
acc_at_best = accuracy_score(y_test_dir, pred_dir_label)

print(f'Delta prediction: MSE={mse:.6f}, MAE={mae:.6f}')
print(f'Best threshold from val: {best_threshold:.3f}')
print(f'Direction accuracy @0.5: {acc_at_05:.3f}')
print(f'Direction accuracy @best_t: {acc_at_best:.3f}')



In [None]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Classification metrics for the thresholded direction labels on the test split.
accuracy = accuracy_score(y_test_dir, pred_dir_label)
f1 = f1_score(y_test_dir, pred_dir_label)
cm = confusion_matrix(y_test_dir, pred_dir_label)

print(f"Direction prediction: Accuracy={accuracy:.3f}, F1={f1:.3f}")
print("Confusion matrix:")
print(cm)



In [None]:
import matplotlib.pyplot as plt

# Overlay the true and predicted delta values for the test split.
fig, ax = plt.subplots(figsize=(12,4))
ax.plot(y_test_delta, label='Real delta')
ax.plot(pred_delta, label='Predicted delta', alpha=0.7)
ax.legend()
plt.show()


In [None]:
# Overlay the true and predicted direction labels for the test split.
fig, ax = plt.subplots(figsize=(12,2))
ax.plot(y_test_dir, label='Real direction')
ax.plot(pred_dir_label, label='Predicted direction', alpha=0.7)
ax.legend()
plt.show()


In [None]:
# Trade on the predicted direction: map labels {0, 1} to positions {-1, +1}
# and multiply by the realised delta of each test sample.
position = pred_dir_label.flatten()*2 - 1
profit = position * y_test_delta.flatten()
cumulative_profit = np.cumsum(profit)

fig, ax = plt.subplots(figsize=(12,4))
ax.plot(cumulative_profit, label='Strategy cumulative P&L')
ax.legend()
plt.show()


In [None]:
# Report the total number of windows and how many fell into each split.
print('Total windows:', len(X))
print('Train/Val/Test windows:', len(X_train), len(X_val), len(X_test))

