# architecture-16 ( Basic Regression )

## What's new:

1- Add unsupervised learning


In [18]:



import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed

In [19]:
df = pd.read_csv('datasets-16/XAGUSD-H1-rates.csv', sep='\t').dropna()

In [20]:
# Scaling Features
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[['<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<TICKVOL>']])

close_scaler = MinMaxScaler()
scaled_close = close_scaler.fit_transform(df[['<CLOSE>']])

In [21]:
# Prepare sequences
def create_sequences(features, target, window, horizon):
    X, y = [], []
    for i in range(window, len(features) - horizon):
        X.append(features[i - window:i])
        y.append(target[i:i + horizon])
    return np.array(X), np.array(y)


WINDOW_SIZE = 60
FORECAST_HORIZON = 10

X, y = create_sequences(scaled, scaled_close, WINDOW_SIZE, FORECAST_HORIZON)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# === Phase 1: Unsupervised Pretraining using LSTM Autoencoder ===
def build_lstm_autoencoder(timesteps, features, latent_dim):
    inputs = Input(shape=(timesteps, features))
    encoded = LSTM(latent_dim)(inputs)
    decoded = RepeatVector(timesteps)(encoded)
    decoded = LSTM(features, return_sequences=True)(decoded)

    autoencoder = Model(inputs, decoded)
    encoder = Model(inputs, encoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

latent_dim = 32
autoencoder, encoder = build_lstm_autoencoder(WINDOW_SIZE, X_train.shape[2], latent_dim)

print("Pretraining autoencoder...")
autoencoder.fit(
    X_train, X_train,
    epochs=3,
    batch_size=64,
    validation_split=0.2,
    callbacks=[
        callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    ],
    verbose=1
)




# === Phase 2: Supervised Fine-tuning ===
# Create LSTM model and initialize LSTM weights from encoder
def build_forecasting_model(input_shape, encoder_model, forecast_horizon):
    forecasting_model = Sequential()

    # Add pre-trained encoder LSTM
    pretrained_lstm_layer = LSTM(latent_dim, input_shape=input_shape)
    pretrained_lstm_layer.build(input_shape=(None, *input_shape))
    pretrained_lstm_layer.set_weights(encoder_model.layers[1].get_weights())  # Transfer encoder LSTM weights

    forecasting_model.add(pretrained_lstm_layer)
    forecasting_model.add(Dense(forecast_horizon))

    forecasting_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return forecasting_model

model = build_forecasting_model((X_train.shape[1], X_train.shape[2]), encoder, FORECAST_HORIZON)

# === Train forecasting model ===
mc = callbacks.ModelCheckpoint(filepath='mcp_saved_model.keras', monitor='val_loss', save_best_only=True)
es = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

print("Fine-tuning with supervised learning...")
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
    callbacks=[mc, es],
    verbose=1
)

Pretraining autoencoder...
Epoch 1/3


2025-07-20 13:38:20.179891: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 42249600 exceeds 10% of free system memory.
2025-07-20 13:38:20.230713: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 42249600 exceeds 10% of free system memory.


[1m551/551[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 40ms/step - loss: 0.0106 - val_loss: 0.0015
Epoch 2/3
[1m551/551[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 38ms/step - loss: 8.3615e-04 - val_loss: 6.1431e-04
Epoch 3/3
[1m551/551[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 39ms/step - loss: 6.1395e-04 - val_loss: 4.8417e-04


  super().__init__(**kwargs)


TypeError: RNN.build() got an unexpected keyword argument 'input_shape'

In [None]:
# === Load the last 60 rows from a separate CSV file for prediction ===
input_df = pd.read_csv('datasets-16/new-data-for-test/rows-60-from-20240503/rows-60-from-20240503.csv',
                       sep='\t').dropna()
input_scaled = scaler.transform(
    input_df[['<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<TICKVOL>']])
input_sequence = np.expand_dims(input_scaled, axis=0)  # shape: (1, 60, 5)

# === Predict the next 10 candles + Inverse scale ===
pred = model.predict(input_sequence)
prediction = close_scaler.inverse_transform(pred)

# plot section

In [None]:
import sys

sys.path.insert(1, '../utils')
import forex_plot_utils
import os

# PARAMETERS
csv1_path = 'datasets-16/new-data-for-test/rows-60-from-20240503/latest-4-for-history.csv'
csv3_path = 'datasets-16/new-data-for-test/rows-60-from-20240503/after.csv'
plot_title = 'Actual vs Predicted Forex Closing Prices'
output_plot_path = None  # e.g., 'output.png'

# LOAD DATA FROM CSVS
historical_df = forex_plot_utils.load_csv_with_datetime(csv1_path) if os.path.exists(csv1_path) else None
actual_future_df = forex_plot_utils.load_csv_with_datetime(csv3_path) if os.path.exists(csv3_path) else None

# LOAD DATA FROM PREDICTION

# Combine <DATE> and <TIME> columns into a datetime
input_df['DATETIME'] = pd.to_datetime(input_df['<DATE>'] + ' ' + input_df['<TIME>'])

last_timestamp = input_df['DATETIME'].iloc[-1]
datetime_index = pd.date_range(start=last_timestamp + pd.Timedelta(hours=1), periods=len(prediction[0]), freq='h')

# Create DataFrame
predicted_df = pd.DataFrame({'DATETIME': datetime_index, '<CLOSE>': prediction[0]})

In [None]:
# PLOT
forex_plot_utils.plot_all_series(
    historical_df=historical_df,
    predicted_df=predicted_df,
    actual_future_df=actual_future_df,
    title=plot_title,
    output_path=output_plot_path
)

In [None]:
from datetime import datetime
import os
import pandas as pd
import matplotlib.pyplot as plt

# === Create timestamp and paths ===
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_filename = f'model_{timestamp}.keras'
model_path = os.path.join('saved_models', model_filename)

# Directory to hold logs and extras
log_dir = os.path.join('saved_models', f'model_{timestamp}_logs')
os.makedirs(log_dir, exist_ok=True)

# === Save model ===
model.save(model_path)

# === Save training history ===
history_df = pd.DataFrame(history.history)
history_df.to_csv(os.path.join(log_dir, 'training_history.csv'), index=False)

# === Save training loss plot ===
plt.figure()
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(log_dir, 'training_loss.png'))
plt.close()

# === Save model summary and final performance ===
with open(os.path.join(log_dir, 'model_log.txt'), 'w') as f:
    model.summary(print_fn=lambda x: f.write(x + '\n'))
    final_train_loss = history.history['loss'][-1]
    final_test_loss, final_test_mae = model.evaluate(X_test, y_test, verbose=0)
    f.write(f'\nFinal Training Loss: {final_train_loss:.6f}\n')
    f.write(f'Final Test Loss: {final_test_loss:.6f}\n')
    f.write(f'Final Test MAE : {final_test_mae:.6f}\n')
