# basic-005
https://chatgpt.com/c/6806689b-d568-800a-9027-3e4ab955adb2

Implements a Seq2Seq forecaster using an LSTM encoder, RepeatVector, and an LSTM decoder (30 input candles -> 100 predicted candles).

In [None]:
# 1. Imports and Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense
from tensorflow.keras.optimizers import Adam

# 2. Load Dataset
# The CSV is parsed as space-delimited; only the five price/volume columns
# listed here are kept, in this order, as the model's features.
df = pd.read_csv('10_year_dataset.csv', sep=' ')[
    ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL']
]

# 3. Normalize Features
# MinMaxScaler rescales every column independently. The fitted scaler is
# kept so later inputs can be scaled the same way and predictions can be
# mapped back to original units.
# NOTE(review): the scaler is fitted on the full dataset, before any
# train/test split, so the test rows influence the scaling parameters.
scaler = MinMaxScaler()
scaler.fit(df)
data_scaled = scaler.transform(df)

# 4. Create Sliding Windows (30 -> 100)
def create_sequences(data, input_len=30, output_len=100):
    """Slice a time-ordered array into (input window, target window) pairs.

    Window ``i`` pairs ``data[i:i + input_len]`` with the ``output_len`` rows
    that immediately follow it. Windows advance one row at a time.

    Args:
        data: Array-like whose first axis is time, e.g. ``(rows, features)``.
        input_len: Number of rows in each input window.
        output_len: Number of rows in each target window.

    Returns:
        Tuple ``(X, y)`` of arrays shaped ``(n, input_len, ...)`` and
        ``(n, output_len, ...)``, where
        ``n = max(len(data) - input_len - output_len + 1, 0)``.
    """
    data = np.asarray(data)
    # "+ 1" keeps the final window, whose target ends exactly on the last row.
    n_windows = max(len(data) - input_len - output_len + 1, 0)
    if n_windows == 0:
        # Preserve the rank when nothing fits, so callers that index
        # X.shape[2] get a proper (0, len, features) array instead of (0,).
        trailing = data.shape[1:]
        return (
            np.empty((0, input_len) + trailing, dtype=data.dtype),
            np.empty((0, output_len) + trailing, dtype=data.dtype),
        )
    X = np.stack([data[i:i + input_len] for i in range(n_windows)])
    y = np.stack([
        data[i + input_len:i + input_len + output_len]
        for i in range(n_windows)
    ])
    return X, y

X, y = create_sequences(data_scaled, 30, 100)
print("X shape:", X.shape, "y shape:", y.shape)

# 5. Train/Test Split
# Consecutive windows are shifted by a single row, so neighbouring samples
# overlap almost entirely. A shuffled split would place near-duplicates of
# every test window in the training set and make the validation loss look
# better than it is. Split chronologically instead: the first 80% of windows
# train the model, the last 20% evaluate it.
# (random_state has no effect when shuffle=False, so it is omitted.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# 6. Build Seq2Seq Model
# Sequence lengths and feature counts are read from the prepared arrays
# rather than repeated as literals, so the network always matches the
# windows that were actually generated.
# Encoder: reduce the input window to a single 128-unit state vector.
input_seq = Input(shape=X.shape[1:])
encoded = LSTM(128, return_sequences=False)(input_seq)
# Decoder: present that same vector once per output timestep and let a
# second LSTM unroll it into a sequence.
decoded = RepeatVector(y.shape[1])(encoded)
decoded = LSTM(128, return_sequences=True)(decoded)
# Project every decoder timestep back to one value per target feature.
output_seq = TimeDistributed(Dense(y.shape[2]))(decoded)

model = Model(input_seq, output_seq)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.summary()

# 7. Train Model
# The held-out split is supplied as validation data, so its loss is
# evaluated alongside the training loss; the returned History object is
# kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=10,
)

# 8. Predict Using New 30 Rows
# Load the latest candles and keep the same feature columns, in the same
# order, that the scaler was fitted on.
recent_df = pd.read_csv('recent_30_candles.csv', sep=' ')[
    ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL']
]
# transform (not fit_transform): reuse the already-fitted scaling parameters.
recent_scaled = scaler.transform(recent_df)

# Add a leading batch axis of size 1. The reshape fails unless the file
# holds exactly 30 rows of X.shape[2] features.
# NOTE: this rebinds `input_seq`, which earlier named the Keras Input tensor.
input_seq = np.reshape(recent_scaled, (1, 30, X.shape[2]))
predicted_seq = model.predict(input_seq)

# 9. Inverse Transform Predicted Values
# predicted_seq carries a leading batch axis; index 0 selects the single
# predicted sequence, which is then mapped back to original units.
predicted_candles = scaler.inverse_transform(predicted_seq[0])

# 10. Plotting 30 Real + 100 Predicted Candles (CLOSE Price)
real_close = recent_df['CLOSE'].values
# Look up the CLOSE column position instead of hard-coding it, so the plot
# stays correct if the feature list is reordered.
close_idx = recent_df.columns.get_loc('CLOSE')
pred_close = predicted_candles[:, close_idx]

# Derive the x positions from the actual series lengths so the prediction
# starts right where the real candles end, whatever the window sizes are.
n_real = len(real_close)
n_pred = len(pred_close)

plt.figure(figsize=(15, 6))
plt.plot(range(n_real), real_close,
         label=f'Real CLOSE ({n_real})', color='blue')
plt.plot(range(n_real, n_real + n_pred), pred_close,
         label=f'Predicted CLOSE ({n_pred})', color='red')
plt.title('Real vs Predicted Candle CLOSE Prices')
plt.xlabel('Hour Index')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
