In [None]:
import pandas as pd
import os
import sys

# Locate the directory this code is running from.
try:
    # Works when the code is executed as a script file.
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined (e.g. in a notebook kernel): use the working directory.
    current_dir = os.getcwd()

# Move up one directory level (from EDA) to the parent directory, where the
# 'xdrive' and 'Pipeline' folders are expected to live.
parent_dir = os.path.dirname(current_dir)
xdrive_path = os.path.join(parent_dir, 'xdrive')
pipeline_path = os.path.join(parent_dir, 'Pipeline')

# Make both folders importable. The membership check keeps this cell
# idempotent: re-running it does not pile duplicate entries onto sys.path.
for extra_path in (xdrive_path, pipeline_path):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [None]:
from build_a_pipeline2 import build_pipeline

# Run the project pipeline and unpack its six return values.
# Presumably feature (x_*) and target (y_*) splits for train / validation /
# test -- confirm against build_a_pipeline2.
(
    x_train, x_val, x_test,
    y_train, y_val, y_test,
) = build_pipeline()


In [None]:
# Display x_train (first value returned by build_pipeline()) as this cell's output.
x_train

In [None]:
# Display x_val (second value returned by build_pipeline()) as this cell's output.
x_val

In [None]:
# Display x_test (third value returned by build_pipeline()) as this cell's output.
x_test

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

from sklearn.metrics import mean_squared_error

# x_train / x_val come from build_pipeline() and are expected to be 2-D tables
# of shape [samples, features]; y_train / y_val should have the shape [samples].
# (Assumed from how they are used below -- confirm against the pipeline.)
print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
print(f"x_val shape: {x_val.shape}, y_val shape: {y_val.shape}")


def _as_lstm_input(features):
    """Return ``features`` as a 3-D array [samples, time_steps=1, features].

    Accepts anything exposing ``to_numpy()`` (e.g. a pandas DataFrame) or an
    array-like. A 2-D input gets a singleton time axis inserted; an input that
    is already 3-D is returned unchanged so that re-running this cell is safe.

    Raises:
        ValueError: if the input is neither 2-D nor 3-D.
    """
    values = features.to_numpy() if hasattr(features, "to_numpy") else np.asarray(features)
    if values.ndim == 3:
        # Already reshaped by a previous run of this cell.
        return values
    if values.ndim != 2:
        raise ValueError(
            f"expected a 2-D [samples, features] input, got {values.ndim} dimension(s)"
        )
    return values.reshape(values.shape[0], 1, values.shape[1])


# Rebind the feature sets to the 3-D layout the LSTM layers require.
x_train = _as_lstm_input(x_train)
x_val = _as_lstm_input(x_val)

# 1. Build the LSTM model
# Three stacked LSTM layers followed by a small dense head. The first two LSTMs
# return full sequences so the next LSTM receives 3-D input. The last one
# returns only its final step, so the network emits one value per sample,
# shape (samples, 1), matching targets of shape [samples]. With
# return_sequences=True on every LSTM the output would be (samples, time_steps, 1),
# which does not line up with the targets and is rejected as 3-D input by
# sklearn's mean_squared_error during evaluation.
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])),
    Dropout(0.4),
    LSTM(100, return_sequences=True),
    Dropout(0.4),
    LSTM(100, return_sequences=False),  # collapse the time axis before the dense head
    Dropout(0.4),
    Dense(25, activation='relu'),  # Dense layer with 25 neurons
    Dropout(0.4),
    Dense(1)  # Output layer for price prediction
])


# 2. Configure training: Adam optimizer with a mean-squared-error loss.
model.compile(loss='mean_squared_error', optimizer='adam')

# 3. Fit on the training data; the validation pair is passed so Keras reports
#    validation loss alongside the training loss. The returned History object
#    is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_val, y_val),
    verbose=1,
)

# 4. Evaluate the model
def _predict_2d(features):
    """Run ``model.predict`` and return the result as (samples, outputs).

    sklearn's mean_squared_error only accepts 1-D or 2-D arrays, so any extra
    singleton axis in the network output (e.g. a time axis of length 1) is
    folded away here.
    """
    raw = np.asarray(model.predict(features))
    return raw.reshape(raw.shape[0], -1)


train_predictions = _predict_2d(x_train)
val_predictions = _predict_2d(x_val)

# Calculate Mean Squared Error
train_mse = mean_squared_error(y_train, train_predictions)
val_mse = mean_squared_error(y_val, val_predictions)

print(f"Train MSE: {train_mse}")
print(f"Validation MSE: {val_mse}")

# 5. Make future predictions
# Example: predict on the validation set. These are the same inputs already
# scored above, so reuse that result instead of running inference a second time.
future_predictions = val_predictions

# Example: Scaling back the predictions (if scaled earlier). Use the scaler
# that was fitted on the targets, not a fresh one:
# y_val_unscaled = scaler.inverse_transform(y_val.reshape(-1, 1))
# future_predictions_unscaled = scaler.inverse_transform(future_predictions)

print(f"Predicted values: {future_predictions.flatten()}")