In [None]:
import pandas as pd

# Load stock data (replace with your file)
df = pd.read_csv("AAPL.csv", index_col="Date", parse_dates=True)

# Display dataset summary.
# DataFrame.info() writes its report directly to stdout and returns None, so
# it is called as its own statement; passing it to print() would emit the
# report before the header and then print a stray "None".
print("🔹 Dataset Overview:")
df.info()

print("***********************************")

# Display first few rows
print("\n🔹 Data Head:\n", df.head())
print("***********************************")
# Check for missing values (per-column count of null cells)
print("\n🔹 Missing Values:\n", df.isnull().sum())


In [None]:
# Drop every row that contains at least one missing value.
#
# Forward-filling (df.ffill()) is the alternative strategy. It has to be used
# *instead of* dropping, not after it: once incomplete rows are gone there is
# nothing left to fill. If you switch strategies, use df.ffill() rather than
# fillna(method='ffill'), which pandas 2.x deprecates.
df = df.dropna()

# Check again for missing values
print("\n🔹 Missing Values After Cleaning:\n", df.isnull().sum())


In [None]:
import numpy as np

def remove_outliers(df, column):
    """Return the rows of ``df`` whose ``column`` value lies within the IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both inclusive),
    where Q1/Q3 are the 25th/75th percentiles of ``df[column]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    column : str
        Name of the column the fences are computed on.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` with in-range values, original index preserved.
    """
    values = df[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)

    # Rows with a missing value fail both comparisons and are therefore dropped.
    in_range = (values >= first_quartile - whisker) & (values <= third_quartile + whisker)
    return df[in_range]

# Filter on price first, then on volume; the second pass computes its
# quartiles on the rows that survived the first.
for column_name in ("Close", "Volume"):
    df = remove_outliers(df, column_name)


In [None]:
# Row-to-row change in the closing price. The first row has no predecessor,
# so its difference is NaN and is removed by dropna().
diff_close_series = (
    df['Close']
    .diff()
    .dropna()
)

In [None]:
# Build a table of the differenced series at lags 2, 1 and 0, one column per
# lag, ordered oldest -> newest. Rows made incomplete by shifting are dropped.
lag_steps = [2, 1, 0]
shifted_columns = [diff_close_series.shift(step) for step in lag_steps]
lagged_data = pd.concat(shifted_columns, axis=1).dropna()
lagged_data.columns = [f'lag_{step}' for step in lag_steps]

In [None]:
# Chronological split: the first 80% of rows are used for training and the
# remaining 20% for testing (no shuffling, so the test set is strictly later).
train_fraction = 0.8
split_idx = int(len(lagged_data) * train_fraction)
train_data = lagged_data.iloc[:split_idx]
test_data = lagged_data.iloc[split_idx:]

In [None]:
# Columns are ordered oldest -> newest (lag_2, lag_1, lag_0), so the forecast
# target is the LAST column (the current step) and the features are the
# earlier lags before it. Using the first column as the target would make the
# model "predict" the oldest value from values that come after it.
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise features and target with separate scalers. Each scaler is
# fitted on the training split only and then reused, unchanged, on the test
# split.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Targets are 1-D, so they are reshaped into a single column before scaling.
scaler_y = StandardScaler().fit(y_train.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

In [None]:
# Append a trailing axis of length 1: (rows, cols) -> (rows, cols, 1),
# matching the Input(shape=(cols, 1)) layer of the model.
X_train_reshaped = X_train_scaled[:, :, None]
X_test_reshaped = X_test_scaled[:, :, None]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, LeakyReLU, Input ,GRU 
from tensorflow.keras.optimizers import Adagrad ,Adadelta ,Adam

In [None]:
# Define and compile the GRU model: a single 50-unit GRU layer that returns
# only its final state, a LeakyReLU activation, and a one-unit linear output.
model = Sequential([
    Input(shape=(X_train_reshaped.shape[1], 1)),  # Explicit input layer
    GRU(50, return_sequences=False),  # GRU instead of LSTM
    LeakyReLU(negative_slope=0.01),
    Dense(1),
])
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
)

In [None]:
# Train on the scaled, reshaped training tensors with per-epoch progress output.
model.fit(
    X_train_reshaped,
    y_train_scaled,
    epochs=200,
    batch_size=16,
    verbose=1,
)


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the same scaled, (samples, timesteps, 1)-shaped inputs the model
# was trained on. Feeding the raw X_train / X_test would give the network
# unscaled values in the wrong shape.
y_train_pred_scaled = model.predict(X_train_reshaped)
y_test_pred_scaled = model.predict(X_test_reshaped)

# Inverse transform predictions back to the original (unscaled) target units
y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled)
y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled)

# Evaluate performance against the unscaled targets
mse_train = mean_squared_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

In [None]:
# Report MSE and R2 for both splits, one metric per line, six decimals each.
print(
    f"Train MSE: {mse_train:.6f}",
    f"Train R2: {r2_train:.6f}",
    f"Test MSE: {mse_test:.6f}",
    f"Test R2: {r2_test:.6f}",
    sep="\n",
)

In [None]:
import matplotlib.pyplot as plt

# Overlay actual and predicted test-set differences on one labelled axes so
# the figure can be read on its own.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label='Actual Differences (Test)')
ax.plot(y_test_pred, label='Predicted Differences (Test)')
ax.set_title("Close Price Differences: Actual vs Predicted (Test Set) with LeakyReLU")
ax.set_xlabel("Test sample index")
ax.set_ylabel("Close price difference")
ax.legend()
plt.show()