In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from datetime import datetime
from sklearn.metrics import mean_absolute_error

# Data Preparation with Additional Features
def prepare_data(file_path, seq_length=50):
    """Load a price CSV and convert it into scaled sliding-window samples.

    Processing steps, in order:

    1. Read the CSV, discard its first two data rows and rename the columns
       to ``Date, Close, High, Low, Open, Volume`` (so the file must contain
       exactly six columns in that order).
    2. Parse ``Date``, sort chronologically, coerce the price/volume columns
       to numbers and drop rows that fail to parse.
    3. Add the derived features ``MA10``, ``MA50``, ``Daily Return`` and
       ``Volatility`` and drop the warm-up rows that contain NaN.
    4. Min-max scale the six model features and cut them into overlapping
       windows of ``seq_length`` consecutive rows.

    NOTE: the scaler is fit on *every* remaining row.  A caller that later
    splits the samples into train/test parts therefore evaluates on data
    whose min/max already influenced the scaling.

    Args:
        file_path: Path (or any object accepted by ``pandas.read_csv``) of
            the input CSV.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y, df, scaler)`` where ``X`` has shape
        ``(n_windows, seq_length, 6)``, ``y`` has shape ``(n_windows,)`` and
        holds the scaled ``Close`` of the row following each window, ``df``
        is the cleaned frame including the derived columns, and ``scaler``
        is the fitted ``MinMaxScaler``.  ``n_windows`` is 0 when there are
        not more than ``seq_length`` usable rows; ``X`` is still 3-D then.

    Raises:
        ValueError: If no rows remain after cleaning and feature warm-up.
    """
    df = pd.read_csv(file_path)
    # Presumably the first two rows are extra header lines written by the
    # exporter rather than price data -- TODO confirm against the CSV.
    df = df.iloc[2:].reset_index(drop=True)
    df.columns = ["Date", "Close", "High", "Low", "Open", "Volume"]
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.sort_values(by="Date")

    numeric_cols = ["Close", "High", "Low", "Open", "Volume"]
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    df = df.dropna()

    # Adding More Features
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA50'] = df['Close'].rolling(window=50).mean()
    df['Daily Return'] = df['Close'].pct_change()
    df['Volatility'] = df['Daily Return'].rolling(window=50).std()
    # The rolling windows leave NaN in their warm-up rows; remove them.
    df = df.dropna()

    features = ['Close', 'Volume', 'MA10', 'MA50', 'Daily Return', 'Volatility']
    if df.empty:
        # Fail here with an actionable message instead of letting the scaler
        # reject a zero-row array.
        raise ValueError(
            f"No usable rows left in {file_path!r} after cleaning and "
            "computing the rolling features; the file needs more valid rows."
        )

    data = df[features].values
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)

    # Build every window in one indexing operation.  Row i of `window_idx`
    # is [i, i+1, ..., i+seq_length-1], so X[i] == data_scaled[i:i+seq_length].
    # Unlike np.array([]) this keeps X three-dimensional even with 0 windows.
    n_windows = max(len(data_scaled) - seq_length, 0)
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    X = data_scaled[window_idx]
    # Target: scaled Close (feature column 0) of the row right after a window.
    y = data_scaled[seq_length:seq_length + n_windows, 0].copy()

    return X, y, df, scaler

# Optimized LSTM Model
def build_model(seq_length, feature_count):
    """Build and compile the two-layer LSTM regression model.

    The network is: Input -> LSTM(100, sequences) -> Dropout(0.3) ->
    LSTM(100) -> Dropout(0.3) -> Dense(50, relu) -> Dense(1).  It is
    compiled with Adam (learning rate 0.0005) and mean-squared-error loss.

    Args:
        seq_length: Number of time steps in each input window.
        feature_count: Number of features per time step.

    Returns:
        The compiled ``Sequential`` model; it outputs one value per sample.
    """
    # Imported locally because the file's top-level imports do not bring
    # `Input` into scope.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # Declare the input shape with an explicit Input layer; passing
        # `input_shape=` to the first LSTM triggers a Keras UserWarning.
        Input(shape=(seq_length, feature_count)),
        LSTM(100, return_sequences=True),
        Dropout(0.3),
        LSTM(100, return_sequences=False),
        Dropout(0.3),
        Dense(50, activation='relu'),
        Dense(1)
    ])

    optimizer = Adam(learning_rate=0.0005)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Training and Evaluation
def train_and_evaluate(file_path, seq_length=50, epochs=100, batch_size=32):
    """Train the LSTM on a price CSV and report its held-out error.

    The samples from ``prepare_data`` are split chronologically: the first
    80% of the windows are used for training and the remaining 20% are both
    passed to ``fit`` as validation data and used for the final
    mean-absolute-error figure.  The MAE is computed on the scaled target
    values returned by ``prepare_data``, not on raw prices.

    Args:
        file_path: Path of the input CSV, forwarded to ``prepare_data``.
        seq_length: Number of time steps per input window.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.

    Returns:
        A tuple ``(model, df, scaler)``: the trained model, the cleaned
        DataFrame and the fitted scaler from ``prepare_data``.

    Raises:
        ValueError: If there are too few samples to form a training set.
    """
    X, y, df, scaler = prepare_data(file_path, seq_length)
    train_size = int(len(X) * 0.8)
    if train_size == 0:
        # Covers both "no windows at all" and "a single window"; without
        # this check the failure would surface later as an obscure
        # IndexError or a Keras error about empty input.
        raise ValueError(
            f"Only {len(X)} sample(s) available with seq_length={seq_length}; "
            "at least 2 are required to split into train and test sets."
        )

    X_train, y_train = X[:train_size], y[:train_size]
    X_test, y_test = X[train_size:], y[train_size:]

    model = build_model(seq_length, X.shape[2])
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
    )

    # predict() returns shape (n, 1); flatten so it lines up with y_test (n,).
    y_pred = model.predict(X_test).ravel()
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error: {mae:.4f}")
    return model, df, scaler

# Run Training
# Location of the input CSV; hard-coded absolute path, adjust it to wherever
# the data file lives in your environment.
file_path = "/content/AAPL_data.csv"
# Runs the whole pipeline with the default seq_length. As a side effect this
# trains the model and prints the mean absolute error on the held-out split.
model, df, scaler = train_and_evaluate(file_path)


  super().__init__(**kwargs)


Epoch 1/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 29s 93ms/step - loss: 4.9183e-04 - val_loss: 0.0048
Epoch 2/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 42s 99ms/step - loss: 3.1844e-05 - val_loss: 0.0155
Epoch 3/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 40s 96ms/step - loss: 2.0969e-05 - val_loss: 0.0199
Epoch 4/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 24s 96ms/step - loss: 1.5787e-05 - val_loss: 0.0307
Epoch 5/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 40s 92ms/step - loss: 1.4231e-05 - val_loss: 0.0317
Epoch 6/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 24s 96ms/step - loss: 1.4267e-05 - val_loss: 0.0286
Epoch 7/100
250/250 ━━━━━━━━━━━━━━━━━━━━ 23s 94ms/step - loss: 1.1094e-05 - val_loss: 0.0339
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 92ms/step - loss: 1.1249e-05 - v