# Stock Price Trend Prediction with LSTM  

This notebook walks you through **end‑to‑end** LSTM forecasting on a stock's closing price using:
- `yfinance` for data
- Feature engineering: SMA(20/50) and RSI(14)
- Train/test split by time (a validation slice is held out from the end of the training set during fitting)
- Keras LSTM model with early stopping + model checkpoint
- Plots for **Predictions vs Actual**, **Moving Averages**, and **RSI**

> Tip: Run each cell from top to bottom the first time.

In [None]:
# If running on a fresh environment, uncomment the lines below to install dependencies.
# !pip install --upgrade pip
# !pip install yfinance pandas numpy scikit-learn matplotlib tensorflow streamlit

In [None]:
import os, math, datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import yfinance as yf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Seed the TensorFlow and NumPy random generators for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Output locations: model/prediction artifacts and rendered figures
ARTIFACT_DIR = "artifacts"
GRAPH_DIR = "graphs"
for out_dir in (ARTIFACT_DIR, GRAPH_DIR):
    # exist_ok=True keeps this cell safe to re-run
    os.makedirs(out_dir, exist_ok=True)

print(tf.__version__)

In [None]:
# ==== User-configurable parameters ====
# All tunable values live in this cell; the cells below read these names.
TICKER = "AAPL"                    # e.g., 'AAPL', 'MSFT', 'TCS.NS'
START_DATE = "2015-01-01"          # First date requested from yfinance (YYYY-MM-DD)
END_DATE   = None                  # None = today (resolved in the fetch cell)
LOOKBACK   = 60                    # Sequence length: rows per LSTM input window
TEST_RATIO = 0.2                   # Last 20% of rows for test
EPOCHS     = 25                    # Upper bound on training epochs passed to model.fit
BATCH_SIZE = 32                    # Mini-batch size passed to model.fit
LEARNING_RATE = 1e-3               # Learning rate of the Adam optimizer

# Moving Average & RSI parameters
SMA_SHORT  = 20                    # Rolling window of the short simple moving average
SMA_LONG   = 50                    # Rolling window of the long simple moving average
RSI_PERIOD = 14                    # Rolling window used by compute_rsi

In [None]:
# ==== Fetch price data ====
# Resolve the "None = today" convention from the config cell into a concrete date string.
if END_DATE is None:
    END_DATE = datetime.date.today().strftime("%Y-%m-%d")

df = yf.download(TICKER, start=START_DATE, end=END_DATE, auto_adjust=True, progress=False)
if df is None or df.empty:
    raise RuntimeError("No data fetched. Check the ticker or dates.")

# Recent yfinance releases return MultiIndex columns (field, ticker) even for a single
# ticker. Collapse them to the field level so that df['close'] is a Series rather than
# a one-column DataFrame in the feature-engineering cell.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Keep only the columns used downstream, expose the date index as a column,
# and switch to lowercase names.
df = (
    df[['Close', 'Volume']]
    .reset_index()
    .rename(columns={'Date': 'date', 'Close': 'close', 'Volume': 'volume'})
)
df.head()

In [None]:
# ==== Feature engineering: SMA & RSI ====
def compute_rsi(series: pd.Series, period: int = 14) -> pd.Series:
    """Relative Strength Index based on simple rolling means of gains and losses.

    Args:
        series: Price series to analyse.
        period: Rolling window length; a value is produced only once ``period``
            consecutive price changes are available, so the first ``period``
            entries of the result are NaN.

    Returns:
        A Series aligned with ``series`` holding RSI values on a 0-100 scale.
    """
    change = series.diff()
    ups = change.clip(lower=0.0)       # positive moves, zero elsewhere
    downs = -change.clip(upper=0.0)    # magnitude of negative moves, zero elsewhere

    def _window_mean(values: pd.Series) -> pd.Series:
        # Require a full window so partially filled windows stay NaN
        return values.rolling(window=period, min_periods=period).mean()

    # The small epsilon keeps the ratio finite when there were no losses in the window
    relative_strength = _window_mean(ups) / (_window_mean(downs) + 1e-10)
    return 100 - (100 / (1 + relative_strength))

# Attach both moving averages and the RSI, then drop every row that still
# contains a NaN (the indicator warm-up rows) and renumber the index.
df = (
    df.assign(
        sma_short=df['close'].rolling(SMA_SHORT).mean(),
        sma_long=df['close'].rolling(SMA_LONG).mean(),
        rsi=compute_rsi(df['close'], RSI_PERIOD),
    )
    .dropna()
    .reset_index(drop=True)
)
print("Rows after indicators:", len(df))
df.tail()

In [None]:
# ==== Train/Test split by time & scaling ====
n = len(df)
test_size = int(n * TEST_RATIO)

# Split on an explicit row index. Slicing with a negative test_size breaks when
# test_size == 0: iloc[:-0] is empty and iloc[-0:] is the whole frame.
split_idx = n - test_size
train_df = df.iloc[:split_idx].copy()
test_df  = df.iloc[split_idx:].copy()

# Each partition is windowed on its own, so it needs more than LOOKBACK rows to
# yield at least one (window, target) sample. Fail here with a clear message
# instead of a shape error further down.
if len(train_df) <= LOOKBACK or len(test_df) <= LOOKBACK:
    raise ValueError(
        f"Need more than LOOKBACK={LOOKBACK} rows in each partition; "
        f"got train={len(train_df)}, test={len(test_df)}. "
        "Fetch more data or adjust TEST_RATIO / LOOKBACK."
    )

# Feature matrix and target
feature_cols = ['close', 'sma_short', 'sma_long', 'rsi']
target_col   = 'close'

X_train_raw = train_df[feature_cols].values
X_test_raw  = test_df[feature_cols].values

# Scale features and target separately. Both scalers are fitted on the training
# partition only and merely applied to the test partition.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_scaled = scaler_x.fit_transform(X_train_raw)
X_test_scaled  = scaler_x.transform(X_test_raw)

y_train_scaled = scaler_y.fit_transform(train_df[[target_col]].values)
y_test_scaled  = scaler_y.transform(test_df[[target_col]].values)

print("Train shape:", X_train_scaled.shape, y_train_scaled.shape)
print("Test shape :", X_test_scaled.shape, y_test_scaled.shape)

In [None]:
# ==== Sequence creation for LSTM ====
def create_sequences(features, target, lookback=60):
    """Slice aligned feature/target arrays into sliding windows for a sequence model.

    Sample ``k`` pairs the window ``features[k:k + lookback]`` with
    ``target[k + lookback]``, i.e. the target row that immediately follows the window.

    Args:
        features: Array-like of shape ``(n_rows, ...)`` with one row per time step.
        target: Array-like indexed by the same time steps as ``features``.
        lookback: Number of consecutive rows per window; must be at least 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, lookback, *features.shape[1:])`` and ``y`` has shape
        ``(n_samples, *target.shape[1:])``, where
        ``n_samples = max(len(features) - lookback, 0)``. When there are too few
        rows for a single window, empty arrays with these trailing dimensions
        are returned.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    features = np.asarray(features)
    target = np.asarray(target)

    if len(features) <= lookback:
        # Not enough rows for one window: keep the trailing axes so callers can
        # still read shape[1] / shape[2] instead of getting a bare (0,) array.
        empty_X = np.empty((0, lookback) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0,) + target.shape[1:], dtype=target.dtype)
        return empty_X, empty_y

    # `stop` is the index of the target row and the exclusive end of its window
    stops = range(lookback, len(features))
    X = np.array([features[stop - lookback:stop] for stop in stops])
    y = np.array([target[stop] for stop in stops])
    return X, y

# Window the train and test partitions separately, LOOKBACK rows per sample
X_train, y_train = create_sequences(
    X_train_scaled, y_train_scaled, lookback=LOOKBACK
)
X_test, y_test = create_sequences(
    X_test_scaled, y_test_scaled, lookback=LOOKBACK
)

# Report the resulting array shapes for both partitions
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test :", X_test.shape, "y_test :", y_test.shape)

In [None]:
# ==== Build LSTM model ====
def build_model(input_shape, lr=1e-3):
    """Build and compile a stacked two-layer LSTM regressor.

    Layer stack: LSTM(64, returning sequences) -> Dropout(0.2) -> LSTM(32)
    -> Dropout(0.2) -> Dense(16, relu) -> Dense(1).

    Args:
        input_shape: Shape of a single sample, passed as the ``shape`` of the
            model's input (the caller supplies ``(timesteps, n_features)``).
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled Keras ``Sequential`` model, using mean squared error loss.
    """
    model = Sequential([
        # Declare the input explicitly: passing `input_shape` to the first layer
        # of a Sequential model is deprecated in Keras 3.
        tf.keras.Input(shape=input_shape),
        LSTM(64, return_sequences=True),   # hands the full sequence to the next LSTM
        Dropout(0.2),
        LSTM(32, return_sequences=False),  # keeps only the final step's output
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)  # single regression output
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='mse'
    )
    return model

# One sample is a (timesteps, n_features) window taken from the training tensor
timesteps, n_features = X_train.shape[1], X_train.shape[2]
model = build_model((timesteps, n_features), lr=LEARNING_RATE)
model.summary()

In [None]:
# ==== Train with early stopping & checkpoint ====
# Weights-only checkpoints must use the ".weights.h5" suffix: Keras 3 raises a
# ValueError for any other extension when save_weights_only=True.
ckpt_path = os.path.join(ARTIFACT_DIR, "lstm_best_weights.weights.h5")
callbacks = [
    # Stop once val_loss has not improved for 5 epochs and roll back to the best weights
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # Write the weights to disk whenever val_loss reaches a new best
    ModelCheckpoint(ckpt_path, monitor='val_loss', save_best_only=True, save_weights_only=True)
]

# validation_split holds out the last 10% of the training samples (Keras takes
# them from the end of the arrays, before shuffling) to compute val_loss.
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

# Save full model (optional)
model_path = os.path.join(ARTIFACT_DIR, "lstm_model.keras")
model.save(model_path)
print("Saved weights to:", ckpt_path)
print("Saved model   to:", model_path)

In [None]:
# ==== Predict & invert scaling ====
# Run the model on the test windows, then map predictions and targets back
# from the scaled range to price units with the target scaler.
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_true = scaler_y.inverse_transform(y_test)

# The first LOOKBACK test rows have no target of their own (they only fill the
# first window), so the date axis starts after them.
test_index = test_df['date'].iloc[LOOKBACK:].reset_index(drop=True)

pred_df = pd.DataFrame({
    'date': test_index,
    'actual_close': y_test_true.ravel(),
    'pred_close': y_pred.ravel(),
})

# Persist the predictions next to the model artifacts
pred_csv = os.path.join(ARTIFACT_DIR, f"predictions_{TICKER}.csv")
pred_df.to_csv(pred_csv, index=False)
pred_df.head()

In [None]:
# ==== Metrics ====
# Error metrics in price units (RMSE, MAE) and as a percentage of the actual close (MAPE)
actual_close = pred_df['actual_close']
pred_close = pred_df['pred_close']

mae  = mean_absolute_error(actual_close, pred_close)
rmse = math.sqrt(mean_squared_error(actual_close, pred_close))
mape = ((actual_close - pred_close) / actual_close).abs().mean() * 100

print(f"RMSE: {rmse:.4f}  |  MAE: {mae:.4f}  |  MAPE: {mape:.2f}%")

# ==== Plot: Predictions vs Actual ====
# Overlay the actual and predicted close over the test period
fig_pred, ax_pred = plt.subplots(figsize=(12, 5))
ax_pred.plot(pred_df['date'], pred_df['actual_close'], label='Actual')
ax_pred.plot(pred_df['date'], pred_df['pred_close'], label='Predicted')
ax_pred.set_title(f"{TICKER}: Actual vs Predicted Close")
ax_pred.set_xlabel("Date")
ax_pred.set_ylabel("Price")
ax_pred.legend()
fig_pred.tight_layout()

# Save before showing so the file is written from the fully laid-out figure
pred_plot_path = os.path.join(GRAPH_DIR, f"pred_vs_actual_{TICKER}.png")
fig_pred.savefig(pred_plot_path, dpi=140)
plt.show()

# ==== Plot: Close with SMAs ====
# Close price with both moving averages over the full indicator-ready history
fig_sma, ax_sma = plt.subplots(figsize=(12, 5))
ax_sma.plot(df['date'], df['close'], label='Close')
ax_sma.plot(df['date'], df['sma_short'], label=f"SMA{SMA_SHORT}")
ax_sma.plot(df['date'], df['sma_long'], label=f"SMA{SMA_LONG}")
ax_sma.set_title(f"{TICKER}: Close & Moving Averages")
ax_sma.set_xlabel("Date")
ax_sma.set_ylabel("Price")
ax_sma.legend()
fig_sma.tight_layout()

sma_plot_path = os.path.join(GRAPH_DIR, f"sma_{TICKER}.png")
fig_sma.savefig(sma_plot_path, dpi=140)
plt.show()

# ==== Plot: RSI ====
# RSI curve with dashed reference lines at 70 and 30
fig_rsi, ax_rsi = plt.subplots(figsize=(12, 3))
ax_rsi.plot(df['date'], df['rsi'], label='RSI')
for rsi_level in (70, 30):
    ax_rsi.axhline(rsi_level, linestyle='--')
ax_rsi.set_title(f"{TICKER}: RSI({RSI_PERIOD})")
ax_rsi.set_xlabel("Date")
ax_rsi.set_ylabel("RSI")
fig_rsi.tight_layout()

rsi_plot_path = os.path.join(GRAPH_DIR, f"rsi_{TICKER}.png")
fig_rsi.savefig(rsi_plot_path, dpi=140)
plt.show()

# List every figure file written by this cell
print("Saved:")
for saved_path in (pred_plot_path, sma_plot_path, rsi_plot_path):
    print("-", saved_path)

In [None]:
# ==== (Optional) One-step ahead forecast for the next trading day ====
# Feed the most recent LOOKBACK rows of the FULL dataset to the model to predict
# the next close. The scalers are NOT refitted here: the train-fitted scaler_x /
# scaler_y are reused so the inputs are scaled exactly as they were in training.
# Reuse feature_cols / target_col so the column order always matches training.
full_X = df[feature_cols].values
full_y = df[[target_col]].values

full_X_scaled = scaler_x.transform(full_X)  # use train-fitted scaler_x
full_y_scaled = scaler_y.transform(full_y)  # use train-fitted scaler_y (not needed for the forecast itself)

# Add a leading batch axis: the model expects (batch, timesteps, n_features)
last_seq = full_X_scaled[-LOOKBACK:]
next_scaled = model.predict(last_seq[np.newaxis, ...])
# Map the scaled output back to price units
next_price = scaler_y.inverse_transform(next_scaled)[0,0]

print(f"Next-day close (model one-step forecast): {next_price:.2f}")

## What to try next
- Tune `EPOCHS`, `LOOKBACK`, `SMA` window sizes, and network depth.
- Add more features (e.g., EMA, MACD, Bollinger Bands).
- Try walk-forward validation to mimic live trading conditions.
- Deploy the model behind an API or interactive dashboard (see `streamlit_app.py`).

All artifacts are saved to disk: the weights, the model, and the predictions CSV under `artifacts/`, and the plots under `graphs/`.