## Scenario:
### Predict next day’s website traffic (visits) from the last 7 days to help plan server capacity.

### Typical LSTM/GRU uses:
 Time series: stock prices, traffic, energy demand.

 NLP: next-word prediction, translation, sentiment.

 Sequences: clickstreams, sensor readings, speech.

In [1]:



# 1. Data preparation

import numpy as np
import tensorflow as tf

tf.random.set_seed(1)
np.random.seed(1)

# -----------------------------
# 1. Create synthetic traffic data
# -----------------------------
days = np.arange(200)
# baseline 1000 visits, weekly seasonality + small upward trend
traffic = 1000 + 30 * np.sin(2 * np.pi * days / 7) + 0.8 * days

window_size = 7   # use last 7 days -> predict next day

X_list, y_list = [], []
for i in range(len(traffic) - window_size):
    X_list.append(traffic[i:i + window_size])
    y_list.append(traffic[i + window_size])

X = np.array(X_list)            # (samples, 7)
y = np.array(y_list)            # (samples,)

# Normalize (simple)
mean = X.mean()
std = X.std()
X_norm = (X - mean) / std
y_norm = (y - mean) / std

# Reshape for RNN: (batch, time_steps, features)
X_norm = X_norm[..., np.newaxis]   # (samples, 7, 1)
y_norm = y_norm[..., np.newaxis]   # (samples, 1)

# Train/validation split
split = int(0.8 * len(X_norm))
X_train, X_val = X_norm[:split], X_norm[split:]
y_train, y_val = y_norm[:split], y_norm[split:]

# This creates a realistic-looking daily traffic series with weekly seasonality and a trend.


In [2]:

# 2. LSTM architecture and working model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# -----------------------------
# 2. Build LSTM model
# -----------------------------
def build_lstm_model():
    model = Sequential([
        LSTM(
            units=32,
            activation='tanh',
            recurrent_activation='sigmoid',
            input_shape=(window_size, 1)
        ),
        Dense(1)  # predict normalized next-day traffic
    ])
    return model

lstm_model = build_lstm_model()
lstm_model.summary()

# LSTM cell behaviour (intuitive):

# Maintains a cell state that flows along time steps, plus a hidden state.


# Uses input, forget, and output gates to decide

# what new information to add,

# what to erase,

# what to output at each time step.


# This design helps keep important gradients from vanishing across many steps.

# 3. Train LSTM (optimizer + learning rate)
# -----------------------------
# 3. Compile with Adam optimizer
# -----------------------------
optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)  # adaptive GD [web:51]

lstm_model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae']
)

# -----------------------------
# 4. Train the model
# -----------------------------
history_lstm = lstm_model.fit(
    X_train, y_train,
    epochs=40,
    batch_size=16,
    validation_data=(X_val, y_val),
    verbose=0
)

print("LSTM final train loss:", history_lstm.history['loss'][-1])
print("LSTM final val loss  :", history_lstm.history['val_loss'][-1])
# Adam performs gradient descent with adaptive learning rates and momentum, well suited for LSTM training.


# Learning rate 0.005 balances stability and speed; adjusting it changes convergence behaviour.



  super().__init__(**kwargs)


LSTM final train loss: 0.000950929126702249
LSTM final val loss  : 0.02330137975513935


In [3]:
# 4. GRU model on the same data

from tensorflow.keras.layers import GRU

# -----------------------------
# 5. Build GRU model
# -----------------------------
def build_gru_model():
    model = Sequential([
        GRU(
            units=32,
            activation='tanh',
            recurrent_activation='sigmoid',
            input_shape=(window_size, 1)
        ),
        Dense(1)
    ])
    return model

gru_model = build_gru_model()

gru_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='mse',
    metrics=['mae']
)

history_gru = gru_model.fit(
    X_train, y_train,
    epochs=40,
    batch_size=16,
    validation_data=(X_val, y_val),
    verbose=0
)

print("GRU final train loss:", history_gru.history['loss'][-1])
print("GRU final val loss  :", history_gru.history['val_loss'][-1])

# GRU uses update and reset gates with a single hidden state, merging some LSTM gates.


# For the same units, GRU has fewer parameters and is usually faster, often with similar accuracy.



GRU final train loss: 9.648581908550113e-05
GRU final val loss  : 0.009054281748831272


In [4]:

# 5. Prediction and LSTM vs GRU comparison

# -----------------------------
# 6. Predict next day's traffic
# -----------------------------
last_week = traffic[-window_size:]
last_week_norm = ((last_week - mean) / std).reshape(1, window_size, 1)

pred_lstm_norm = lstm_model.predict(last_week_norm, verbose=0)[0, 0]
pred_gru_norm = gru_model.predict(last_week_norm, verbose=0)[0, 0]

pred_lstm = pred_lstm_norm * std + mean
pred_gru = pred_gru_norm * std + mean

print("Last 7 days traffic:", np.round(last_week, 1))
print(f"LSTM predicted next day traffic: {pred_lstm:.1f} visits")
print(f"GRU  predicted next day traffic: {pred_gru:.1f} visits")



# Start with GRU when you want speed and similar performance.
# Use LSTM when very long-term dependencies and fine memory control matter

Last 7 days traffic: [1141.4 1126.  1132.5 1156.8 1181.1 1187.6 1172.2]
LSTM predicted next day traffic: 1138.6 visits
GRU  predicted next day traffic: 1143.7 visits
