In [10]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import TimeSeriesSplit
# 1. Load real dataset: Monthly Airline Passengers
csv_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
df = pd.read_csv(csv_url, usecols=[1], engine='python')
series = df.values.astype('float32')

# 2. Prepare sliding windows
def make_windows(data, window_size=12):
    X, y = [], []
    for i in range(len(data) - window_size): # 50 - 12 = 38
        # take `window_size` months as input
        X.append(data[i : i + window_size]) # 0-11, 1-12, ..., 
        # predict the next month
        y.append(data[i + window_size]) # 12, 13, ..., 50
    # reshape to RNN input: (samples, timesteps, features)
    return np.array(X)[..., np.newaxis], np.array(y)

window_size = 12
X, y = make_windows(series, window_size)
# train/test split (80/20)
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# 3. Build RNN model
model = models.Sequential([
    # LSTM retains a hidden state across timesteps → captures long-term patterns
    layers.LSTM( # LSTM  is better it fixes exploding/vanishing gradients
        32,
        input_shape=(window_size, 1),
        return_sequences=False,  # False: output only final state (many-to-one)
    ),
    # Dense layer to map final hidden state → forecast value
    layers.Dense(1)
])

# 4. Why this is different:
#    • Input is 3D; CNN/FFNN would flatten time → lose order info.
#    • LSTM’s gates carry context across time; CNN uses local kernels, FFNN has no memory.
#    • return_sequences=True would output at every step (seq2seq); False gives just final forecast.

model.compile(optimizer='adam', loss='mae')
model.summary()  # note shape transition: (batch, window, features) → (batch, units)

# 5. Train 
history = model.fit(
    X_train, y_train,
    epochs=500,            
    batch_size=8,      
    validation_data=(X_test, y_test),
    verbose=2
)

# 6. Evaluate
mse = model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {mse:.4f}")


  super().__init__(**kwargs)


Epoch 1/500
14/14 - 1s - 66ms/step - loss: 257.7102 - val_loss: 439.2746
Epoch 2/500
14/14 - 0s - 6ms/step - loss: 256.9594 - val_loss: 439.0483
Epoch 3/500
14/14 - 0s - 5ms/step - loss: 256.5749 - val_loss: 438.6659
Epoch 4/500
14/14 - 0s - 5ms/step - loss: 256.3350 - val_loss: 438.4789
Epoch 5/500
14/14 - 0s - 5ms/step - loss: 256.1180 - val_loss: 438.3264
Epoch 6/500
14/14 - 0s - 5ms/step - loss: 256.0154 - val_loss: 438.2336
Epoch 7/500
14/14 - 0s - 5ms/step - loss: 255.9205 - val_loss: 438.1427
Epoch 8/500
14/14 - 0s - 5ms/step - loss: 255.7217 - val_loss: 437.7246
Epoch 9/500
14/14 - 0s - 5ms/step - loss: 255.3889 - val_loss: 437.5111
Epoch 10/500
14/14 - 0s - 5ms/step - loss: 255.1165 - val_loss: 437.2270
Epoch 11/500
14/14 - 0s - 5ms/step - loss: 254.7805 - val_loss: 436.9188
Epoch 12/500
14/14 - 0s - 5ms/step - loss: 254.5329 - val_loss: 436.6957
Epoch 13/500
14/14 - 0s - 5ms/step - loss: 254.3416 - val_loss: 436.5145
Epoch 14/500
14/14 - 0s - 6ms/step - loss: 254.1224 - val_l

In [12]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

# 1. Load real data: Monthly Airline Passengers (1949–1960)
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
series = pd.read_csv(url, usecols=[1]).values.astype('float32')

# 2. Create sliding windows & labels via tf.data
window_size = 12              # past 12 months → predict next month
batch_size  = 16

# features=series[:-window_size], targets=series[window_size:]
dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=series[:-window_size],        # all but last window
    targets=series[window_size:],      # shifted by window_size
    sequence_length=window_size,       # length of each input sequence
    sequence_stride=1,                 # slide by 1
    batch_size=batch_size,
)

# 3. Build Conv1D → LSTM model
model = models.Sequential([
    # Conv1D: extracts local patterns (like a CNN over time),
    # preserving temporal order vs. FFNN which would flatten it away.
    layers.Conv1D(
        filters=64, 
        kernel_size=3, 
        activation='relu',
        input_shape=(window_size, 1)
    ),
    layers.MaxPooling1D(pool_size=2),   # downsample time dimension

    # LSTM: carries hidden state across timesteps → captures long-term dependencies
    layers.LSTM(32, return_sequences=False),

    # Dropout layer for regularization (common in RNNs to prevent overfitting)
    layers.Dropout(0.2),

    # Dense head: maps final hidden state → single forecast value
    layers.Dense(1)
])

model.compile(optimizer='adam', loss='mae')
model.summary()  # see how 3D inputs flow through the Conv1D & LSTM

# 4. Train briefly (students will later replace with Optuna-driven tuning)
history = model.fit(
    dataset,
    epochs=50,
    verbose=2
)

# 5. Quick evaluation on a held-out split
#    (for simplicity, reuse part of the dataset; in practice, create a separate test set)
mse = model.evaluate(dataset, verbose=0)
print(f"Dataset MSE: {mse:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
8/8 - 1s - 111ms/step - loss: 276.8538
Epoch 2/50
8/8 - 0s - 5ms/step - loss: 275.7353
Epoch 3/50
8/8 - 0s - 5ms/step - loss: 275.1496
Epoch 4/50
8/8 - 0s - 6ms/step - loss: 274.8647
Epoch 5/50
8/8 - 0s - 5ms/step - loss: 274.4767
Epoch 6/50
8/8 - 0s - 5ms/step - loss: 274.1859
Epoch 7/50
8/8 - 0s - 5ms/step - loss: 273.8041
Epoch 8/50
8/8 - 0s - 5ms/step - loss: 273.4652
Epoch 9/50
8/8 - 0s - 5ms/step - loss: 273.2942
Epoch 10/50
8/8 - 0s - 6ms/step - loss: 272.8000
Epoch 11/50
8/8 - 0s - 5ms/step - loss: 272.5898
Epoch 12/50
8/8 - 0s - 5ms/step - loss: 272.3807
Epoch 13/50
8/8 - 0s - 5ms/step - loss: 272.2348
Epoch 14/50
8/8 - 0s - 5ms/step - loss: 272.2924
Epoch 15/50
8/8 - 0s - 5ms/step - loss: 272.1329
Epoch 16/50
8/8 - 0s - 5ms/step - loss: 271.8147
Epoch 17/50
8/8 - 0s - 5ms/step - loss: 271.6835
Epoch 18/50
8/8 - 0s - 5ms/step - loss: 271.2656
Epoch 19/50
8/8 - 0s - 5ms/step - loss: 271.1590
Epoch 20/50
8/8 - 0s - 5ms/step - loss: 270.8826
Epoch 21/50
8/8 - 0s - 5ms/

In [13]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

# 1. Load real data: Monthly Airline Passengers (1949–1960)
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
series = pd.read_csv(url, usecols=[1]).values.astype('float32')

# 2. Define train/test split
split_fraction = 0.8
split_index = int(len(series) * split_fraction)

train_series = series[:split_index]
test_series  = series[split_index - window_size:]  
# note: subtract window_size so the first test window has enough history

# 3. Create sliding windows for train and test via tf.data
window_size = 12    # past 12 months → predict next month
batch_size  = 16

train_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=train_series[:-window_size],
    targets=train_series[window_size:],
    sequence_length=window_size,
    sequence_stride=1,
    batch_size=batch_size,
)

test_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data=test_series[:-window_size],
    targets=test_series[window_size:],
    sequence_length=window_size,
    sequence_stride=1,
    batch_size=batch_size,
)

# 4. Build Conv1D → LSTM model
model = models.Sequential([
    layers.Conv1D(
        filters=64, 
        kernel_size=3, 
        activation='relu',
        input_shape=(window_size, 1)
    ),
    layers.MaxPooling1D(pool_size=2),
    layers.LSTM(32, return_sequences=False),
    layers.Dropout(0.2),
    layers.Dense(1)
])

model.compile(optimizer='adam', loss='mae')
model.summary()

# 5. Train on the training set
history = model.fit(
    train_dataset,
    epochs=50,
    verbose=2,
    validation_data=test_dataset  # monitor val loss on the test split
)

# 6. Evaluate on the held-out test set
test_loss = model.evaluate(test_dataset, verbose=0)
print(f"Test MAE: {test_loss:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
6/6 - 1s - 204ms/step - loss: 239.4190 - val_loss: 414.7872
Epoch 2/50
6/6 - 0s - 12ms/step - loss: 238.7630 - val_loss: 413.4792
Epoch 3/50
6/6 - 0s - 12ms/step - loss: 237.5968 - val_loss: 413.1910
Epoch 4/50
6/6 - 0s - 12ms/step - loss: 237.4073 - val_loss: 413.0770
Epoch 5/50
6/6 - 0s - 11ms/step - loss: 237.2396 - val_loss: 412.8484
Epoch 6/50
6/6 - 0s - 11ms/step - loss: 237.1244 - val_loss: 412.7717
Epoch 7/50
6/6 - 0s - 12ms/step - loss: 237.0809 - val_loss: 412.6941
Epoch 8/50
6/6 - 0s - 11ms/step - loss: 236.9729 - val_loss: 412.6161
Epoch 9/50
6/6 - 0s - 12ms/step - loss: 236.7840 - val_loss: 412.5379
Epoch 10/50
6/6 - 0s - 12ms/step - loss: 236.7607 - val_loss: 412.4349
Epoch 11/50
6/6 - 0s - 12ms/step - loss: 236.5496 - val_loss: 412.2802
Epoch 12/50
6/6 - 0s - 11ms/step - loss: 236.5606 - val_loss: 412.2009
Epoch 13/50
6/6 - 0s - 12ms/step - loss: 236.4436 - val_loss: 412.1217
Epoch 14/50
6/6 - 0s - 11ms/step - loss: 236.3391 - val_loss: 412.0359
Epoch 15/50
6/