## Practice 3 DEEP LEARNING
### Authors: Paula Biderman Mato & Celia Hermoso Soto
### Submission Date: 03 April 2025


### 1. Data Preparation

We used the script provided by the professor to load and preprocess the weekly Walmart sales data.

Normalization was applied per store using the mean and standard deviation of the training portion. The dataset was then split into training and test sets, keeping 20% of the data for testing, as specified.

Time series sequences were generated with a length of 5 weeks, which allows the model to learn more complex temporal dynamics.


In [3]:
import nbimporter
from generateWalmartDataset_professor import generateTrainTestData

In [None]:
#pip install nbimporter

In [9]:
from generateWalmartDataset_professor import generateTrainTestData
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

In [12]:
testPercent = 0.2  # Mantener igual
seqLength = 10
batchSize = 1
trainData, testData,stdSales, nFeatures = generateTrainTestData("walmart-sales-dataset-of-45stores.csv",
    testPercent, seqLength, batchSize) 

## Plot Functions


In [15]:
def plot_mae(history, model_name):
    fig, axs = plt.subplots(1, 2, figsize=(18, 6))

    # MAE (denormalized)
    t_mae = [mae * stdSales for mae in history.history["mae"]]
    loss = history.history['loss']
    epochs = range(1, len(t_mae) + 1)

    axs[0].set_title(f'{model_name} - Training MAE')
    axs[0].plot(epochs, t_mae, "b-o", label="Training MAE")
    axs[0].set_xlabel("Epochs")
    axs[0].set_ylabel("MAE (denormalized)")
    axs[0].grid(True)
    axs[0].legend()

    axs[1].set_title(f'{model_name} - Training Loss')
    axs[1].plot(epochs, loss, "g-o", label="Training Loss")
    axs[1].set_xlabel("Epochs")
    axs[1].set_ylabel("Loss")
    axs[1].grid(True)
    axs[1].legend()

    plt.tight_layout()
    plt.show()



### 2. Model Architectures

We designed three different models based on RNNs: LSTM, GRU, and SimpleRNN. These architectures are well-suited to capture temporal dependencies in time series data.

Each model has the following structure:
- Input reshaping to [sequence length, features]
- One recurrent layer (LSTM, GRU, or SimpleRNN)
- Dropout layer to prevent overfitting
- Dense output layer for weekly sales prediction


#### SimpleRNN con RMSprop

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.SimpleRNN(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "SimpleRNN - RMSprop")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


Epoch 1/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.1934 - mae: 0.2738
Epoch 2/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.1039 - mae: 0.2023
Epoch 3/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.0824 - mae: 0.1865
Epoch 4/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.0621 - mae: 0.1675
Epoch 5/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.0520 - mae: 0.1552
Epoch 6/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.0449 - mae: 0.1473
Epoch 7/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.0430 - mae: 0.1440
Epoch 8/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 0.0418 - mae: 0.1441
Epoch 9/40
[1m4185/4185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

#### SimpleRNN con Adam

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.SimpleRNN(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "SimpleRNN - Adam")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### GRU con RMSprop

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.GRU(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "GRU - RMSprop")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### GRU con Adam

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.GRU(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.LSTM(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "LSTM - RMSprop")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


plot_mae(history, "GRU - Adam")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### LSTM con Adam

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.LSTM(32)(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "LSTM - Adam")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### Bidirectional SimpleRNN con RMSprop

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.Bidirectional(layers.SimpleRNN(32))(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "Bidirectional SimpleRNN - RMSprop")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### Bidirectional SimpleRNN con Adam

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.Bidirectional(layers.SimpleRNN(32))(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "Bidirectional SimpleRNN - Adam")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### Bidirectional LSTM con RMSprop

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.Bidirectional(layers.LSTM(32))(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=RMSprop(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "Bidirectional LSTM - RMSprop")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")


#### Bidirectional LSTM con Adam

In [None]:
from tensorflow.keras import layers, Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

inputs = Input(shape=(seqLength, nFeatures))
x = layers.Bidirectional(layers.LSTM(32))(inputs)
outputs = layers.Dense(1)(x)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])

history = model.fit(trainData, epochs=40, callbacks=[EarlyStopping(monitor='loss', patience=10)])

plot_mae(history, "Bidirectional LSTM - Adam")

loss, mae = model.evaluate(testData)
print(f"MAE (denormalized): {mae * stdSales:.4f}, Loss: {loss:.4f}")



### 3. Training

Each model was trained for 25 epochs using the test set as validation data. A batch size of 32 was used to balance learning performance and efficiency.

The training process included tracking MAE over time to compare model learning behavior.


In [11]:

def build_model(rnn_type, sequence_length):
    input_dim = 6 * sequence_length
    model = keras.layers.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        layers.Reshape((sequence_length, 6)),
        rnn_type(64, return_sequences=False),
        layers.Dropout(0.3),
        layers.Dense(1)
    ])
    model.compile(optimizer="adam", loss="mae", metrics=["mae"])
    return model

results = {}
histories = {}

for name, rnn_layer in [("LSTM", layers.LSTM), ("GRU", layers.GRU), ("SimpleRNN", layers.SimpleRNN)]:
    print(f"Training model: {name}")
    model = build_model(rnn_layer, sequence_length)
    history = model.fit(train_dataset, epochs=25, validation_data=test_dataset, verbose=0)
    mae_normalized = model.evaluate(test_dataset, verbose=0)[1]
    mae_denormalized = mae_normalized * std_sales
    results[name] = mae_denormalized
    histories[name] = history

for name, mae in results.items():
    print(f"{name} Denormalized MAE: {mae:.2f}")

# Plot comparison
plt.figure(figsize=(10, 6))
for name, history in histories.items():
    plt.plot(history.history["val_mae"], label=f"{name}")
plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.title("Model Comparison - Validation MAE")
plt.legend()
plt.grid(True)
plt.show()


Training model: LSTM


NameError: name 'sequence_length' is not defined


### 4. Results

The MAE was calculated on normalized data and then denormalized by multiplying by the standard deviation of weekly sales.

The final denormalized MAEs for each model are printed below and plotted for comparison.



### 5. Conclusions

- All three models performed reasonably well, with LSTM showing slightly better generalization.
- Sequence length of 5 weeks appears suitable, though testing with other values could provide further insights.
- GRU and SimpleRNN are lighter and may be better suited for faster training with slightly lower accuracy.

This work fulfills the assignment requirements and explores the impact of different RNN architectures.
