# 04_offline_tf_cnn_lstm_notebook.ipynb
Offline Training and Evaluation of CNN+LSTM TensorFlow Model
This notebook trains a CNN+LSTM model on 5G throughput data, visualizes loss curves, computes metrics, and displays a Run 14 heatmap.

## 1. Imports and Configuration

In [3]:
import os

import folium
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from folium.plugins import HeatMap
from IPython.display import IFrame
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers, models, callbacks

# Reproducibility: weight initialisation and batch shuffling during training
# are random, so seed Python, NumPy and TensorFlow once, before any model or
# data work happens.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Create the output directories used later for the scaler/model artefacts
# ('models') and the rendered heatmap ('outputs'); no error if they exist.
for out_dir in ('models', 'outputs'):
    os.makedirs(out_dir, exist_ok=True)

## 2. Load and Preprocess Data

In [4]:
# Load dataset and drop the 'debit_class' column when it is present
# (errors='ignore' makes the drop a no-op otherwise).
df = pd.read_csv('mm-5G-enriched.csv').drop(columns=['debit_class'], errors='ignore')

# Rolling window parameters
SEQ_LEN = 10
targets = ['debit_brut', 'debit_lisse']
# Every numeric column except the two targets is used as a model input.
# NOTE(review): if 'run_num' is stored as a number it is selected here as a
# feature too — confirm that feeding the run identifier to the model is intended.
features = df.select_dtypes(include=[np.number]).columns.drop(targets).tolist()

# Build sequences: for each run, every window of SEQ_LEN consecutive rows is
# one sample, and the row right after the window supplies both targets.
# Windows never cross a run boundary. Per-run NumPy arrays are sliced instead
# of calling DataFrame.loc once per window, which yields the same values
# without the per-iteration indexing overhead.
X, y_raw, y_smooth, runs = [], [], [], []
for run_id, grp in df.groupby('run_num'):
    feat_arr = grp[features].to_numpy()
    raw_arr = grp['debit_brut'].to_numpy()
    smooth_arr = grp['debit_lisse'].to_numpy()
    for i in range(len(grp) - SEQ_LEN):
        X.append(feat_arr[i:i + SEQ_LEN])
        y_raw.append(raw_arr[i + SEQ_LEN])
        y_smooth.append(smooth_arr[i + SEQ_LEN])
        runs.append(run_id)

if not X:
    # np.stack([]) would fail with an unhelpful message; say what went wrong.
    raise ValueError(
        f"No run has more than SEQ_LEN={SEQ_LEN} rows, so no training window could be built."
    )

X = np.stack(X)
y_raw = np.array(y_raw)
y_smooth = np.array(y_smooth)
runs = np.array(runs)
ns, _, nf = X.shape

# Train/validation split, decided BEFORE scaling. shuffle=False keeps the
# original window order, so the validation set is the trailing 20% of windows.
idx_train, idx_val = train_test_split(
    np.arange(ns), test_size=0.2, random_state=42, shuffle=False
)

# Scale features. The scaler is fitted on the training windows only, so that
# validation min/max statistics do not leak into the features the model is
# trained on (or into the scaler saved for later use). The fitted scaler is
# then applied to every window; validation values may fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X[idx_train].reshape(-1, nf))
X = scaler.transform(X.reshape(-1, nf)).reshape(ns, SEQ_LEN, nf)

# Save scaler
joblib.dump(scaler, 'models/scaler.gz')

# Materialise the split with the indices computed above.
X_train, X_val = X[idx_train], X[idx_val]
y_raw_train, y_raw_val = y_raw[idx_train], y_raw[idx_val]
y_smooth_train, y_smooth_val = y_smooth[idx_train], y_smooth[idx_val]
runs_train, runs_val = runs[idx_train], runs[idx_val]

## 3. Build and Compile Model

In [5]:
# Two-headed CNN+LSTM: a shared convolutional/recurrent trunk feeds two
# single-unit regression heads, 'raw' and 'smooth'.
input_layer = layers.Input(shape=(SEQ_LEN, nf))

# Shared trunk, applied in order: two same-padded 1-D convolutions, a
# pooling step that halves the time axis, then an LSTM that returns only
# its last state.
trunk_layers = (
    layers.Conv1D(32, 3, padding='same', activation='relu'),
    layers.Conv1D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling1D(2),
    layers.LSTM(64),
)
x = input_layer
for trunk_layer in trunk_layers:
    x = trunk_layer(x)

# One Dense(1) head per target; the names become the keys of the per-output
# losses reported during training.
raw_output = layers.Dense(1, name='raw')(x)
smooth_output = layers.Dense(1, name='smooth')(x)

model = models.Model(inputs=input_layer, outputs=[raw_output, smooth_output])
# A single 'mse' loss string is applied to both outputs.
model.compile(optimizer='adam', loss='mse')

model.summary()

## 4. Train with Early Stopping

In [None]:
# Stop when the combined validation loss has not improved for 10 epochs and
# roll the model back to the best epoch's weights.
es = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    X_train, [y_raw_train, y_smooth_train],
    validation_data=(X_val, [y_raw_val, y_smooth_val]),
    epochs=100, batch_size=32, callbacks=[es]
)

# Save model.
# The progress-bar format in this notebook's training log is the Keras 3 one.
# Keras 3 rejects model.save() paths without a '.keras' or '.h5' extension and
# save_weights() paths that do not end in '.weights.h5', so both file names
# carry the required suffix. (If a TensorFlow SavedModel directory is needed
# for serving, use model.export(...) instead.)
model.save('models/throughput_model.keras')
model.save_weights('models/throughput_weights.weights.h5')

Epoch 1/100
1674/1674 ━━━━━━━━━━━━━━━━━━━━ 30s 15ms/step - loss: 884026.5000 - raw_loss: 465046.6875 - smooth_loss: 418979.4062 - val_loss: 618080.2500 - val_raw_loss: 328435.5938 - val_smooth_loss: 288722.9375
Epoch 2/100
1674/1674 ━━━━━━━━━━━━━━━━━━━━ 23s 14ms/step - loss: 702580.9375 - raw_loss: 374322.3438 - smooth_loss: 328258.5000 - val_loss: 503178.7188 - val_raw_loss: 271152.0000 - val_smooth_loss: 231304.4219
Epoch 3/100
1674/1674 ━━━━━━━━━━━━━━━━━━━━ 22s 13ms/step - loss: 570900.8750 - raw_loss: 308411.0000 - smooth_loss: 262490.0000 - val_loss: 428630.2812 - val_raw_loss: 233996.5781 - val_smooth_loss: 194097.6875
Epoch 4/100
1111/1674 ━━━━━━━━━━━━━━━━━━━━ 6s 12ms/step - loss: 482875.4688 - raw_loss: 263246.9688 - smooth_loss: 219628.6875

## 5. Loss Curves Visualization

In [None]:
# Draw the per-epoch combined loss recorded by Keras for the training and
# validation sets on a single axis.
fig, ax = plt.subplots(figsize=(8, 4))
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Training vs. Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

## 6. Evaluation Metrics

In [None]:
# Predictions on validation. Each head returns an (n, 1) array; reshape(-1)
# flattens it to (n,) and, unlike squeeze(), still yields a 1-D array when the
# validation set contains a single sample.
pred_raw, pred_smooth = model.predict(X_val)
pred_raw = pred_raw.reshape(-1)
pred_smooth = pred_smooth.reshape(-1)

# Switch: choose, per sample, whichever head is closer to the true raw value.
# NOTE(review): this selection looks at the ground truth (y_raw_val), so
# pred_final is an oracle and cannot be reproduced at inference time. The
# "Final" metrics below are therefore an optimistic bound on the error, not an
# estimate of deployed performance — the "Raw" rows are the leakage-free ones.
pred_final = np.where(
    np.abs(pred_raw - y_raw_val) < np.abs(pred_smooth - y_raw_val),
    pred_raw, pred_smooth
)

# Compute metrics
rmse_raw = np.sqrt(mean_squared_error(y_raw_val, pred_raw))
rmse_smooth = np.sqrt(mean_squared_error(y_smooth_val, pred_smooth))
rmse_final = np.sqrt(mean_squared_error(y_raw_val, pred_final))
mae_final = mean_absolute_error(y_raw_val, pred_final)
r2_final = r2_score(y_raw_val, pred_final)
# Leakage-free counterparts of the "Final" MAE/R2, computed from the raw head
# alone so the table also reports what the model achieves without the oracle.
mae_raw = mean_absolute_error(y_raw_val, pred_raw)
r2_raw = r2_score(y_raw_val, pred_raw)

# Display metrics
metrics = pd.DataFrame({
    'Metric': ['RMSE Raw', 'RMSE Smooth', 'RMSE Final', 'MAE Final', 'R2 Final',
               'MAE Raw', 'R2 Raw'],
    'Value': [rmse_raw, rmse_smooth, rmse_final, mae_final, r2_final,
              mae_raw, r2_raw]
})
metrics

## 7. Run 14 Heatmap via Folium

In [None]:
# Prepare Run 14 data: up to the first 20 validation windows of run 14.
run14_idx = np.where(runs_val == 14)[0][:20]
if run14_idx.size == 0:
    # Without this guard the failure would surface later as an opaque
    # IndexError on coords[0].
    raise ValueError("Run 14 has no windows in the validation set; nothing to plot.")

# run14_idx holds positions inside the validation arrays, not row offsets
# inside run 14. Convert them:
#   1. the split was made without shuffling, so the validation set is the tail
#      of `runs` and validation position k is global window val_start + k;
#   2. windows of one run are contiguous and in row order, so subtracting the
#      global index of run 14's first window gives the window's start row i
#      inside the run;
#   3. the predicted row is i + SEQ_LEN.
val_start = len(runs) - len(runs_val)
run14_first_window = np.where(runs == 14)[0][0]
run14_offsets = val_start + run14_idx - run14_first_window

coords_df = df[df.run_num == 14].reset_index(drop=True)
coords = coords_df.loc[run14_offsets + SEQ_LEN, ['latitude', 'longitude']].values.tolist()
preds = pred_final[run14_idx].tolist()

# Create map and heatmap: centre on the first point and weight every
# location by its predicted throughput.
m = folium.Map(location=coords[0], zoom_start=13)
heat_data = [[lat, lon, val] for (lat, lon), val in zip(coords, preds)]
HeatMap(heat_data, radius=15).add_to(m)

# Save and display
html_path = 'outputs/run14_heatmap.html'
m.save(html_path)
print(f"Heatmap saved to {html_path}")

# IFrame is imported with the other dependencies in the import cell.
IFrame(html_path, width=700, height=500)