<p style="font-family: 'Courier New', Courier, monospace; font-size: 30px; font-weight: bold; color: blue;  text-align: left;">
Neural Networks (NN) - MLP Modeling 
</p>

In [1]:
# -----------------------------------------------------------------------------------
# Libraries for data manipulation, visualization, and modeling
# -----------------------------------------------------------------------------------

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from keras.models import Sequential
from keras.layers import (
    Dense, 
    Input, 
    BatchNormalization, 
    Dropout, 
    LeakyReLU
)
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.metrics import (
    mean_squared_error, 
    r2_score, 
    mean_absolute_percentage_error, 
    median_absolute_error
)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split

import os  # Import the os module

# Set seed for reproducibility
GLOBAL_SEED = 50
np.random.seed(GLOBAL_SEED)                      # Seed for NumPy
tf.random.set_seed(GLOBAL_SEED)                  # Seed for TensorFlow

In [2]:
# -----------------------------------------------------------------------------------
# Load dataset and display basic info
# -----------------------------------------------------------------------------------

# Path to your dataset
dataset_path = '../../all_data_files/cleaned_dataset_per_device.csv'

# Load the dataset
try:
    df = pd.read_csv(dataset_path)
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print(f"File not found at the specified path: {dataset_path}")
    import sys
    sys.exit()

# Display dataset information
print("\nDataset Information:")
df.info()

# Preview the first five rows
print("\nFirst Five Rows of the Dataset:")
display(df.head())

Dataset loaded successfully.

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 755917 entries, 0 to 755916
Data columns (total 20 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   time         755917 non-null  object 
 1   device_id    755917 non-null  object 
 2   co2          755917 non-null  float64
 3   humidity     755917 non-null  float64
 4   pm25         755917 non-null  float64
 5   pressure     755917 non-null  float64
 6   temperature  755917 non-null  float64
 7   rssi         755917 non-null  float64
 8   snr          755917 non-null  float64
 9   SF           755917 non-null  int64  
 10  frequency    755917 non-null  float64
 11  f_count      755917 non-null  float64
 12  p_count      755917 non-null  float64
 13  toa          755917 non-null  float64
 14  distance     755917 non-null  int64  
 15  c_walls      755917 non-null  int64  
 16  w_walls      755917 non-null  int64  
 17  exp_pl       755

Unnamed: 0,time,device_id,co2,humidity,pm25,pressure,temperature,rssi,snr,SF,frequency,f_count,p_count,toa,distance,c_walls,w_walls,exp_pl,n_power,esp
0,2024-09-26 11:00:52.542462+00:00,ED3,633.0,54.22,0.58,300.41,23.85,-71.0,12.2,10,867.9,94.0,104.0,0.452608,18,1,2,88.4,-83.454107,-71.254107
1,2024-09-26 11:01:52.383162+00:00,ED3,645.0,54.18,0.32,300.48,23.87,-74.0,12.5,9,867.7,95.0,105.0,0.246784,18,1,2,91.4,-86.737602,-74.237602
2,2024-09-26 11:02:52.425491+00:00,ED3,648.0,54.23,0.58,300.5,23.88,-76.0,12.2,9,867.1,96.0,106.0,0.246784,18,1,2,93.4,-88.454107,-76.254107
3,2024-09-26 11:02:52.426016+00:00,ED3,648.0,54.23,0.58,300.5,23.88,-76.0,12.2,9,867.1,96.0,106.0,0.246784,18,1,2,93.4,-88.454107,-76.254107
4,2024-09-26 11:03:52.481201+00:00,ED3,645.0,54.25,0.33,300.5,23.9,-76.0,13.2,9,868.3,97.0,107.0,0.246784,18,1,2,93.4,-89.403045,-76.203045


In [3]:
# -----------------------------------------------------------------------------------
#  Extract necessary columns, perform train-test split, and scale features
# -----------------------------------------------------------------------------------

# Define feature columns and target
feature_columns = [
    'distance', 
    'frequency', 
    'c_walls', 
    'w_walls', 
    'co2', 
    'humidity', 
    'pm25', 
    'pressure', 
    'temperature', 
    'snr'
]
target_column = 'exp_pl'

# Check for missing columns
missing_columns = set(feature_columns + [target_column]) - set(df.columns)
if missing_columns:
    raise ValueError(f"The following required columns are missing in the dataset: {missing_columns}")

# Extract features (X) and target (y)
all_features = df[feature_columns].values
PL_all = df[target_column].values

# Train-test split (80-20)
X_train_all, X_test_all, PL_train_all, PL_test_all = train_test_split(
    all_features, 
    PL_all, 
    test_size=0.2, 
    random_state=GLOBAL_SEED
)

# Feature scaling
scaler = StandardScaler()
X_train_all_scaled = scaler.fit_transform(X_train_all)
X_test_all_scaled = scaler.transform(X_test_all)

print("\nTrain-test split and feature scaling completed...\n")

Feature scaling completed.


In [4]:
# -----------------------------------------------------------------------------------
# Flexible model creation function
# -----------------------------------------------------------------------------------

def create_ann_model(layer_units, input_dim, 
                     l2_reg=0.001, 
                     dropout_rate=0.3, 
                     negative_slope=0.1):
    """
    Creates an ANN model for regression with configurable architecture and 
    hyperparameters like L2 regularization, dropout, and LeakyReLU slope.
    
    Arguments:
        layer_units    : list of integers (e.g., [64, 32]) specifying 
                         the number of neurons in each hidden layer
        input_dim      : int, dimension of the input layer
        l2_reg         : float, L2 regularization factor
        dropout_rate   : float, dropout rate
        negative_slope : float, negative slope for LeakyReLU
    Returns:
        model          : Compiled Keras Sequential model
    """
    model = Sequential()

    # Explicit input layer
    model.add(Input(shape=(input_dim,)))  

    # Add hidden layers based on the list of units
    for units in layer_units:
        model.add(Dense(units, kernel_regularizer=l2(l2_reg)))
        model.add(LeakyReLU(negative_slope=negative_slope))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))
    
    # Output layer for regression
    model.add(Dense(1, activation='linear'))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model

In [None]:
# -----------------------------------------------------------------------------------
# Train the candidate architectures, store histories & predictions
# -----------------------------------------------------------------------------------

# Define our architectures:
architectures = [
    # 1-Layer Models
    {'name': 'A1', 'units': [1]}, 
    {'name': 'A2', 'units': [2]}, 
    {'name': 'A3', 'units': [3]}, 
    {'name': 'A4', 'units': [4]}, 
    {'name': 'A5', 'units': [5]},
    {'name': 'A6', 'units': [6]},
    {'name': 'A7', 'units': [7]},
    {'name': 'A8', 'units': [8]},
    {'name': 'A9', 'units': [9]},
    
    # 2-Layer Models
    {'name': 'B1', 'units': [1, 1]},     # Total units: 2
    {'name': 'B2', 'units': [1, 2]},     # Total units: 3
    {'name': 'B3', 'units': [1, 3]},     # Total units: 4
    {'name': 'B4', 'units': [1, 4]},     # Total units: 5
    {'name': 'B5', 'units': [1, 5]},     # Total units: 6
    {'name': 'B6', 'units': [1, 6]},     # Total units: 7
    {'name': 'B7', 'units': [1, 7]},     # Total units: 8
    {'name': 'B8', 'units': [1, 8]},     # Total units: 9

    {'name': 'C1', 'units': [2, 1]},     # Total units: 3
    {'name': 'C2', 'units': [2, 2]},     # Total units: 4
    {'name': 'C3', 'units': [2, 3]},     # Total units: 5
    {'name': 'C4', 'units': [2, 4]},     # Total units: 6
    {'name': 'C5', 'units': [2, 5]},     # Total units: 7
    {'name': 'C6', 'units': [2, 6]},     # Total units: 8
    {'name': 'C7', 'units': [2, 7]},     # Total units: 9

    {'name': 'D1', 'units': [3, 1]},     # Total units: 4
    {'name': 'D2', 'units': [3, 2]},     # Total units: 5
    {'name': 'D3', 'units': [3, 3]},     # Total units: 6
    {'name': 'D4', 'units': [3, 4]},     # Total units: 7
    {'name': 'D5', 'units': [3, 5]},     # Total units: 8
    {'name': 'D6', 'units': [3, 6]},     # Total units: 9

    {'name': 'E1', 'units': [4, 1]},     # Total units: 5
    {'name': 'E2', 'units': [4, 2]},     # Total units: 6
    {'name': 'E3', 'units': [4, 3]},     # Total units: 7
    {'name': 'E4', 'units': [4, 4]},     # Total units: 8
    {'name': 'E5', 'units': [4, 5]},     # Total units: 9

    {'name': 'F1', 'units': [5, 1]},     # Total units: 6
    {'name': 'F2', 'units': [5, 2]},     # Total units: 7
    {'name': 'F3', 'units': [5, 3]},     # Total units: 8
    {'name': 'F4', 'units': [5, 4]},     # Total units: 9

    {'name': 'G1', 'units': [6, 1]},     # Total units: 7
    {'name': 'G2', 'units': [6, 2]},     # Total units: 8
    {'name': 'G3', 'units': [6, 3]},     # Total units: 9

    {'name': 'H1', 'units': [7, 1]},     # Total units: 8
    {'name': 'H2', 'units': [7, 2]},     # Total units: 9

    {'name': 'I1', 'units': [8, 1]},     # Total units: 9
]

# Prepare containers for final results and for plotting
model_results = []
arch_histories = {}       # to store each model's training history
arch_predictions = {}     # to store each model's predictions on the test set

# Define callbacks (we'll reuse these for each model)
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=0
)

for arch in architectures:
    print(f"\nTraining Architecture: {arch['name']} with layers {arch['units']}")
    
    # Create a fresh model
    model = create_ann_model(
        layer_units=arch['units'], 
        input_dim=X_train_all_scaled.shape[1],
        l2_reg=0.001,
        dropout_rate=0.3,
        negative_slope=0.1
    )
    
    # Train the model
    history = model.fit(
        X_train_all_scaled,
        PL_train_all,
        validation_split=0.2,
        epochs=500,
        batch_size=128,
        verbose=0,  # set to 1 or 2 if you want more logs
        callbacks=[early_stop, reduce_lr]
    )
    
    # Store the training history so we can plot later
    arch_histories[arch['name']] = history
    
    # Evaluate on training data
    train_loss, train_mae = model.evaluate(X_train_all_scaled, PL_train_all, verbose=0)
    
    # Evaluate on test data
    test_loss, test_mae = model.evaluate(X_test_all_scaled, PL_test_all, verbose=0)
    
    # Predict on the test set
    PL_pred = model.predict(X_test_all_scaled).flatten()
    
    # Store predictions for later (Actual vs. Predicted plot)
    arch_predictions[arch['name']] = PL_pred
    
    # Additional metrics
    rmse_test = np.sqrt(mean_squared_error(PL_test_all, PL_pred))
    r2_test = r2_score(PL_test_all, PL_pred)
    mape_test = mean_absolute_percentage_error(PL_test_all, PL_pred) * 100
    median_ae_test = median_absolute_error(PL_test_all, PL_pred)
    
    # Store the results in a list
    model_results.append({
        'Architecture': arch['name'],
        'Hidden Layers': str(arch['units']),
        'Train MSE': train_loss,
        'Train MAE': train_mae,
        'Test MSE': test_loss,
        'Test MAE': test_mae,
        'Test RMSE': rmse_test,
        'R2 Score': r2_test,
        'Test MAPE (%)': mape_test,
        'Test Median AE': median_ae_test
    })
    
    print(f"Completed {arch['name']} -> Test MSE: {test_loss:.4f}, Test MAE: {test_mae:.4f}, R2: {r2_test:.4f}")

# Convert the list of dicts to a DataFrame
model_results_df = pd.DataFrame(model_results)
print("\nAll architectures trained. Here is the summary:")
display(model_results_df)

# Save the results to a CSV file
model_results_df.to_csv('model_results_summary_1_.csv', index=False)
print("Results saved to your.csv file")

<p style="font-family: 'Courier New', Courier, monospace; font-size: 30px; font-weight: bold; color: blue;  text-align: left;">
Cross-Validation
</p>

In [53]:
# -----------------------------------------------------------------------------------
# K-Fold Cross-Validation for each architecture
# -----------------------------------------------------------------------------------

kfold_results = []

# K-Fold settings
n_splits = 5  # using 5 for demonstration; set to 5 or 10 as you wish
kf = KFold(n_splits=n_splits, shuffle=True, random_state=GLOBAL_SEED)

# Iterate over each architecture
for arch in architectures:
    print(f"\nPerforming {n_splits}-Fold CV for Architecture: {arch['name']}")
    fold_num = 1
    fold_metrics = []
    
    for train_idx, val_idx in kf.split(X_train_all_scaled):
        print(f"  Fold {fold_num}/{n_splits}...")

        # Split data
        X_train_fold, X_val_fold = X_train_all_scaled[train_idx], X_train_all_scaled[val_idx]
        y_train_fold, y_val_fold = PL_train_all[train_idx], PL_train_all[val_idx]
        
        # Build model
        model_cv = create_ann_model(
            layer_units=arch['units'], 
            input_dim=X_train_all_scaled.shape[1],
            l2_reg=0.001,
            dropout_rate=0.3,
            negative_slope=0.1
        )
        
        # Define callbacks
        early_stop_cv = EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
        
        reduce_lr_cv = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=0
        )
        
        # Train
        history_cv = model_cv.fit(
            X_train_fold,
            y_train_fold,
            validation_data=(X_val_fold, y_val_fold),
            epochs=100,    # reduce for demonstration
            batch_size=128,
            verbose=0,
            callbacks=[early_stop_cv, reduce_lr_cv]
        )
        
        # Evaluate on the (full) test set
        test_loss_cv, test_mae_cv = model_cv.evaluate(X_test_all_scaled, PL_test_all, verbose=0)
        pl_pred_cv = model_cv.predict(X_test_all_scaled).flatten()
        
        # Compute metrics
        rmse_cv = np.sqrt(mean_squared_error(PL_test_all, pl_pred_cv))
        r2_cv = r2_score(PL_test_all, pl_pred_cv)
        mape_cv = mean_absolute_percentage_error(PL_test_all, pl_pred_cv) * 100
        median_ae_cv = median_absolute_error(PL_test_all, pl_pred_cv)
        
        fold_metrics.append({
            'Fold': fold_num,
            'Test MSE': test_loss_cv,
            'Test MAE': test_mae_cv,
            'Test RMSE': rmse_cv,
            'R2 Score': r2_cv,
            'Test MAPE (%)': mape_cv,
            'Test Median AE': median_ae_cv
        })
        
        print(f"    Fold {fold_num} Metrics - Test MSE: {test_loss_cv:.4f} | Test RMSE: {rmse_cv:.4f} | R²: {r2_cv:.4f} | MAPE: {mape_cv:.2f}%")
        fold_num += 1
    
    # After k-fold loop, summarize
    arch_cv_df = pd.DataFrame(fold_metrics)
    arch_cv_mean = arch_cv_df.mean(numeric_only=True)
    arch_cv_std = arch_cv_df.std(numeric_only=True)
    
    kfold_results.append({
        'Architecture': arch['name'],
        'Hidden Layers': str(arch['units']),
        'Mean Test MSE': arch_cv_mean['Test MSE'],
        'Std Test MSE': arch_cv_std['Test MSE'],
        'Mean Test MAE': arch_cv_mean['Test MAE'],
        'Std Test MAE': arch_cv_std['Test MAE'],
        'Mean RMSE': arch_cv_mean['Test RMSE'],
        'Std RMSE': arch_cv_std['Test RMSE'],
        'Mean R2': arch_cv_mean['R2 Score'],
        'Std R2': arch_cv_std['R2 Score'],
        'Mean MAPE (%)': arch_cv_mean['Test MAPE (%)'],
        'Std MAPE (%)': arch_cv_std['Test MAPE (%)'],
        'Mean MedAE': arch_cv_mean['Test Median AE'],
        'Std MedAE': arch_cv_std['Test Median AE']
    })

# Display aggregated K-Fold results for all architectures
kfold_results_df = pd.DataFrame(kfold_results)
print("\nK-Fold Cross-Validation Summary for All Architectures:")
display(kfold_results_df)

# Save the results to a CSV file
kfold_results_df.to_csv('kfold_results_summary_1_.csv', index=False)
print("Results saved to your.csv file")


Performing 5-Fold CV for Architecture: A1
  Fold 1/5...
[1m4620/4620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step
    Fold 1 Metrics - Test MSE: 47.8451 | Test RMSE: 6.9158 | R²: 0.8659 | MAPE: 6.18%
  Fold 2/5...
[1m4620/4620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
    Fold 2 Metrics - Test MSE: 47.6717 | Test RMSE: 6.9034 | R²: 0.8664 | MAPE: 6.12%
  Fold 3/5...
[1m4620/4620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step
    Fold 3 Metrics - Test MSE: 47.3705 | Test RMSE: 6.8817 | R²: 0.8672 | MAPE: 6.00%
  Fold 4/5...
[1m4620/4620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
    Fold 4 Metrics - Test MSE: 45.8090 | Test RMSE: 6.7670 | R²: 0.8716 | MAPE: 5.90%
  Fold 5/5...
[1m4620/4620[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step
    Fold 5 Metrics - Test MSE: 47.9028 | Test RMSE: 6.9203 | R²: 0.8657 | MAPE: 6.00%

Performing 5-Fold CV for Architecture: A2
  Fold 1/5...
[1m4620/4620[

Unnamed: 0,Architecture,Hidden Layers,Mean Test MSE,Std Test MSE,Mean Test MAE,Std Test MAE,Mean RMSE,Std RMSE,Mean R2,Std R2,Mean MAPE (%),Std MAPE (%),Mean MedAE,Std MedAE
0,A1,"[5, 3]",47.319811,0.869577,5.260359,0.069771,6.877621,0.063629,0.867363,0.002441,6.039236,0.110289,4.383068,0.112072
1,A2,"[8, 4]",37.896123,1.643464,4.665153,0.096529,6.152966,0.13299,0.893808,0.004614,5.282801,0.117932,3.826406,0.089192
2,A3,"[10, 5]",34.783179,1.14816,4.454594,0.094313,5.894709,0.09673,0.902551,0.003218,5.05274,0.143407,3.651257,0.126643
3,B1,"[5, 3, 2]",60.75201,2.725591,6.040549,0.047477,7.791639,0.174769,0.829709,0.007649,6.947931,0.045835,5.075931,0.143542
4,B2,"[8, 4, 2]",50.87904,3.10313,5.586372,0.160108,7.128481,0.215843,0.857416,0.008695,6.461505,0.166091,4.799703,0.166991
5,B3,"[10, 5, 3]",43.108432,1.520555,5.088224,0.134262,6.562608,0.115064,0.879213,0.004267,5.873752,0.191784,4.263287,0.161215
