# 05 - Deep Learning Model Training
## Year Prediction from Audio Features

### Models to Train:
1. Simple Neural Network (MLP)
2. Deep Neural Network
3. Neural Network with Regularization
4. Residual Network Architecture

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time
import json

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers, callbacks
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Add
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)
tf.random.set_seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {len(tf.config.list_physical_devices('GPU')) > 0}")

## 1. Load Data

In [2]:
X_train = np.load('data/splits/X_train.npy')
X_val = np.load('data/splits/X_val.npy')
X_test = np.load('data/splits/X_test.npy')

y_train = np.load('data/splits/y_train.npy')
y_val = np.load('data/splits/y_val.npy')
y_test = np.load('data/splits/y_test.npy')

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")

input_dim = X_train.shape[1]
print(f"\nInput dimension: {input_dim}")

Training set: (394074, 90)
Validation set: (69543, 90)
Test set: (51514, 90)

Input dimension: 90


## 2. Helper Functions

In [3]:
def evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test):
    """Evaluate model on all datasets."""
    results = {}
    
    for name, X, y in [('train', X_train, y_train), ('val', X_val, y_val), ('test', X_test, y_test)]:
        y_pred = model.predict(X, verbose=0).flatten()
        results[f'{name}_mse'] = mean_squared_error(y, y_pred)
        results[f'{name}_rmse'] = np.sqrt(results[f'{name}_mse'])
        results[f'{name}_mae'] = mean_absolute_error(y, y_pred)
        results[f'{name}_r2'] = r2_score(y, y_pred)
    
    return results

def print_results(results, model_name):
    """Print formatted results."""
    print(f"\n{'='*60}")
    print(f"{model_name} Results")
    print(f"{'='*60}")
    print(f"{'Set':<10} {'MSE':<12} {'RMSE':<10} {'MAE':<10} {'R²':<10}")
    print("-" * 60)
    for set_name in ['train', 'val', 'test']:
        print(f"{set_name.capitalize():<10} {results[f'{set_name}_mse']:<12.4f} {results[f'{set_name}_rmse']:<10.4f} {results[f'{set_name}_mae']:<10.4f} {results[f'{set_name}_r2']:<10.4f}")

def plot_training_history(history, model_name):
    """Plot training history."""
    fig = make_subplots(rows=1, cols=2, subplot_titles=['Loss', 'MAE'])
    
    fig.add_trace(
        go.Scatter(y=history.history['loss'], name='Train Loss', mode='lines'),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(y=history.history['val_loss'], name='Val Loss', mode='lines'),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Scatter(y=history.history['mae'], name='Train MAE', mode='lines'),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(y=history.history['val_mae'], name='Val MAE', mode='lines'),
        row=1, col=2
    )
    
    fig.update_layout(title=f'{model_name} - Training History', template='plotly_white', height=400)
    return fig

In [4]:
all_results = {}

## 3. Callbacks Setup

In [5]:
def get_callbacks(model_name, patience=15):
    """Get standard callbacks for training."""
    return [
        EarlyStopping(
            monitor='val_loss',
            patience=patience,
            restore_best_weights=True,
            verbose=1
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1
        ),
        ModelCheckpoint(
            f'models/dl/{model_name}_best.keras',
            monitor='val_loss',
            save_best_only=True,
            verbose=0
        )
    ]

## 4. Model 1: Simple MLP

In [6]:
def build_simple_mlp(input_dim):
    model = Sequential([
        Dense(128, activation='relu', input_dim=input_dim),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1)
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )
    return model

simple_mlp = build_simple_mlp(input_dim)
simple_mlp.summary()

In [7]:
print("Training Simple MLP...")
start_time = time.time()

history_simple = simple_mlp.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=256,
    callbacks=get_callbacks('simple_mlp'),
    verbose=1
)

train_time = time.time() - start_time
results_simple = evaluate_model(simple_mlp, X_train, y_train, X_val, y_val, X_test, y_test)
results_simple['train_time'] = train_time
results_simple['epochs'] = len(history_simple.history['loss'])

print_results(results_simple, "Simple MLP")
all_results['Simple MLP'] = results_simple

fig = plot_training_history(history_simple, "Simple MLP")
fig.write_html('reports/figures/25_simple_mlp_training.html')
fig.show()

Training Simple MLP...
Epoch 1/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 325381.6250 - mae: 340.7682 - val_loss: 16370.0186 - val_mae: 95.4515 - learning_rate: 0.0010
Epoch 2/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 2775.1821 - mae: 30.5456 - val_loss: 318.2063 - val_mae: 12.9001 - learning_rate: 0.0010
Epoch 3/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 190.8214 - mae: 9.7397 - val_loss: 133.5341 - val_mae: 8.3127 - learning_rate: 0.0010
Epoch 4/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 131.3513 - mae: 8.3961 - val_loss: 121.9742 - val_mae: 7.9726 - learning_rate: 0.0010
Epoch 5/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 128.3538 - mae: 8.3886 - val_loss: 121.2061 - val_mae: 8.0464 - learning_rate: 0.0010
Epoch 6/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━

## 5. Model 2: Deep Neural Network with Regularization

In [8]:
def build_deep_nn(input_dim):
    model = Sequential([
        Dense(256, input_dim=input_dim),
        BatchNormalization(),
        layers.Activation('relu'),
        Dropout(0.3),
        
        Dense(128),
        BatchNormalization(),
        layers.Activation('relu'),
        Dropout(0.3),
        
        Dense(64),
        BatchNormalization(),
        layers.Activation('relu'),
        Dropout(0.2),
        
        Dense(32),
        BatchNormalization(),
        layers.Activation('relu'),
        
        Dense(1)
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )
    return model

deep_nn = build_deep_nn(input_dim)
deep_nn.summary()

In [9]:
print("Training Deep Neural Network...")
start_time = time.time()

history_deep = deep_nn.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=256,
    callbacks=get_callbacks('deep_nn'),
    verbose=1
)

train_time = time.time() - start_time
results_deep = evaluate_model(deep_nn, X_train, y_train, X_val, y_val, X_test, y_test)
results_deep['train_time'] = train_time
results_deep['epochs'] = len(history_deep.history['loss'])

print_results(results_deep, "Deep NN")
all_results['Deep NN'] = results_deep

fig = plot_training_history(history_deep, "Deep NN")
fig.write_html('reports/figures/26_deep_nn_training.html')
fig.show()

Training Deep Neural Network...
Epoch 1/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 3771271.5000 - mae: 1941.1777 - val_loss: 3368979.2500 - val_mae: 1834.6967 - learning_rate: 0.0010
Epoch 2/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 2749586.0000 - mae: 1651.8871 - val_loss: 2115802.5000 - val_mae: 1449.5551 - learning_rate: 0.0010
Epoch 3/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 1488617.1250 - mae: 1204.1788 - val_loss: 942163.0625 - val_mae: 959.7411 - learning_rate: 0.0010
Epoch 4/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 534843.2500 - mae: 705.2209 - val_loss: 230541.5938 - val_mae: 468.3349 - learning_rate: 0.0010
Epoch 5/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 90440.4062 - mae: 269.3279 - val_loss: 16763.9961 - val_mae: 120.6503 - learning_rate:

## 6. Model 3: Wide and Deep Network

In [10]:
def build_wide_deep(input_dim):
    inputs = Input(shape=(input_dim,))
    
    wide = Dense(1)(inputs)
    
    deep = Dense(256, activation='relu')(inputs)
    deep = BatchNormalization()(deep)
    deep = Dropout(0.3)(deep)
    
    deep = Dense(128, activation='relu')(deep)
    deep = BatchNormalization()(deep)
    deep = Dropout(0.3)(deep)
    
    deep = Dense(64, activation='relu')(deep)
    deep = BatchNormalization()(deep)
    deep = Dropout(0.2)(deep)
    
    deep = Dense(1)(deep)
    
    output = Add()([wide, deep])
    
    model = Model(inputs=inputs, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )
    return model

wide_deep = build_wide_deep(input_dim)
wide_deep.summary()

In [11]:
print("Training Wide and Deep Network...")
start_time = time.time()

history_wide_deep = wide_deep.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=256,
    callbacks=get_callbacks('wide_deep'),
    verbose=1
)

train_time = time.time() - start_time
results_wide_deep = evaluate_model(wide_deep, X_train, y_train, X_val, y_val, X_test, y_test)
results_wide_deep['train_time'] = train_time
results_wide_deep['epochs'] = len(history_wide_deep.history['loss'])

print_results(results_wide_deep, "Wide and Deep")
all_results['Wide and Deep'] = results_wide_deep

fig = plot_training_history(history_wide_deep, "Wide and Deep")
fig.write_html('reports/figures/27_wide_deep_training.html')
fig.show()

Training Wide and Deep Network...
Epoch 1/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 3531553.5000 - mae: 1876.1956 - val_loss: 2723358.0000 - val_mae: 1650.2158 - learning_rate: 0.0010
Epoch 2/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 1726679.5000 - mae: 1295.5132 - val_loss: 835651.1875 - val_mae: 914.0671 - learning_rate: 0.0010
Epoch 3/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 354006.8438 - mae: 554.0575 - val_loss: 59450.1914 - val_mae: 243.5818 - learning_rate: 0.0010
Epoch 4/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 27509.2188 - mae: 132.6462 - val_loss: 410.0836 - val_mae: 18.4622 - learning_rate: 0.0010
Epoch 5/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 15701.7871 - mae: 99.9109 - val_loss: 176.9984 - val_mae: 11.6358 - learning_rate: 0.0010
Epoch 6

## 7. Model 4: Residual Network

In [12]:
def build_resnet(input_dim):
    inputs = Input(shape=(input_dim,))
    
    x = Dense(128, activation='relu')(inputs)
    x = BatchNormalization()(x)
    
    residual = x
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    x = Dense(128)(x)
    x = Add()([x, residual])
    x = layers.Activation('relu')(x)
    x = BatchNormalization()(x)
    
    residual = x
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    x = Dense(128)(x)
    x = Add()([x, residual])
    x = layers.Activation('relu')(x)
    x = BatchNormalization()(x)
    
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    output = Dense(1)(x)
    
    model = Model(inputs=inputs, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )
    return model

resnet = build_resnet(input_dim)
resnet.summary()

In [13]:
print("Training Residual Network...")
start_time = time.time()

history_resnet = resnet.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=256,
    callbacks=get_callbacks('resnet'),
    verbose=1
)

train_time = time.time() - start_time
results_resnet = evaluate_model(resnet, X_train, y_train, X_val, y_val, X_test, y_test)
results_resnet['train_time'] = train_time
results_resnet['epochs'] = len(history_resnet.history['loss'])

print_results(results_resnet, "ResNet")
all_results['ResNet'] = results_resnet

fig = plot_training_history(history_resnet, "ResNet")
fig.write_html('reports/figures/28_resnet_training.html')
fig.show()

Training Residual Network...
Epoch 1/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 276204.9062 - mae: 231.1504 - val_loss: 7075.7896 - val_mae: 56.1128 - learning_rate: 0.0010
Epoch 2/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 6081.7021 - mae: 56.5076 - val_loss: 818.7928 - val_mae: 24.1311 - learning_rate: 0.0010
Epoch 3/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 2236.2083 - mae: 33.2769 - val_loss: 570.0016 - val_mae: 18.6804 - learning_rate: 0.0010
Epoch 4/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 1007.5009 - mae: 23.3001 - val_loss: 197.0549 - val_mae: 10.0891 - learning_rate: 0.0010
Epoch 5/100
[1m1540/1540[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 651.9865 - mae: 19.1978 - val_loss: 356.3009 - val_mae: 13.5139 - learning_rate: 0.0010
Epoch 6/100
[1m1540/1540[0m [32m━━

## 8. Model Comparison

In [14]:
comparison_data = []
for model_name, results in all_results.items():
    comparison_data.append({
        'Model': model_name,
        'Train RMSE': results['train_rmse'],
        'Val RMSE': results['val_rmse'],
        'Test RMSE': results['test_rmse'],
        'Test MAE': results['test_mae'],
        'Test R²': results['test_r2'],
        'Epochs': results['epochs'],
        'Train Time (s)': results['train_time']
    })

comparison_df = pd.DataFrame(comparison_data).sort_values('Test RMSE')
print("\nDeep Learning Model Comparison (sorted by Test RMSE):")
print(comparison_df.to_string(index=False))


Deep Learning Model Comparison (sorted by Test RMSE):
        Model  Train RMSE  Val RMSE  Test RMSE  Test MAE  Test R²  Epochs  Train Time (s)
   Simple MLP    8.379099  8.779502   8.706170  6.048491 0.352527     100      379.267989
       ResNet    8.913293  8.938068   8.843155  6.198777 0.331992      65      657.129309
      Deep NN   10.451291 10.716557  10.326526  7.348140 0.089089      87      784.777441
Wide and Deep   10.434154 10.462043  10.390202  8.498646 0.077821      32      219.750270


In [15]:
fig = px.bar(
    comparison_df,
    x='Model',
    y=['Train RMSE', 'Val RMSE', 'Test RMSE'],
    barmode='group',
    title='Deep Learning Model Comparison - RMSE',
    labels={'value': 'RMSE (years)', 'variable': 'Dataset'}
)
fig.update_layout(template='plotly_white')
fig.write_html('reports/figures/29_dl_model_comparison.html')
fig.show()

In [16]:
fig = px.bar(
    comparison_df,
    x='Model',
    y='Test R²',
    title='Deep Learning Model Comparison - R² Score',
    color='Test R²',
    color_continuous_scale='Viridis'
)
fig.update_layout(template='plotly_white')
fig.write_html('reports/figures/30_dl_model_r2.html')
fig.show()

## 9. Best Model Predictions

In [17]:
best_model_name = comparison_df.iloc[0]['Model']
print(f"Best DL Model: {best_model_name}")

if best_model_name == 'Simple MLP':
    best_model = simple_mlp
elif best_model_name == 'Deep NN':
    best_model = deep_nn
elif best_model_name == 'Wide and Deep':
    best_model = wide_deep
else:
    best_model = resnet

Best DL Model: Simple MLP


In [18]:
y_test_pred = best_model.predict(X_test, verbose=0).flatten()

sample_size = 5000
sample_idx = np.random.choice(len(y_test), sample_size, replace=False)

fig = px.scatter(
    x=y_test[sample_idx],
    y=y_test_pred[sample_idx],
    title=f'{best_model_name} - Predicted vs Actual Year',
    labels={'x': 'Actual Year', 'y': 'Predicted Year'},
    opacity=0.5
)

min_val = min(y_test.min(), y_test_pred.min())
max_val = max(y_test.max(), y_test_pred.max())
fig.add_trace(go.Scatter(
    x=[min_val, max_val],
    y=[min_val, max_val],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(template='plotly_white')
fig.write_html('reports/figures/31_best_dl_predictions.html')
fig.show()

In [19]:
residuals = y_test - y_test_pred

fig = px.histogram(
    x=residuals,
    nbins=100,
    title=f'{best_model_name} - Residual Distribution',
    labels={'x': 'Residual (Actual - Predicted)', 'y': 'Frequency'}
)
fig.add_vline(x=0, line_dash='dash', line_color='red')
fig.update_layout(template='plotly_white')
fig.write_html('reports/figures/32_best_dl_residuals.html')
fig.show()

print(f"\nResidual Statistics:")
print(f"Mean: {residuals.mean():.4f}")
print(f"Std: {residuals.std():.4f}")


Residual Statistics:
Mean: 0.0693
Std: 8.7059


## 10. Save Results

In [20]:
simple_mlp.save('models/dl/simple_mlp_final.keras')
deep_nn.save('models/dl/deep_nn_final.keras')
wide_deep.save('models/dl/wide_deep_final.keras')
resnet.save('models/dl/resnet_final.keras')

print("All models saved to models/dl/")

All models saved to models/dl/


In [21]:
comparison_df.to_csv('reports/metrics/05_dl_model_comparison.csv', index=False)
print("Model comparison saved to reports/metrics/05_dl_model_comparison.csv")

Model comparison saved to reports/metrics/05_dl_model_comparison.csv


In [22]:
best_model_info = {
    'model_name': best_model_name,
    'test_rmse': float(comparison_df.iloc[0]['Test RMSE']),
    'test_mae': float(comparison_df.iloc[0]['Test MAE']),
    'test_r2': float(comparison_df.iloc[0]['Test R²'])
}

with open('reports/metrics/best_dl_model.json', 'w') as f:
    json.dump(best_model_info, f, indent=2)

print(f"\nBest DL Model: {best_model_name}")
print(f"Test RMSE: {best_model_info['test_rmse']:.4f} years")
print(f"Test MAE: {best_model_info['test_mae']:.4f} years")
print(f"Test R²: {best_model_info['test_r2']:.4f}")


Best DL Model: Simple MLP
Test RMSE: 8.7062 years
Test MAE: 6.0485 years
Test R²: 0.3525
