<a href="https://colab.research.google.com/github/matteogianferrari/homelens-ca/blob/main/model/notebooks/homelens_ca_research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **HomeLens CA DL Model Research**

# **Setup**


---
**OS Environment Variables**

In [1]:
from google.colab import userdata
import os

# Maps every environment variable exported by this cell to the Colab secret
# that supplies its value. GITHUB_TOKEN and USER are consumed by the
# `git clone` command further down; the MLFLOW_TRACKING_* pair holds the
# credentials for MLFlow (the username reuses the USER secret).
_SECRET_FOR_ENV = {
    'GITHUB_TOKEN': 'GITHUB_TOKEN',
    'USER': 'USER',
    'MLFLOW_TRACKING_USERNAME': 'USER',
    'MLFLOW_TRACKING_PASSWORD': 'MLFLOW_TRACKING_PASSWORD',
}

# Copies each secret into the process environment
for _env_name, _secret_name in _SECRET_FOR_ENV.items():
    os.environ[_env_name] = userdata.get(_secret_name)

---
**Library Installation**

In [2]:
!pip install dagshub mlflow pynvml --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.2/258.2 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.3/28.3 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m93.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

---
**Library Imports**

In [3]:
import tensorflow as tf

import mlflow
from mlflow.tracking import MlflowClient

import dagshub

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

---
**Dataset import**

In [4]:
# Clones the project repository over HTTPS. The GITHUB_TOKEN environment
# variable is used as the credential and USER as the repository owner; both
# are exported by the environment-variables cell at the top of the notebook.
# NOTE(review): the token is embedded in the clone URL, so it presumably ends
# up in the clone's remote configuration — confirm this is acceptable.
!git clone https://${GITHUB_TOKEN}@github.com/${USER}/homelens-ca.git
# Makes the cloned repository the working directory for the following cells
%cd homelens-ca/

Cloning into 'homelens-ca'...
remote: Enumerating objects: 185, done.[K
remote: Counting objects: 100% (185/185), done.[K
remote: Compressing objects: 100% (105/105), done.[K
remote: Total 185 (delta 93), reused 146 (delta 61), pack-reused 0 (from 0)[K
Receiving objects: 100% (185/185), 7.34 MiB | 20.94 MiB/s, done.
Resolving deltas: 100% (93/93), done.
/content/homelens-ca


In [5]:
# Changes dir
%cd model/data/dataset

# Imports the dataset and the labels
X = pd.read_csv('X.csv').to_numpy()
y = pd.read_csv('y.csv').to_numpy()

# Applies transformation to labels
y = np.log1p(y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

/content/homelens-ca/model/data/dataset


---
**DagsHub and MLFlow**

In [6]:
# Name of the MLFlow experiment (passed to mlflow.set_experiment in the training cell)
experiment_name = "HomeLens CA-DL Research"

# MLFlow tracking endpoint of the project
TRACKING_URI = "https://dagshub.com/matteogianferrari/homelens-ca.mlflow"

# Connects the session to the DagsHub repository with MLFlow support enabled
dagshub.init(
    repo_owner='matteogianferrari',
    repo_name='homelens-ca',
    mlflow=True,
)

# Points MLFlow at the tracking endpoint
mlflow.set_tracking_uri(TRACKING_URI)



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=6488d362-8387-4fac-ac09-2b9bddbd7001&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=0957bf0d498e412da53de597286a3163ce2bb1f11972ae1ae6eaf9269a6cccf7




Output()

# **Research and Training DL Models**

---
**Model Hyperparameters**

Defines the hyperparameters used to train the model.

In [7]:
# --- Training length and early stopping ---
epochs = 500                    # upper bound on the number of training epochs
patience_es = 50                # patience passed to the EarlyStopping callback

# --- Learning rate (Adam + ReduceLROnPlateau) ---
initial_learning_rate = 1e-3    # starting learning rate of the optimizer
minimum_learning_rate = 1e-8    # lower bound passed as min_lr
patience_lr = 15                # patience passed to ReduceLROnPlateau
decaying_factor = 0.5           # multiplicative factor passed to ReduceLROnPlateau

# --- Batching, validation and objective ---
batch_size = 128
# NOTE: the name is a misspelling of "validation_split"; it is kept as is
# because the training cell reads it under this name.
validation_slip = 0.1
loss_func = 'mse'

---
**Training and Testing**

In [8]:
import pynvml
from tensorflow.keras.regularizers import l2

# Selects the MLFlow experiment named by `experiment_name` (defined in the
# DagsHub/MLFlow setup cell) as the active one for the runs started below
mlflow.set_experiment(experiment_name)

# Turns on MLFlow autologging for Keras, so the training run below is logged
# without explicit log calls for every value
mlflow.keras.autolog()



def residual_block(x, dropout_rate=0.3, l2_reg=1e-4, units=(512, 256, 128)):
    """
    Builds a fully-connected residual block on top of the tensor ``x``.

    The main path stacks one ``Dense(relu) -> BN -> Dropout`` stage per entry
    of ``units``. With the default widths this is:
      Dense(512) -> BN -> Dropout ->
      Dense(256) -> BN -> Dropout ->
      Dense(128) -> BN -> Dropout ->
      (Add shortcut) -> ReLU activation

    Args:
        x: Input Keras tensor; its last dimension is compared against the
            block's output width to decide whether the shortcut needs a
            projection.
        dropout_rate: Rate used by the Dropout layer of every stage.
        l2_reg: L2 factor applied to the kernel of every Dense layer created
            by the block (including the shortcut projection).
        units: Widths of the Dense stages, in order. The last entry is the
            output width of the block. Defaults to ``(512, 256, 128)``.

    Returns:
        Keras tensor whose last dimension is ``units[-1]``.

    Raises:
        ValueError: If ``units`` is empty.
    """
    units = tuple(units)
    if not units:
        raise ValueError("units must contain at least one layer width")

    # Save the input for the residual (skip) connection.
    shortcut = x

    # Main path: one Dense -> BN -> Dropout stage per requested width.
    for width in units:
        x = tf.keras.layers.Dense(width, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg))(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)

    # Add() needs both operands to have the same width, so the shortcut is
    # projected with a linear Dense layer whenever its width differs from the
    # output width of the main path.
    out_width = units[-1]
    if shortcut.shape[-1] != out_width:
        shortcut = tf.keras.layers.Dense(out_width, kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg))(shortcut)

    # Add the shortcut (residual connection) and apply a ReLU activation.
    x = tf.keras.layers.Add()([x, shortcut])
    x = tf.keras.layers.Activation('relu')(x)
    return x

# Seeds Python, NumPy and TensorFlow so that weight initialisation, dropout
# and batch shuffling start from the same state on every run (same value as
# the random_state used for the train/test split).
tf.keras.utils.set_random_seed(42)

# Define the input layer. The number of features is read from the training
# data instead of being hard-coded, so the model always matches the dataset.
inputs = tf.keras.layers.Input(shape=(X_train.shape[1],))

# Stack three residual blocks; only the first one needs a shortcut projection
# because the following ones already receive a 128-wide tensor.
x = residual_block(inputs, dropout_rate=0.2)
x = residual_block(x, dropout_rate=0.2)
x = residual_block(x, dropout_rate=0.2)

# Extra hidden layer between the residual stack and the output.
x = tf.keras.layers.Dense(64, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(x)

# Final output layer for regression (one continuous output, linear activation).
outputs = tf.keras.layers.Dense(1)(x)

# Create the model.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Adam starts at the initial learning rate; the decay itself is performed by
# the ReduceLROnPlateau callback defined below, not by the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)

# Compile the model with the configured loss and the tracked metrics.
model.compile(
    optimizer=optimizer,
    loss=loss_func,
    metrics=['r2_score', 'mae']
)

# Early stopping: halts training when the validation R2 has not improved for
# `patience_es` epochs and restores the best weights seen.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_r2_score',
    mode='max',
    patience=patience_es,
    restore_best_weights=True
)

# Learning-rate decay: multiplies the learning rate by `decaying_factor` when
# the validation loss has not improved for `patience_lr` epochs, never going
# below `minimum_learning_rate`.
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=decaying_factor,
    patience=patience_lr,
    min_lr=minimum_learning_rate
)


# Train the model and log with MLFlow
with mlflow.start_run(log_system_metrics=True):
    mlflow.log_param("patience", patience_es)
    mlflow.log_param("loss_func", loss_func)

    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_slip,
        callbacks=[es_callback, lr_callback]
    )

    # Test the model and log with MLFlow. The results are requested as a dict
    # and looked up by metric name, so the logged values do not depend on the
    # order in which Keras returns the metrics.
    test_results = model.evaluate(X_test, y_test, return_dict=True)
    test_loss = test_results['loss']
    test_r2_score = test_results['r2_score']
    test_mae = test_results['mae']
    mlflow.log_metric("test_loss", test_loss)
    mlflow.log_metric("test_r2_score", test_r2_score)
    mlflow.log_metric("test_mae", test_mae)

2025/02/02 14:23:19 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Epoch 1/500
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 167ms/step - loss: 27.9037 - mae: 3.7536 - r2_score: -95.6286 - val_loss: 17.1173 - val_mae: 4.0388 - val_r2_score: -60.6996 - learning_rate: 0.0010
Epoch 2/500
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.2458 - mae: 1.0244 - r2_score: -5.1055 - val_loss: 5.0547 - val_mae: 2.0276 - val_r2_score: -15.7153 - learning_rate: 0.0010
Epoch 3/500
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.7614 - mae: 0.8600 - r2_score: -3.2466 - val_loss: 2.4262 - val_mae: 1.2650 - val_r2_score: -5.9169 - learning_rate: 0.0010
Epoch 4/500
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.5445 - mae: 0.7750 - r2_score: -2.5015 - val_loss: 1.6634 - val_mae: 0.9362 - val_r2_score: -3.0759 - learning_rate: 0.0010
Epoch 5/500
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 1.3955 - mae:



🏃 View run classy-hen-948 at: https://dagshub.com/matteogianferrari/homelens-ca.mlflow/#/experiments/0/runs/94eaaf378d5e43a69d674f13cab63fe4
🧪 View experiment at: https://dagshub.com/matteogianferrari/homelens-ca.mlflow/#/experiments/0


2025/02/02 14:34:50 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/02/02 14:34:51 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
