<a href="https://colab.research.google.com/github/matteogianferrari/homelens-ca/blob/main/model/notebooks/homelens_ca_research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **HomeLens CA DL Model Research**

# **Setup**


---
**OS Environment Variables**

In [1]:
from google.colab import userdata
import os

# Maps every environment variable to the Colab secret that supplies its value.
# GITHUB_TOKEN and USER are read by the git clone command further down; the
# MLFLOW_TRACKING_* pair carries the MLFlow credentials (the username reuses
# the USER secret).
_secret_for_env = {
    'GITHUB_TOKEN': 'GITHUB_TOKEN',
    'USER': 'USER',
    'MLFLOW_TRACKING_USERNAME': 'USER',
    'MLFLOW_TRACKING_PASSWORD': 'MLFLOW_TRACKING_PASSWORD',
}

# Copies each secret into the process environment
for _env_name, _secret_name in _secret_for_env.items():
    os.environ[_env_name] = userdata.get(_secret_name)

---
**Library Installation**

In [2]:
!pip install dagshub mlflow pynvml --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.2/258.2 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.3/28.3 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m118.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.6/233.6 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

---
**Library Imports**

In [3]:
import tensorflow as tf

import mlflow
from mlflow.tracking import MlflowClient

import dagshub

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

---
**Dataset import**

In [4]:
# Clones the repo over HTTPS; the shell expands GITHUB_TOKEN and USER from the
# environment variables set in the first cell. Then makes the cloned repo the
# working directory.
!git clone https://${GITHUB_TOKEN}@github.com/${USER}/homelens-ca.git
%cd homelens-ca/

Cloning into 'homelens-ca'...
remote: Enumerating objects: 60, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (41/41), done.[K
remote: Total 60 (delta 13), reused 52 (delta 8), pack-reused 0 (from 0)[K
Receiving objects: 100% (60/60), 1.93 MiB | 11.65 MiB/s, done.
Resolving deltas: 100% (13/13), done.
/content/homelens-ca


In [5]:
# Changes dir
%cd model/data/dataset

# Imports the dataset and the labels
X = pd.read_csv('X.csv').to_numpy()
y = pd.read_csv('y.csv').to_numpy()

# Applies transformation to labels
y = np.log1p(y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

/content/homelens-ca/model/data/dataset


---
**DagsHub and MLFlow**

In [8]:
# Name of the MLFlow experiment that the training run is recorded under
experiment_name = "HomeLens CA-Research"

# Address of the MLFlow tracking server exposed by the DagsHub repo
TRACKING_URI = "https://dagshub.com/matteogianferrari/homelens-ca.mlflow"

# Initialises DagsHub for the repo with its MLFlow integration switched on
dagshub.init(
    repo_name='homelens-ca',
    repo_owner='matteogianferrari',
    mlflow=True,
)

# Points MLFlow at the tracking server
mlflow.set_tracking_uri(TRACKING_URI)

# **Research and Training DL Models**

---
**Model Hyperparameters**

Defines the hyperparameters used to build and train the model.

In [9]:
# --- Training loop ---
epochs = 300                      # upper bound; early stopping may end training sooner
batch_size = 128
validation_slip = 0.1             # passed to fit() as validation_split (name kept: the training cell reads it)
loss_func = 'mse'

# --- Learning rate ---
initial_learning_rate = 1e-3      # starting learning rate of the Adam optimizer
minimum_learning_rate = 1e-8      # min_lr of ReduceLROnPlateau
decaying_factor = 0.5             # factor of ReduceLROnPlateau
patience_lr = 15                  # patience of ReduceLROnPlateau

# --- Early stopping ---
patience_es = 30                  # patience of EarlyStopping

---
**Training and Testing**

In [10]:
import pynvml

# Selects the MLFlow experiment that the run started below is recorded under
mlflow.set_experiment(experiment_name)

# Enables MLFlow autologging for Keras training
mlflow.keras.autolog()



# Defines the model
def build_dynamic_model(input_shape, n_repeats, block_units=(512, 256, 128),
                        head_units=64, dropout_rate=0.2):
    """
    Builds a fully-connected network using TensorFlow's Functional API.

    The network stacks ``n_repeats`` identical blocks. Inside a block, every
    width in ``block_units`` produces a ReLU Dense layer (He-normal
    initialisation) followed by BatchNormalization and Dropout. After the
    blocks, a Dense + BatchNormalization head feeds a single Dense output
    unit with no activation.

    Args:
        input_shape (tuple): Shape of one input sample, e.g. ``(15,)``.
        n_repeats (int): Number of times to repeat the block of layers.
        block_units (tuple): Dense layer widths inside each repeated block.
        head_units (int): Width of the Dense layer placed after the blocks.
        dropout_rate (float): Dropout rate applied after each block layer.

    Returns:
        tf.keras.Model: The assembled model. It is NOT compiled; the caller
        must call ``compile`` on it before training.
    """
    # Input layer
    inputs = tf.keras.layers.Input(shape=input_shape)
    x = inputs

    # Repeated blocks: Dense -> BatchNormalization -> Dropout for each width
    for _ in range(n_repeats):
        for units in block_units:
            x = tf.keras.layers.Dense(units, activation='relu', kernel_initializer='he_normal')(x)
            x = tf.keras.layers.BatchNormalization()(x)
            x = tf.keras.layers.Dropout(dropout_rate)(x)

    # Head: one more Dense + BatchNormalization, without dropout
    x = tf.keras.layers.Dense(head_units, activation='relu', kernel_initializer='he_normal')(x)
    x = tf.keras.layers.BatchNormalization()(x)

    # Output layer: a single unit with no activation
    outputs = tf.keras.layers.Dense(1)(x)

    # Create the model
    return tf.keras.Model(inputs, outputs)

# Seeds the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialisation starts from the same random state each run
tf.keras.utils.set_random_seed(42)

# Builds the model used for training. The input shape is taken from the
# training data so it always matches the number of feature columns in X.
n_repeats = 3
input_shape = X_train.shape[1:]
model = build_dynamic_model(input_shape, n_repeats)



# Adam starts at the initial learning rate; any decay during training comes
# from the ReduceLROnPlateau callback created below, not from the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)

# Compiles the model with the configured loss and tracks R^2 as a metric
model.compile(
    optimizer=optimizer,
    loss=loss_func,
    metrics=['r2_score']
)

# Early stopping: ends training once the validation R^2 has not improved for
# patience_es epochs and restores the best weights seen
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_r2_score',
    mode='max',
    patience=patience_es,
    restore_best_weights=True
)

# Learning-rate schedule: multiplies the learning rate by decaying_factor when
# the validation loss has not improved for patience_lr epochs, never going
# below minimum_learning_rate
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=decaying_factor,
    patience=patience_lr,
    min_lr=minimum_learning_rate
)


# Train the model and log with MLFlow
with mlflow.start_run(log_system_metrics=True):
    # "patience" is the early-stopping patience (key kept for existing runs)
    mlflow.log_param("patience", patience_es)
    mlflow.log_param("loss_func", loss_func)

    # Records the learning-rate schedule settings explicitly so that runs can
    # be compared on them
    mlflow.log_params({
        "patience_lr": patience_lr,
        "decaying_factor": decaying_factor,
        "minimum_learning_rate": minimum_learning_rate,
    })

    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_slip,
        callbacks=[es_callback, lr_callback]
    )

    # Test the model and log with MLFlow. evaluate() returns the loss followed
    # by the compiled metric; both are computed on the log1p-transformed labels.
    test_loss, test_r2_score = model.evaluate(X_test, y_test)
    mlflow.log_metric("test_loss", test_loss)
    mlflow.log_metric("test_r2_score", test_r2_score)

2025/01/23 19:42:26 INFO mlflow.tracking.fluent: Experiment with name 'HomeLens CA-Research' does not exist. Creating a new experiment.
2025/01/23 19:42:29 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Epoch 1/300
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 69ms/step - loss: 136.2269 - r2_score: -492.4857 - val_loss: 85.9498 - val_r2_score: -319.5759 - learning_rate: 0.0010
Epoch 2/300
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 72.1009 - r2_score: -261.1549 - val_loss: 19.1972 - val_r2_score: -70.6019 - learning_rate: 0.0010
Epoch 3/300
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 12.3668 - r2_score: -43.1783 - val_loss: 0.8932 - val_r2_score: -2.3316 - learning_rate: 0.0010
Epoch 4/300
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5608 - r2_score: -1.0515 - val_loss: 0.1145 - val_r2_score: 0.5731 - learning_rate: 0.0010
Epoch 5/300
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1832 - r2_score: 0.3425 - val_loss: 0.1323 - val_r2_score: 0.5067 - learning_rate: 0.0010
Epoch 6/300
[1m116/116[0m [32m━━━━━━━━━



🏃 View run entertaining-yak-327 at: https://dagshub.com/matteogianferrari/homelens-ca.mlflow/#/experiments/0/runs/a0737aa1a35541d4b9c1717001f90c71
🧪 View experiment at: https://dagshub.com/matteogianferrari/homelens-ca.mlflow/#/experiments/0


2025/01/23 19:48:18 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/01/23 19:48:19 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
