# MLFlow

In [None]:
!pip install dagshub mlflow

Collecting dagshub
  Downloading dagshub-0.5.10-py3-none-any.whl.metadata (12 kB)
Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting appdirs>=1.4.4 (from dagshub)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting dacite~=1.6.0 (from dagshub)
  Downloading dacite-1.6.0-py3-none-any.whl.metadata (14 kB)
Collecting gql[requests] (from dagshub)
  Downloading gql-3.5.3-py2.py3-none-any.whl.metadata (9.4 kB)
Collecting dataclasses-json (from dagshub)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting treelib>=1.6.4 (from dagshub)
  Downloading treelib-1.7.1-py3-none-any.whl.metadata (1.4 kB)
Collecting pathvalidate>=3.0.0 (from dagshub)
  Downloading pathvalidate-3.2.3-py3-none-any.whl.metadata (12 kB)
Collecting boto3 (from dagshub)
  Downloading boto3-1.38.28-py3-none-any.whl.metadata (6.6 kB)
Collecting semver (from dagshub)
  Downloading semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)
Colle

In [None]:
import mlflow
import dagshub

import mlflow.tensorflow
from mlflow.models.signature import infer_signature

# Set up MLflow tracking through the DagsHub repository
dagshub.init(repo_owner='simoLoc', repo_name='ProgettoSE4AI', mlflow=True)

# Enables automatic logging of hyperparameters, metrics, etc., depending on the library in use.
# Additional values can be logged manually with mlflow.log_metric() if needed.
mlflow.autolog()

# Enable autologging for TensorFlow
mlflow.tensorflow.autolog()

# Set the MLflow experiment that runs will be recorded under
mlflow.set_experiment("CNN Classification Report")



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=4eca9333-aeac-4511-b623-b59b55b1cbd1&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=8363700452b1275ab086293ace50f00c413fef4a0df24841d508a84d3c4f0fe7




Output()

2025/06/03 12:45:17 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.
2025/06/03 12:45:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/06/03 12:45:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2025/06/03 12:45:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


<Experiment: artifact_location='mlflow-artifacts:/830f85127d2b48559d1927a1f9a1830a', creation_time=1748599390777, experiment_id='2', last_update_time=1748599390777, lifecycle_stage='active', name='CNN Classification Report', tags={}>

# Import Librerie


In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive; the Drive paths used later in the notebook live below this mount point.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import copy
import json
import os
import shutil

import matplotlib.pyplot as plt # plotting
import numpy as np
import pandas as pd # data processing
import seaborn as sns
import tensorflow as tf
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.utils import Sequence
from tqdm import tqdm
from tqdm.keras import TqdmCallback

2025/06/03 12:45:49 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.


## Configurazione device

Un oggetto tensorflow.device rappresenta il dispositivo sul quale avverrà l'esecuzione.  Se il dispositivo disponibile è una GPU, allora la stringa in input sarà '/GPU:0', altrimenti nel nostro caso sarà la CPU passando in input '/CPU:0'.

In [None]:
# Pick '/GPU:0' when TensorFlow lists at least one physical GPU, otherwise fall back to '/CPU:0'.
# NOTE(review): `device` is only assigned here and is not referenced by any other visible cell;
# confirm whether a `with device:` block was intended around model code.
device = tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0')
# Shell call to print GPU status; in the recorded run it failed with "command not found" (no NVIDIA tools on that runtime).
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


# Apertura dataset già splittato in train e test

In [None]:
# Folder on the mounted shared drive that holds the train/test path and label .npy files loaded below.
data_dir = '/content/drive/Shareddrives/ProgettoSE4AI/datasetUTK/'

Creazione del `tf.data.Dataset` a partire dai path e dalle etichette caricati dai file `.npy`; per ogni immagine vengono effettuati il parsing e il preprocessing. In particolare, le trasformazioni effettuate sono:
* `Rescaling`, il quale effettua uno scaling dei valori in un range da 0 a 1, poiché i modelli di deep learning funzionano meglio con valori normalizzati;
* `Normalization`, il quale permette di standardizzare i dati (portandoli approssimativamente in un range centrato sullo zero), sottraendo la media e dividendo per la deviazione standard. Prende in input due sequenze di tre elementi (Red, Green e Blue): la media e la varianza per canale (la deviazione standard usata dal layer è la radice quadrata della varianza).



In [None]:
# Load the pre-split image paths and label arrays saved as .npy files under data_dir.
train_paths = np.load(data_dir + 'train_paths.npy')
test_paths = np.load(data_dir + 'test_paths.npy')
test_labels_np = np.load(data_dir + 'test_labels.npy')
train_labels_np = np.load(data_dir + 'train_labels.npy')


# Preprocessing layers
# Multiplies pixel values by 1/255.
rescale = tf.keras.layers.Rescaling(1.0 / 255.0)

# Per-channel normalization with fixed statistics (three values, one per RGB channel).
# NOTE(review): 0.229/0.224/0.225 look like the commonly used ImageNet per-channel *standard deviations*,
# but they are passed as `variance` (the Keras layer is documented to divide by sqrt(variance)).
# Changing them would alter the preprocessing relative to the already-trained weights — confirm before fixing.
normalization = tf.keras.layers.Normalization(
    mean=[0.485, 0.456, 0.406],
    variance=[0.229, 0.224, 0.225]
)


# Parsing and preprocessing function
@tf.function
def parse_image(path):
    """Read one image from the shared drive and return it preprocessed.

    Args:
        path: image path relative to the project root on the shared drive
            (the root prefix is prepended here).

    Returns:
        The image decoded as 3-channel RGB, resized to 200x200, then passed
        through the module-level `rescale` and `normalization` layers.
    """
    raw_bytes = tf.io.read_file('/content/drive/Shareddrives/ProgettoSE4AI/' + path)
    # expand_animations=False keeps the decoded result a single image
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    # the original images are 200x200
    resized = tf.image.resize(decoded, [200, 200])
    return normalization(rescale(resized))


# Build the full dataset of (image, labels) pairs
def create_dataset(image_paths, labels_array):
    """Create a prefetched `tf.data.Dataset` of preprocessed images and label dicts.

    Args:
        image_paths: sequence of image paths, each one passed to `parse_image`.
        labels_array: per-image labels; positions 0, 1 and 2 of each row are
            exposed as 'age', 'gender' and 'ethnicity' respectively.

    Returns:
        A dataset yielding `(image, {'age': ..., 'gender': ..., 'ethnicity': ...})`.
    """
    def _to_example(path, label):
        targets = {
            'age': label[0],
            'gender': label[1],
            'ethnicity': label[2]
        }
        return parse_image(path), targets

    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    label_ds = tf.data.Dataset.from_tensor_slices(labels_array)

    paired = tf.data.Dataset.zip((path_ds, label_ds))
    mapped = paired.map(_to_example, num_parallel_calls=tf.data.AUTOTUNE)
    return mapped.prefetch(tf.data.AUTOTUNE)

# Unified datasets (images together with their labels)
train_dataset = create_dataset(train_paths, train_labels_np)
test_dataset = create_dataset(test_paths, test_labels_np)

# Sanity check: print the shape and labels of the first training example
for image, label in train_dataset.take(1):
    print("Image shape:", image.shape)
    print("Label:", label)

Image shape: (200, 200, 3)
Label: {'age': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'gender': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'ethnicity': <tf.Tensor: shape=(), dtype=int64, numpy=3>}


# Definizione CNN

In [None]:
def create_CNN_multi_output(input_shape,
                          num_ethnicity_classes=5,
                          num_age_classes=4,
                          dropout_rate=0.3,
                          activation='gelu',
                          learning_rate=1e-3):
    """Build and compile a three-output classifier on an EfficientNetB0 backbone.

    Args:
        input_shape: shape of a single input image, e.g. (200, 200, 3).
        num_ethnicity_classes: number of units of the 'ethnicity' softmax head.
        num_age_classes: number of units of the 'age' softmax head.
        dropout_rate: dropout rate applied after the shared dense layer.
        activation: activation of the shared 32-unit dense layer.
        learning_rate: learning rate of the Adam optimizer.

    Returns:
        A compiled `tf.keras.Model` whose outputs are, in order,
        'age' (softmax), 'gender' (single sigmoid unit) and 'ethnicity' (softmax).
    """
    keras_layers = tf.keras.layers
    head_names = ('age', 'gender', 'ethnicity')

    image_input = keras_layers.Input(shape=input_shape)

    # Feature extractor without the classification top; weights=None means no pretrained weights are loaded.
    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False,
        weights=None,
        input_tensor=image_input,
        input_shape=input_shape,
        pooling=None,
        classifier_activation='softmax',
    )

    # Shared trunk: pooled backbone features -> small dense layer -> dropout
    features = keras_layers.GlobalAveragePooling2D()(backbone.output)
    features = keras_layers.Dense(32, activation=activation)(features)
    features = keras_layers.Dropout(dropout_rate)(features)

    # Output heads (created in the same order they are returned by the model)
    heads = [
        keras_layers.Dense(num_age_classes, activation='softmax', name='age')(features),
        keras_layers.Dense(1, activation='sigmoid', name='gender')(features),
        keras_layers.Dense(num_ethnicity_classes, activation='softmax', name='ethnicity')(features),
    ]

    model = tf.keras.Model(image_input, heads)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss={
            'age': 'sparse_categorical_crossentropy',
            'gender': 'binary_crossentropy',
            'ethnicity': 'sparse_categorical_crossentropy',
        },
        metrics={name: 'accuracy' for name in head_names},
    )
    return model

# Definizione del metodo `classify_face(image_path)`



In [None]:
# Saved best model on the shared drive.
best_model_path = '/content/drive/Shareddrives/ProgettoSE4AI/train_model/Best_Model_b32_eps15_lr_0.001_dr0.5.keras'
# Rebuild the architecture with the dropout rate and learning rate that also appear in the file name,
# then load the saved weights into it.
model = create_CNN_multi_output(input_shape=(200, 200, 3), dropout_rate=0.5, learning_rate=0.001)
model.load_weights(best_model_path)

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Lookup tables from the stringified class index of each task to its human-readable label.
dic = {
    task: {str(index): label for index, label in enumerate(labels)}
    for task, labels in (
        ("age", ("Young", "Adult", "Senior", "Elderly")),
        ("ethnicity", ("White", "Black", "Asian", "Indian", "Others")),
        ("gender", ("Male", "Female")),
    )
}


In [None]:
def preprocess_image(image_path):
  """Load an image file and turn it into a single-image batch for the model.

  Applies the same steps as `parse_image` (decode as RGB, resize to 200x200,
  `rescale`, `normalization`) so inference inputs match the dataset pipeline;
  the previous version skipped the normalization step.

  Args:
      image_path: full path of the image file to read.

  Returns:
      Float tensor of shape (1, 200, 200, 3).
  """
  # read the raw file content
  image = tf.io.read_file(image_path)
  # decode as an RGB image
  image = tf.image.decode_image(image, channels=3, expand_animations=False)
  image = tf.image.resize(image, (200, 200))
  # same module-level layers used by parse_image, applied on the unbatched image
  image = rescale(image)
  image = normalization(image)
  # add the batch dimension expected by model.predict
  return tf.expand_dims(image, axis=0)

def classify_face(image_path):
  """
  Classify a face image by age group, gender and ethnicity.

  Args:
      image_path: path of the image file to classify.

  Returns:
      str: JSON-formatted string with the predicted "age", "gender" and
      "ethnicity" labels, plus a "raw_output" object holding the raw model
      outputs (the "*_logits" keys carry the outputs of the softmax heads).
  """
  processed_image = preprocess_image(image_path)
  preds = model.predict(processed_image)

  # Single sigmoid unit of the gender head, as a plain Python float
  gender_prob = float(preds[1][0][0])

  # Class indices as strings, matching the keys of `dic`.
  # int(...) keeps the key free of NumPy scalar formatting (e.g. "1.0").
  age_class_idx = str(int(np.argmax(preds[0])))
  gender_class_idx = str(int(round(gender_prob)))
  ethnicity_class_idx = str(int(np.argmax(preds[2])))

  # Map indices to labels; arrays are converted to lists because
  # json.dumps cannot serialize NumPy arrays.
  result = {
      "age": dic["age"][age_class_idx],
      "gender": dic["gender"][gender_class_idx],
      "ethnicity": dic["ethnicity"][ethnicity_class_idx],
      "raw_output": {
          "age_logits": np.asarray(preds[0]).tolist(),
          "gender_prob": gender_prob,
          "ethnicity_logits": np.asarray(preds[2]).tolist()
      }
  }

  return json.dumps(result, indent=2)


# Entries of test_paths are relative to the project root on the shared drive
# (parse_image prepends the same prefix), so build the full path before classifying.
print(classify_face('/content/drive/Shareddrives/ProgettoSE4AI/' + str(test_paths[1])))