# MLflow

In [None]:
!pip install dagshub mlflow



In [None]:
import mlflow
import dagshub

import mlflow.tensorflow
from mlflow.models.signature import infer_signature

# Set up MLflow tracking through the DagsHub repository identified by
# repo_owner / repo_name. On first use this call prompts for browser authorization.
dagshub.init(repo_owner='simoLoc', repo_name='ProgettoSE4AI', mlflow=True)

# Turns on automatic logging of hyperparameters, metrics, etc., depending on the library in use.
# Additional values can still be logged manually with mlflow.log_metric() if needed.
mlflow.autolog()

# Enable autologging for TensorFlow.
# NOTE(review): the generic mlflow.autolog() call above may already cover TensorFlow,
# which would make this call redundant — confirm before removing.
mlflow.tensorflow.autolog()

# Select the MLflow experiment under which the following runs are recorded
# (presumably created if it does not exist yet — TODO confirm).
mlflow.set_experiment("CNN Classification Report")



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=1949e468-f1df-48f7-93e2-c6e55fef5d01&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=e25a11ed810c9297ce14e9a9307ad4cdf417122f0c137cdfd07df0a3b23b552d




Output()

2025/06/01 16:13:19 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.
2025/06/01 16:13:21 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/06/01 16:13:21 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2025/06/01 16:13:21 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


<Experiment: artifact_location='mlflow-artifacts:/830f85127d2b48559d1927a1f9a1830a', creation_time=1748599390777, experiment_id='2', last_update_time=1748599390777, lifecycle_stage='active', name='CNN Classification Report', tags={}>

# Import Librerie

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later in the
# notebook live under this mount point. force_remount=True requests a fresh
# mount even when the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# libreria di metriche di fairness
!pip install aif360
!pip install 'aif360[Reductions]'

Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Downloading aif360-0.6.1-py3-none-any.whl (259 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: aif360
Successfully installed aif360-0.6.1
Collecting fairlearn~=0.7 (from aif360[Reductions])
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


In [None]:
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import shutil
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt # plotting
import pandas as pd # data processing
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import seaborn as sns
from PIL import Image
import copy
from tqdm.keras import TqdmCallback

## Configurazione device

Un oggetto `tf.device` rappresenta il dispositivo sul quale avverrà l'esecuzione. Se è disponibile una GPU, la stringa passata in input sarà '/GPU:0'; altrimenti, nel nostro caso, verrà usata la CPU passando in input '/CPU:0'.

In [None]:
device = tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0')
!nvidia-smi

Sun Jun  1 16:14:14 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   46C    P0             27W /   70W |     102MiB /  15360MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# Creazione dataset per la GAN

In [None]:
# Locations of the UTKFace dataset on the mounted shared drive.
data_dir = '/content/drive/Shareddrives/ProgettoSE4AI/datasetUTK/'
img_dir = os.path.join(data_dir, 'images_UTKFace')               # source images
csv_dir = os.path.join(data_dir, 'list_attr_utk.csv')            # attribute table (a file path, despite the name)
output_dir = os.path.join(data_dir, 'images_filtered_UTKFace')   # destination folder for the filtered subset

if os.path.exists(csv_dir):
  # Load the attribute table and show its size plus a small preview.
  df_attr = pd.read_csv(csv_dir, delimiter=',')
  print(f'Righe totali prima del filtro: {df_attr.shape[0]}')
  print(df_attr.head(5))

  # One frame per value of the 'ethnicity' column (codes 0-4).
  df_ethnicity_white = df_attr[df_attr['ethnicity'] == 0]
  df_ethnicity_black = df_attr[df_attr['ethnicity'] == 1]
  df_ethnicity_asian = df_attr[df_attr['ethnicity'] == 2]
  df_ethnicity_indian = df_attr[df_attr['ethnicity'] == 3]
  df_ethnicity_others = df_attr[df_attr['ethnicity'] == 4]
else:
  # Without the CSV none of the frames above are defined. Report it here,
  # with the same message used by the filtering cell, instead of letting the
  # following cells fail with an unexplained NameError.
  print("File CSV non trovato.")

Righe totali prima del filtro: 23705
   age  gender  ethnicity                                img_name
0  100       0          0  100_0_0_20170112213500903.jpg.chip.jpg
1  100       0          0  100_0_0_20170112215240346.jpg.chip.jpg
2  100       1          0  100_1_0_20170110183726390.jpg.chip.jpg
3  100       1          0  100_1_0_20170112213001988.jpg.chip.jpg
4  100       1          0  100_1_0_20170112213303693.jpg.chip.jpg


In [None]:
# Report the total number of rows, then the size of every ethnicity group.
print(f'Righe totali prima del filtro: {df_attr.shape[0]}')
for _group in (
    df_ethnicity_white,
    df_ethnicity_black,
    df_ethnicity_asian,
    df_ethnicity_indian,
    df_ethnicity_others,
):
    print(len(_group))

# Target number of samples per group.
num_samples = 5000

# How many samples each non-white group is missing to reach the target.
print("\n\nELEMENTI DA GENERARE")
for _group in (
    df_ethnicity_black,
    df_ethnicity_asian,
    df_ethnicity_indian,
    df_ethnicity_others,
):
    print(num_samples - len(_group))


Righe totali prima del filtro: 23705
10078
4526
3434
3975
1692


ELEMENTI DA GENERARE
474
1566
1025
3308


In [None]:
# Build the reduced subset from the per-ethnicity frames created earlier.
# The CSV is not re-read here; only its presence is checked again.
if not os.path.exists(csv_dir):
  print("File CSV non trovato.")
else:
  # Draw a reproducible 10% sample from each of the four non-white groups.
  (df_ethnicity_black_sampled,
   df_ethnicity_asian_sampled,
   df_ethnicity_indian_sampled,
   df_ethnicity_others_sampled) = (
      group.sample(frac=0.1, random_state=42)
      for group in (
          df_ethnicity_black,
          df_ethnicity_asian,
          df_ethnicity_indian,
          df_ethnicity_others,
      )
  )

  # Concatenate the samples, shuffle the rows with a fixed seed and index by image name.
  df_attr_filtered = (
      pd.concat(
          [
              df_ethnicity_black_sampled,
              df_ethnicity_asian_sampled,
              df_ethnicity_indian_sampled,
              df_ethnicity_others_sampled,
          ],
          ignore_index=True,
      )
      .sample(frac=1, random_state=42)
      .reset_index(drop=True)
      .set_index('img_name')
  )

  print(df_attr_filtered.shape[0])
  print(df_attr_filtered.head(5))

  # Persist the filtered attribute table next to the original one.
  filtered_csv_path = os.path.join(data_dir, 'list_filtered_attr_utk.csv')
  df_attr_filtered.to_csv(filtered_csv_path)

  # Create the folder that receives the filtered images.
  os.makedirs(output_dir, exist_ok=True)

  # Copy every selected image that exists in the source folder; report the missing ones.
  copied = 0
  for img_name in df_attr_filtered.index:
    source_file = os.path.join(img_dir, img_name)
    if not os.path.exists(source_file):
      print(f"File non trovato: {img_name}")
      continue
    shutil.copy(source_file, os.path.join(output_dir, img_name))
    copied += 1

  print(f"Immagini copiate: {copied}")