# Train
Create a CNN and train it using the data preprocessed on previous notebooks

In [2]:
from dotenv import load_dotenv

# Load envionment variables
load_dotenv()

# Load packages
import os

import codecarbon as cc
import mlflow
import mlflow.pytorch
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
from ptflops import get_model_complexity_info

from mdsist.architectures import CNN
from mdsist.dataset import MdsistDataset
from mdsist.config import PROCESSED_DATA_DIR, RAW_DATA_DIR, MODELS_DIR
from mdsist.trainer import Trainer

import mdsist.util as util

[32m2024-10-11 13:35:19.290[0m | [1mINFO    [0m | [36mmdsist.config[0m:[36m<module>[0m:[36m15[0m - [1mPROJ_ROOT path is: C:\Users\Adria.Portatil-Adria\Documents\uni\MDS\MLOPS\MLOps-mdsist[0m


In [3]:
# Set hyperparameters
EXPERIMENT_ID = 'CNN_v2_testing_deleteME2'
EPOCHS = 5
BATCH_SIZE = 64
LEARNING_RATE = 0.0001
SEED = 42

In [4]:
# Seed for reproducibility
util.seed_all(SEED)

In [5]:
# Define transformations
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Load datasets
train_dataset = MdsistDataset(PROCESSED_DATA_DIR / 'train.parquet', transform=transform)
val_dataset = MdsistDataset(PROCESSED_DATA_DIR / 'validation.parquet', transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

Init model

In [6]:
model = CNN()
device = util.get_available_device()
model.to(device)

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1568, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

Log model complexity (params and flops)

In [7]:
util.log_model_complexity(model)

[32m2024-10-11 13:35:20.339[0m | [1mINFO    [0m | [36mmdsist.util[0m:[36mlog_model_complexity[0m:[36m74[0m - [1mFLOPS: 1.29 MMac[0m
[32m2024-10-11 13:35:20.339[0m | [1mINFO    [0m | [36mmdsist.util[0m:[36mlog_model_complexity[0m:[36m75[0m - [1mParameters: 206.92 k[0m


Optimizer

In [8]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))
mlflow.set_experiment(EXPERIMENT_ID)

with mlflow.start_run():
    mlflow.set_tag('mlflow.runName', 'Train')

    # Log hyperparameters
    mlflow.log_param('epochs', EPOCHS)
    mlflow.log_param('batch_size', BATCH_SIZE)
    mlflow.log_param('learning_rate', LEARNING_RATE)
    mlflow.log_param('seed', SEED)

    # Start emissions tracking
    #emissions_tracker = cc.EmissionsTracker(project_name='MDSIST', experiment_id=EXPERIMENT_ID  )
    #emissions_tracker.start()
    
    # Train
    trainer = Trainer(model, optimizer, device)
    trainer.train(train_loader, val_loader, 5)

    # Stop emissions tracking and log them
    #emissions = emissions_tracker.stop()
    #mlflow.log_metric('emissions_kg_co2', emissions)

    # Log the model itself to MLflow
    mlflow.pytorch.log_model(trainer.model, 'model')

2024/10/11 13:35:20 INFO mlflow.tracking.fluent: Experiment with name 'CNN_v2_testing_deleteME2' does not exist. Creating a new experiment.


[32m2024-10-11 13:35:22.857[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m72[0m - [1mStart training for 5 epochs...[0m


 20%|██        | 1/5 [00:27<01:49, 27.29s/it]

[32m2024-10-11 13:35:50.126[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m89[0m - [1mEpoch [1/5][0m
[32m2024-10-11 13:35:50.126[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m90[0m - [1m[Train] Loss: 0.2288 | Accuracy: 0.9330 | Precision: 0.9333 | Recall: 0.9325 | F1 Score: 0.9327[0m
[32m2024-10-11 13:35:50.142[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m95[0m - [1m[Val  ] Loss: 0.0821 | Accuracy: 0.9753 | Precision: 0.9753 | Recall: 0.9754 | F1 Score: 0.9752[0m


 40%|████      | 2/5 [00:55<01:23, 27.77s/it]

[32m2024-10-11 13:36:18.227[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m89[0m - [1mEpoch [2/5][0m
[32m2024-10-11 13:36:18.227[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m90[0m - [1m[Train] Loss: 0.0613 | Accuracy: 0.9815 | Precision: 0.9814 | Recall: 0.9813 | F1 Score: 0.9814[0m
[32m2024-10-11 13:36:18.244[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m95[0m - [1m[Val  ] Loss: 0.0523 | Accuracy: 0.9843 | Precision: 0.9841 | Recall: 0.9842 | F1 Score: 0.9841[0m


 60%|██████    | 3/5 [01:24<00:56, 28.18s/it]

[32m2024-10-11 13:36:46.899[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m89[0m - [1mEpoch [3/5][0m
[32m2024-10-11 13:36:46.914[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m90[0m - [1m[Train] Loss: 0.0423 | Accuracy: 0.9865 | Precision: 0.9865 | Recall: 0.9864 | F1 Score: 0.9864[0m
[32m2024-10-11 13:36:46.914[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m95[0m - [1m[Val  ] Loss: 0.0542 | Accuracy: 0.9840 | Precision: 0.9840 | Recall: 0.9838 | F1 Score: 0.9839[0m


 80%|████████  | 4/5 [01:58<00:30, 30.78s/it]

[32m2024-10-11 13:37:21.666[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m89[0m - [1mEpoch [4/5][0m
[32m2024-10-11 13:37:21.667[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m90[0m - [1m[Train] Loss: 0.0322 | Accuracy: 0.9899 | Precision: 0.9898 | Recall: 0.9898 | F1 Score: 0.9898[0m
[32m2024-10-11 13:37:21.674[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m95[0m - [1m[Val  ] Loss: 0.0498 | Accuracy: 0.9867 | Precision: 0.9865 | Recall: 0.9866 | F1 Score: 0.9865[0m


100%|██████████| 5/5 [02:30<00:00, 30.13s/it]


[32m2024-10-11 13:37:53.499[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m89[0m - [1mEpoch [5/5][0m
[32m2024-10-11 13:37:53.499[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m90[0m - [1m[Train] Loss: 0.0248 | Accuracy: 0.9920 | Precision: 0.9919 | Recall: 0.9919 | F1 Score: 0.9919[0m
[32m2024-10-11 13:37:53.499[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m95[0m - [1m[Val  ] Loss: 0.0482 | Accuracy: 0.9855 | Precision: 0.9856 | Recall: 0.9852 | F1 Score: 0.9854[0m
[32m2024-10-11 13:37:53.505[0m | [1mINFO    [0m | [36mmdsist.trainer[0m:[36mtrain[0m:[36m101[0m - [1mTraining completed.[0m


2024/10/11 13:38:08 INFO mlflow.tracking._tracking_service.client: View run Train at: https://dagshub.com/Zhengyong8119/MLOps-mdsist.mlflow/#/experiments/14/runs/ef51e37f6fd14b87ab58964e1153bd5d.
2024/10/11 13:38:08 INFO mlflow.tracking._tracking_service.client: View experiment at: https://dagshub.com/Zhengyong8119/MLOps-mdsist.mlflow/#/experiments/14.


save the model

In [10]:
torch.save(trainer.model, './model.pt')