In [None]:
from dotenv import load_dotenv

# Load environment variables before anything reads them
# (MLFLOW_TRACKING_URI is fetched with os.getenv further down).
load_dotenv()

In [None]:
# Load packages
import os

import codecarbon as cc
import mlflow
import mlflow.pytorch
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.optim as optim
from ptflops import get_model_complexity_info

from mdsist.architectures import CNN
from mdsist.dataset import MdsistDataset
from mdsist.config import PROCESSED_DATA_DIR, RAW_DATA_DIR, MODELS_DIR
from mdsist.trainer import Trainer

import mdsist.util as util

In [3]:
# --- Run configuration -------------------------------------------------------
# Experiment name handed to MLflow and to the emissions tracker.
EXPERIMENT_ID = 'CNN_v2'
# Seed handed to util.seed_all for reproducibility.
SEED = 42

# --- Training hyperparameters (logged to MLflow with the run) ----------------
EPOCHS = 5
BATCH_SIZE = 64  # batch size of both the training and validation DataLoaders
LEARNING_RATE = 1e-4

In [4]:
# Seed for reproducibility. This runs before the datasets, model and loaders
# are created so that everything downstream starts from the same seed.
# NOTE(review): presumably seeds Python, NumPy and torch RNGs -- confirm in mdsist.util.
util.seed_all(SEED)

In [5]:
# Preprocessing pipeline: convert each sample to a tensor, then normalise it
# with mean 0.5 and std 0.5 (single-element tuples).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Processed splits, stored as parquet files
train_path = PROCESSED_DATA_DIR / 'train.parquet'
val_path = PROCESSED_DATA_DIR / 'validation.parquet'

# Both splits share the same preprocessing pipeline
train_dataset = MdsistDataset(train_path, transform=transform)
val_dataset = MdsistDataset(val_path, transform=transform)

# Only the training loader shuffles; validation keeps the dataset order
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Init model
# The model is built after util.seed_all(SEED) has run, so keep this cell
# below the seeding cell.
model = CNN()
# NOTE(review): presumably picks an accelerator when available and falls back to CPU -- confirm in mdsist.util.
device = util.get_available_device()
# Move the parameters to the selected device. As the last expression of the
# cell, this also displays the model in the notebook output.
model.to(device)

In [None]:
# Log model complexity (params and flops)
# NOTE(review): where the figures are reported (stdout, logger, MLflow) is
# decided inside util.log_model_complexity -- confirm there.
util.log_model_complexity(model)

In [8]:
# Optimizer
# Use the LEARNING_RATE constant from the hyperparameter cell instead of a
# separate literal: it is the value logged to MLflow as 'learning_rate', so
# the logged hyperparameter now matches what the optimizer actually uses.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Point MLflow at the tracking server configured in the environment and select
# (or create) the experiment for this notebook.
mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))
mlflow.set_experiment(EXPERIMENT_ID)

with mlflow.start_run():
    mlflow.set_tag('mlflow.runName', 'Train')

    # Log hyperparameters
    mlflow.log_params(
        {
            'epochs': EPOCHS,
            'batch_size': BATCH_SIZE,
            'learning_rate': LEARNING_RATE,
            'seed': SEED,
        }
    )

    # Start emissions tracking
    emissions_tracker = cc.EmissionsTracker(project_name='MDSIST', experiment_id=EXPERIMENT_ID)
    emissions_tracker.start()

    try:
        # Train for the configured number of epochs (the same EPOCHS value that
        # is logged above, so the logged parameter always describes the run).
        trainer = Trainer(model, optimizer, device)
        trainer.train(train_loader, val_loader, EPOCHS)
    finally:
        # Always stop the tracker, even if training fails, so it does not keep
        # running in the kernel after an error.
        emissions = emissions_tracker.stop()

    # stop() can return None when no measurement is available; only log a
    # real value.
    if emissions is not None:
        mlflow.log_metric('emissions_kg_co2', emissions)

    # Log the model itself to MLflow
    mlflow.pytorch.log_model(trainer.model, 'model')

In [11]:
# Persist the trained model locally. Make sure the target directory exists
# first so a missing folder cannot fail the save after training has finished.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
# NOTE: this saves the whole model object rather than a state_dict, so loading
# it needs the model class (mdsist.architectures.CNN) to be importable.
torch.save(trainer.model, MODELS_DIR / 'model.pt')