In [None]:
from dotenv import load_dotenv
# Load environment variables from a .env file (python-dotenv) so that later
# os.getenv(...) lookups, e.g. MLFLOW_TRACKING_URI, can see them
load_dotenv()

In [None]:
# Load packages
import os

import mlflow
import mlflow.pytorch
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

from mdsist.architectures import CNN
from mdsist.dataset import MdsistDataset
from mdsist.config import PROCESSED_DATA_DIR, RAW_DATA_DIR, MODELS_DIR
from mdsist.trainer import Trainer

In [3]:
# Training hyperparameters (tune here; the MLflow cell logs these values)
EPOCHS = 5            # logged to MLflow as 'epochs'
BATCH_SIZE = 64       # mini-batch size used by both DataLoaders
LEARNING_RATE = 1e-3  # logged to MLflow as 'learning_rate'

In [4]:
# Preprocessing applied to every sample: convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5
# (presumably maps ToTensor's [0, 1] output to [-1, 1] -- confirm input format)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Load datasets: the 'train' parquet split is used for training and the
# 'test' parquet split is used for validation; both share the same transform
train_path = RAW_DATA_DIR / 'train-00000-of-00001.parquet'
val_path = RAW_DATA_DIR / 'test-00000-of-00001.parquet'
train_dataset = MdsistDataset(train_path, transform=transform)
val_dataset = MdsistDataset(val_path, transform=transform)

# Create DataLoaders: shuffle the training samples, keep the validation
# samples in dataset order; both use BATCH_SIZE-sized mini-batches
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [5]:
# Init model: instantiate the project's CNN architecture (mdsist.architectures)
# with its default constructor arguments
model = CNN()

In [6]:
# Optimizer: Adam over all model parameters. The learning rate comes from the
# LEARNING_RATE hyperparameter (instead of a duplicated literal) so that the
# value logged to MLflow is always the value actually used for training.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Point MLflow at the tracking server named by the MLFLOW_TRACKING_URI
# environment variable, then select the experiment the run is recorded under
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI'))
mlflow.set_experiment('CNN_training_v1')

# Train the model inside a single MLflow run so that parameters and the
# resulting model are recorded together.
with mlflow.start_run():
    mlflow.set_tag('mlflow.runName', 'First MLFlow test')

    # Log hyperparameters
    mlflow.log_param('epochs', EPOCHS)
    mlflow.log_param('batch_size', BATCH_SIZE)
    mlflow.log_param('learning_rate', LEARNING_RATE)

    # Train. Pass EPOCHS rather than a literal so the logged 'epochs' param
    # cannot drift from the value actually used.
    # NOTE(review): assumes the third positional argument of Trainer.train is
    # the number of epochs -- confirm against mdsist.trainer.
    trainer = Trainer(model, optimizer)
    trainer.train(train_loader, val_loader, EPOCHS)

    # Log the model itself to MLflow
    mlflow.pytorch.log_model(trainer.model, "model")

In [8]:
# Persist the trained model to MODELS_DIR/model.pt. Create the directory first
# so the save does not fail on a fresh checkout where it does not exist yet.
# NOTE(review): torch.save on a whole module pickles the object, so loading it
# requires the same class definition to be importable; saving
# trainer.model.state_dict() is the more portable option if consumers allow it.
os.makedirs(MODELS_DIR, exist_ok=True)
torch.save(trainer.model, MODELS_DIR / 'model.pt')