In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchmetrics import Accuracy
from torchvision.transforms import ToTensor
from torchvision import datasets
from azure.ai.ml.entities import Workspace, Environment
import mlflow
import numpy as np
import pandas as pd


  Referenced from: <367D4265-B20F-34BD-94EB-4F3EE47C385B> /Users/peterbrezovcsik/miniconda3/envs/face_recognition/lib/python3.12/site-packages/torchvision/image.so
  warn(


In [3]:
# Point the MLflow client at the tracking server on the loopback address and
# select the experiment that the runs in this notebook are recorded under.
# set_experiment stays last so the cell still displays the Experiment object.
mlflow.set_tracking_uri(uri='http://127.0.0.1:8080')
mlflow.set_experiment(experiment_name='array_demo')

<Experiment: artifact_location='mlflow-artifacts:/136916772708828886', creation_time=1739206421607, experiment_id='136916772708828886', last_update_time=1739206421607, lifecycle_stage='active', name='array_demo', tags={}>

In [56]:
# Scratch check: rows can be appended to an initially empty DataFrame by
# assigning an array to a not-yet-existing index label via .loc.
step = 0
metrics = np.array([0.13333, 0.43038])
df = pd.DataFrame(columns=['1', '2'])
for offset, row in enumerate((metrics, np.array([0.53333, 0.03038]))):
    df.loc[step + offset] = row
df

Unnamed: 0,1,2
0,0.13333,0.43038
1,0.53333,0.03038


In [None]:
class MetricsLogger():
    """Log multi-label metrics to MLflow and keep per-label score histories.

    Scalar metrics are sent to the active MLflow run with ``mlflow.log_metric``.
    Per-label F1 / recall / precision values reported for the ``'eval'`` stage
    are collected in DataFrames (one row per step, one column per label) so
    they can be uploaded as tables with ``save_artifact``.
    """

    def __init__(self, columns):
        """Create the empty per-label history tables.

        Args:
            columns: column labels of the per-label tables. Every ``per_label``
                sequence given to ``log_metrics`` needs one value per column.
        """
        self.df_f1 = pd.DataFrame(columns=columns)
        self.df_rec = pd.DataFrame(columns=columns)
        self.df_prec = pd.DataFrame(columns=columns)
        # Kept for interface compatibility; nothing fills it while per-label
        # accuracy logging is disabled in log_metrics.
        self.df_acc = pd.DataFrame(columns=columns)

    @staticmethod
    def _log_scalar(name, value, step):
        """Send one metric to MLflow as a plain float (never a formatted string)."""
        mlflow.log_metric(name, float(value), step=step)

    def log_metrics(self, stage, metrics, step):
        """Log the scalar metrics of one step and record per-label scores.

        Args:
            stage: suffix appended to every metric name (e.g. ``'train'``);
                per-label scores are only recorded when it equals ``'eval'``.
            metrics: nested dict read with the keys ``'loss'``,
                ``'hamming_loss'``, ``'averaged_example_based_accuracy'``,
                ``'f1_score'`` and, for the eval stage, ``'recall'`` and
                ``'precision'`` (each with a ``'per_label'`` entry).
            step: step under which the values are logged; also used as the row
                label in the per-label tables.

        Raises:
            KeyError: if ``metrics`` lacks one of the keys listed above.
        """
        f1 = metrics['f1_score']
        scalars = {
            f"Loss_{stage}": metrics['loss'],
            f"Hamming_loss_{stage}": metrics['hamming_loss'],
            f'Subset_Accuracy_{stage}': metrics['averaged_example_based_accuracy'],
            f'F1_Score_Micro_{stage}': f1['micro_averaged'],
            # NOTE(review): 'macro_averaged' is assumed by analogy with the
            # micro/weighted keys - confirm against the code that builds `metrics`.
            f'F1_Score_Macro_{stage}': f1['macro_averaged'],
            # NOTE(review): key kept exactly as originally spelled (no trailing
            # 'd') - confirm against the code that builds `metrics`.
            f'F1_Score_Sample_{stage}': f1['sample_average'],
            f'F1_Score_Weighted_{stage}': f1['weighted_averaged'],
        }
        for name, value in scalars.items():
            self._log_scalar(name, value, step)

        if stage == 'eval':
            self.df_f1.loc[step] = f1['per_label']
            self.df_rec.loc[step] = metrics['recall']['per_label']
            self.df_prec.loc[step] = metrics['precision']['per_label']
            #self.df_acc.loc[step] = metrics['accuracy']['per_label']

    def save_artifact(self, path=None):
        """Upload the collected per-label tables to the active MLflow run.

        Args:
            path: optional run-relative artifact directory for the tables;
                ``None`` or an empty value stores them at the artifact root.
        """
        prefix = str(path).strip('/') if path else ''
        tables = {
            'f1_score_per_label': self.df_f1,
            'recall_per_label': self.df_rec,
            'precision_per_label': self.df_prec,
        }
        for name, table in tables.items():
            # log_table stores JSON, so the artifact gets a '.json' name.
            artifact_file = f"{prefix}/{name}.json" if prefix else f"{name}.json"
            # The step lives in the index; expose it as a regular column so it
            # is part of the uploaded table.
            mlflow.log_table(
                data=table.rename_axis('step').reset_index(),
                artifact_file=artifact_file,
            )
        
        


In [None]:
# Load FashionMNIST (downloaded into ../data when missing): first the training
# split, then the test split. Each dataset gets its own ToTensor transform.
training_data, test_data = (
    datasets.FashionMNIST(
        root="../data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)


In [None]:
# Report the shape of one image tensor and the number of samples per split.
print(
    f"Image size: {training_data[0][0].shape}",
    f"Size of training dataset: {len(training_data)}",
    f"Size of test dataset: {len(test_data)}",
    sep="\n",
)


In [None]:
# Wrap both splits in loaders that serve batches of 64 samples.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=64) for split in (training_data, test_data)
)

In [None]:
class ImageClassifier(nn.Module):
    """Small convolutional classifier producing one logit per class (10 classes)."""

    def __init__(self):
        super().__init__()
        # Two 3x3 convolutions (1 -> 8 -> 16 channels), each followed by a ReLU.
        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3),
            nn.ReLU(),
        ]
        # LazyLinear infers its input size on the first forward pass.
        head_layers = [
            nn.Flatten(),
            nn.LazyLinear(out_features=10),  # 10 classes in total.
        ]
        self.model = nn.Sequential(*feature_layers, *head_layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its output."""
        logits = self.model(x)
        return logits


In [None]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
def train(dataloader, model, loss_fn, optimizer, epoch):
    """Train the model on a single pass of the dataloader.

    Every 100th batch the current loss is printed and logged to MLflow under
    the key ``"loss"``. The logged step is the global batch index
    (``epoch * len(dataloader) + batch``), so steps keep increasing across
    epochs instead of colliding.

    Args:
        dataloader: a sized iterable of ``(X, y)`` batches, e.g. an instance of
            `torch.utils.data.DataLoader`, containing the training data.
        model: an instance of `torch.nn.Module`, the model to be trained.
        loss_fn: a callable, the loss function.
        optimizer: an instance of `torch.optim.Optimizer`, the optimizer used for training.
        epoch: an integer, the current (zero-based) epoch number.
    """
    model.train()
    num_batches = len(dataloader)
    for batch, (X, y) in enumerate(dataloader):
        # `device` is the module-level device string selected earlier in the notebook.
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss_value = loss.item()
            step = epoch * num_batches + batch
            # Log the raw float; a formatted string would truncate precision.
            mlflow.log_metric("loss", loss_value, step=step)
            print(f"loss: {loss_value:2f} accuracy: NA [{batch} / {num_batches}]")


In [None]:
def evaluate(dataloader, model, loss_fn, epoch, metrics_fn=None):
    """Evaluate the model on a single pass of the dataloader.

    The mean batch loss is logged to MLflow as ``"eval_loss"`` with
    ``step=epoch``. When ``metrics_fn`` is given, the mean of its per-batch
    values is additionally logged as ``"eval_accuracy"``.

    Args:
        dataloader: a sized iterable of ``(X, y)`` batches, e.g. an instance of
            `torch.utils.data.DataLoader`, containing the eval data.
        model: an instance of `torch.nn.Module`, the model to be evaluated.
        loss_fn: a callable, the loss function.
        epoch: an integer, the current epoch number.
        metrics_fn: optional callable ``metrics_fn(pred, y)`` returning a scalar
            score per batch. When omitted, accuracy is reported as "NA".

    Raises:
        ValueError: if the dataloader yields no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("evaluate() needs a dataloader with at least one batch")

    model.eval()
    eval_loss, eval_accuracy = 0.0, 0.0
    with torch.no_grad():
        for X, y in dataloader:
            # `device` is the module-level device string selected earlier in the notebook.
            X, y = X.to(device), y.to(device)
            pred = model(X)
            eval_loss += loss_fn(pred, y).item()
            if metrics_fn is not None:
                eval_accuracy += float(metrics_fn(pred, y))

    eval_loss /= num_batches
    # Log the raw float; a formatted string would truncate precision.
    mlflow.log_metric("eval_loss", eval_loss, step=epoch)

    if metrics_fn is None:
        accuracy_text = "NA"
    else:
        # Mean of per-batch scores; a smaller final batch carries the same
        # weight as a full one.
        eval_accuracy /= num_batches
        mlflow.log_metric("eval_accuracy", eval_accuracy, step=epoch)
        accuracy_text = f"{eval_accuracy:2f}"

    print(f"Eval metrics: \nAccuracy: {accuracy_text}, Avg loss: {eval_loss:2f} \n")


In [None]:
# Training setup: number of epochs, the model on the selected device, the loss,
# and a plain SGD optimizer over the model's parameters.
epochs = 3
model = ImageClassifier().to(device)
loss_fn = nn.CrossEntropyLoss()
# Accuracy metric is currently disabled.
#metric_fn = Accuracy(task="multiclass", num_classes=10).to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)


In [None]:
with mlflow.start_run(run_name='f_mnist_test') as run:
    # Read the hyperparameters from the live objects instead of repeating
    # literals, so the logged values cannot drift from what is actually used.
    params = {
        "epochs": epochs,
        "learning_rate": optimizer.param_groups[0]["lr"],
        "batch_size": train_dataloader.batch_size,
        "loss_function": loss_fn.__class__.__name__,
        #"metric_function": metric_fn.__class__.__name__,
        "optimizer": optimizer.__class__.__name__,
    }
    # Log training parameters.
    mlflow.log_params(params)

    # Log model summary. The encoding is explicit because the summary text may
    # contain non-ASCII tree-drawing characters that a platform default
    # encoding cannot represent.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(model)))
    mlflow.log_artifact("model_summary.txt")

    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer, epoch=t)
        evaluate(test_dataloader, model, loss_fn, epoch=t)

    # Save the trained model to MLflow.
    mlflow.pytorch.log_model(model, "model")


In [None]:
# Load the model logged by the run above back through MLflow's generic
# pyfunc interface, addressed by the run id.
logged_model = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

In [None]:
# Predict on the third training image (with a leading batch axis added) and
# display the softmax over the class axis of the model output.
outputs = loaded_model.predict(training_data[2][0].unsqueeze(0).numpy())
torch.softmax(torch.tensor(outputs), dim=1)