In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from icecream import ic

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchmetrics import Accuracy, Recall, Precision

import mlflow

In [None]:
!pipx run mlflow server --host 127.0.0.1 --port 8080 --backend-store-uri sqlite:///my.db

In [None]:
mlflow.set_tracking_uri('http://localhost:8080/')
#mlflow.create_experiment('Total_Cloudssifier')
mlflow.set_experiment("Total_Cloudssifier")
mlflow.set_experiment_tag("version", "1.0.0")
run = mlflow.start_run()

Exception: Run with UUID 1fd84faa4c314c11b38846a8bf606dc4 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True

In [63]:
mlflow.pytorch.autolog()

In [59]:
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(45),
        transforms.ToTensor(),
        transforms.Resize((128, 128)),
    ]
)

dataset_train = ImageFolder("../data/clouds_train", transform=train_transforms)
dataloader_train = DataLoader(dataset_train, shuffle=True, batch_size=10)

In [64]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        )
        self.classifier = nn.Linear(64 * 32 * 32, num_classes)
        self.accuracy = Accuracy(task="multiclass", num_classes=7)
        self.recall = Recall(task="multiclass", num_classes=7)
        self.precision = Precision(task="multiclass", num_classes=7)

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x


In [65]:
from torchmetrics import Recall, Precision, Accuracy

accuracy = Accuracy(task='multiclass', num_classes=7, average='macro')
precision_per_class = Precision(task="multiclass", num_classes=7, average=None)
recall_per_class = Recall(task="multiclass", num_classes=7, average=None)
recall_micro = Recall(task="multiclass", num_classes=7, average="micro")
recall_macro = Recall(task="multiclass", num_classes=7, average="macro")
recall_weighted = Recall(task="multiclass", num_classes=7, average="weighted")

net = Net(num_classes=7)
criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = optim.Adam(net.parameters(), lr=lr)
mlflow.log_param('learning_rate', lr)

for epoch in range(3):
    running_loss = 0.0
    # Iterate over training batches
    for images, labels in dataloader_train:
        optimizer.zero_grad()
        outputs = net(images)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        accuracy(preds, labels)
        precision_per_class(preds, labels)
        recall_per_class(preds, labels)
        recall_micro(preds, labels)
        recall_macro(preds, labels)
        recall_weighted(preds, labels)

    accuracy.compute()
    precision_per_class.compute()
    recall_per_class.compute()
    recall_micro.compute()
    recall_macro.compute()
    recall_weighted.compute()

    epoch_loss = running_loss / len(dataloader_train)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")


Epoch 1, Loss: 2.5395
Epoch 2, Loss: 1.4980
Epoch 3, Loss: 1.3558


In [37]:
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((128, 128)),
    ]
)

dataset_test = ImageFolder("../data/clouds_test", transform=test_transforms)
dataloader_test = DataLoader(dataset_test, shuffle=True, batch_size=10)

In [None]:
metric_precision = Precision(task="multiclass", num_classes=7, average="macro")
metric_accuracy = Accuracy(task="multiclass", num_classes=7, average="macro")
metric_recall = Recall(task="multiclass", num_classes=7, average="macro")
metric_precision_per_class = Precision(task="multiclass", num_classes=7, average=None)

net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images.float())
        _, preds = torch.max(outputs, 1)
        metric_precision(preds, labels)
        metric_recall(preds, labels)
        metric_accuracy(preds, labels)
        metric_precision_per_class(preds, labels)

precision = metric_precision.compute()
recall = metric_recall.compute()
accuracy = metric_accuracy.compute()
precision_per_class = metric_precision_per_class.compute()

mlflow.log_metric('test_precision', precision)
mlflow.log_metric("test_recall", recall)
mlflow.log_metric("test_accuracy", accuracy)

ic| precision: tensor(0.6173)
ic| recall: tensor(0.5016)
ic| precision_per_class: tensor([0.4643, 0.9167, 1.0000, 0.3911, 0.4331, 0.4730, 0.6429])


tensor([0.4643, 0.9167, 1.0000, 0.3911, 0.4331, 0.4730, 0.6429])

In [42]:
dataset_test.class_to_idx

{'cirriform clouds': 0,
 'clear sky': 1,
 'cumulonimbus clouds': 2,
 'cumulus clouds': 3,
 'high cumuliform clouds': 4,
 'stratiform clouds': 5,
 'stratocumulus clouds': 6}

In [None]:
Next:

Add comments
Add MLFlow
Serving the model