# 03-iterations-with-charts

In [1]:
!pip show mlops-ai

Name: mlops-ai
Version: 1.1.0
Summary: Mlops-ai library for managing machine learning projects, experiments, iterations and datasets.
Home-page: 
Author: Kacper Pękalski, Kajetan Szal, Jędrzej Rybczyński
Author-email: kac.pekalski1@gmail.com
License: Apache License 2.0
Location: c:\users\jedryb\anaconda3\lib\site-packages
Requires: requests
Required-by: 


## Setting active project & experiment

In [2]:
from mlops.tracking import get_project_by_name, set_active_project

# Look up the project record by name and make it the active project.
# The second call is the cell's last expression, so its return value is what
# the notebook displays underneath the cell.
project = get_project_by_name('Iris classification')
set_active_project(project['_id'])

'Active project set to: 6489ec527f8983d10183cc1b'

In [3]:
from mlops.tracking import get_experiment_by_name, set_active_experiment, start_iteration

# Look up the experiment record by name and make it the active experiment.
# NOTE(review): the experiment record is indexed with 'id' while the project
# record in the previous cell is indexed with '_id' — presumably the two
# payloads use different key names; confirm against the mlops-ai API.
experiment = get_experiment_by_name('torch NN models')
set_active_experiment(experiment_id=experiment['id'])

'Active experiment set to: 6489ec5f7f8983d10183cc1d'

## Creating some simple torch iterations with chart data

In [4]:
import pandas as pd 
from sklearn.model_selection import train_test_split

# Read the iris CSV straight from GitHub, separate the 'class' label column
# from the feature columns, then make a stratified 50/50 train/test split
# with a fixed random state.
url = 'https://raw.githubusercontent.com/TripathiAshutosh/dataset/main/iris.csv'
df = pd.read_csv(url, sep=',')
X, y = df.drop(columns=['class']), df['class']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
    random_state=42,
)

In [5]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder

# Fit the label encoder on the training labels only and reuse that exact
# mapping for the test labels. Re-fitting on the test split would derive a
# second, independent mapping that is only guaranteed to agree with the
# training one when both splits contain the same set of classes.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Features become float32 tensors; class indices become int64 (torch.long).
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

BATCH_SIZE = 32

# Only the training loader shuffles; the test loader keeps the data order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [6]:
from torch import nn

class BaselineNN(nn.Module):
    """
    Small fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_shape (int): number of input features
        hidden_units (int): width of the single hidden layer
        output_shape (int): number of outputs produced by the last layer
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Build the layers in forward order so that parameter initialisation
        # draws from the RNG in the same sequence as the layers are applied.
        hidden_layer = nn.Linear(in_features=input_shape, out_features=hidden_units)
        activation = nn.ReLU()
        output_layer = nn.Linear(in_features=hidden_units, out_features=output_shape)
        self.layer_stack = nn.Sequential(hidden_layer, activation, output_layer)

    def forward(self, x):
        """Run `x` through the layer stack and return the last layer's output."""
        outputs = self.layer_stack(x)
        return outputs

### 1st iteration

In [7]:
# Seed before constructing the model so its initial weights are reproducible.
torch.manual_seed(42)

HIDDEN_UNITS = 10
LEARNING_RATE = 0.01

model = BaselineNN(
    input_shape=X_train.shape[1],  # one input per feature column
    hidden_units=HIDDEN_UNITS,
    output_shape=y.nunique(),      # one output per distinct class label
)

# Loss and optimizer are module-level names that train_model reads directly.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [8]:
from sklearn.metrics import accuracy_score

# Reset the global torch RNG when this cell is executed; train_model seeds
# again with the same value at the start of every call.
torch.manual_seed(42)


def train_model(model, epochs: int = 10):
    """
    Util function for training a PyTorch model.

    Trains on the module-level `train_loader` using the module-level `loss_fn`
    and `optimizer`, and evaluates on the module-level `test_loader` after
    every epoch. The torch RNG is re-seeded with 42 at the start of each call.

    Args:
        model: torch model instance
        epochs (int): number of epochs

    Returns:
        tuple: `(train_losses, train_accs, val_losses, val_accs)` - four lists
        holding one float per epoch, each rounded to 3 decimal places.
        Accuracies are fractions in [0, 1].
    """
    torch.manual_seed(42)
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    for epoch in range(epochs):

        ## Training
        model.train()
        train_loss, train_acc = 0.0, 0.0

        for X, y in train_loader:
            y_pred = model(X)
            loss = loss_fn(y_pred, y)

            # Accumulate plain Python floats: summing the loss tensor itself
            # would keep every batch's autograd graph alive until epoch end.
            train_loss += loss.item()
            # argmax over the logits selects the same class as argmax over
            # softmax(logits); float() accepts both numpy and Python floats.
            train_acc += float(accuracy_score(y, y_pred.argmax(dim=1)))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Mean of the per-batch values (batches are weighted equally, even if
        # the last batch is smaller than the others).
        train_loss /= len(train_loader)
        train_acc /= len(train_loader)

        ## Validation
        val_loss, val_acc = 0.0, 0.0
        model.eval()
        with torch.inference_mode():
            for X, y in test_loader:
                val_pred = model(X)
                val_loss += loss_fn(val_pred, y).item()
                val_acc += float(accuracy_score(y, val_pred.argmax(dim=1)))

        val_loss /= len(test_loader)
        val_acc /= len(test_loader)

        # NOTE(review): the accuracies printed here are fractions in [0, 1]
        # even though the message appends a '%' sign.
        print(f"Epoch: {epoch} | Train loss: {train_loss:.5f} | Train acc: {train_acc:.2f}% | Val loss: {val_loss:.5f}, Val acc: {val_acc:.2f}%")
        train_losses.append(round(train_loss, 3))
        train_accs.append(round(train_acc, 3))
        val_losses.append(round(val_loss, 3))
        val_accs.append(round(val_acc, 3))

    return train_losses, train_accs, val_losses, val_accs


def evaluate_model(model):
    """
    Util function for evaluating a PyTorch model on the module-level
    `X_test_tensor`.

    Args:
        model: torch model instance

    Returns:
        list: predicted class index for every row of `X_test_tensor`
    """
    model.eval()

    # No gradients are needed for prediction.
    with torch.inference_mode():
        class_probabilities = torch.softmax(model(X_test_tensor), dim=1)
        predicted_classes = class_probabilities.argmax(dim=1)

    return predicted_classes.tolist()

In [9]:
# Train the model configured above and keep the per-epoch loss/accuracy
# curves; the logging cell below reads them.
EPOCHS=10
train_losses, train_accs, val_losses, val_accs = train_model(model, epochs=EPOCHS)
# Predicted class indices for the test features (X_test_tensor).
y_pred = evaluate_model(model)

Epoch: 0 | Train loss: 1.17898 | Train acc: 0.34% | Val loss: 0.99695, Val acc: 0.72%
Epoch: 1 | Train loss: 1.04611 | Train acc: 0.42% | Val loss: 1.00933, Val acc: 0.36%
Epoch: 2 | Train loss: 0.97937 | Train acc: 0.57% | Val loss: 0.94357, Val acc: 0.64%
Epoch: 3 | Train loss: 0.91316 | Train acc: 0.72% | Val loss: 0.86912, Val acc: 0.64%
Epoch: 4 | Train loss: 0.85918 | Train acc: 0.78% | Val loss: 0.80718, Val acc: 0.81%
Epoch: 5 | Train loss: 0.80410 | Train acc: 0.74% | Val loss: 0.75446, Val acc: 0.72%
Epoch: 6 | Train loss: 0.76311 | Train acc: 0.66% | Val loss: 0.70279, Val acc: 0.72%
Epoch: 7 | Train loss: 0.73427 | Train acc: 0.64% | Val loss: 0.65751, Val acc: 0.72%
Epoch: 8 | Train loss: 0.68466 | Train acc: 0.68% | Val loss: 0.62150, Val acc: 0.76%
Epoch: 9 | Train loss: 0.62407 | Train acc: 0.76% | Val loss: 0.59265, Val acc: 0.93%


In [10]:
# Hyper-parameters of this run and the metrics of its final epoch.
parameters = {'batch_size': BATCH_SIZE, 'epochs': EPOCHS, 'learning_rate': LEARNING_RATE}
metrics = {
    'train_loss': round(train_losses[-1], 3),
    'train_acc': round(train_accs[-1], 3),
    'val_loss': round(val_losses[-1], 3),
    'val_acc': round(val_accs[-1], 3)
}

with start_iteration(iteration_name='NN v1') as iteration:
    iteration.log_parameters(parameters=parameters)
    iteration.log_metrics(metrics=metrics)

    # Both curves of a chart share one x axis: the epoch indices.
    iteration.log_chart(
        chart_name="Loss", chart_type="line",
        x_data=[list(range(len(train_losses)))],
        y_data=[train_losses, val_losses],
        y_data_names=['training loss', 'validation loss'],
        x_label="epochs", y_label="Loss",
        chart_title='Training vs validation loss',
        comparable=True)

    # The y axis of this chart shows accuracy, so label it accordingly
    # (it was previously labelled "Loss").
    iteration.log_chart(
        chart_name="Accuracy", chart_type="line",
        x_data=[list(range(len(train_accs)))],
        y_data=[train_accs, val_accs],
        y_data_names=['training acc', 'validation acc'],
        x_label="epochs", y_label="Accuracy",
        chart_title='Training vs validation accuracy',
        comparable=True)

    # NOTE(review): these image files are not created anywhere in this
    # notebook - presumably they already exist next to it; confirm.
    iteration.log_image_chart(name="Image chart 1",
                              image_path='./plot-1.png')
    iteration.log_image_chart(name="Image chart 2",
                              image_path='./plot-2.png')

### 2nd iteration

In [11]:
# Seed before constructing the model so its initial weights are reproducible.
torch.manual_seed(42)

# Second configuration: wider hidden layer, smaller learning rate.
HIDDEN_UNITS = 100
LEARNING_RATE = 0.001

model = BaselineNN(
    input_shape=X_train.shape[1],  # one input per feature column
    hidden_units=HIDDEN_UNITS,
    output_shape=y.nunique(),      # one output per distinct class label
)

# Fresh loss and optimizer bound to the new model's parameters; train_model
# reads these module-level names directly.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [12]:
# Train the second model configuration and keep the per-epoch loss/accuracy
# curves; the logging cell below reads them.
EPOCHS=10
train_losses, train_accs, val_losses, val_accs = train_model(model, epochs=EPOCHS)
# Predicted class indices for the test features (X_test_tensor).
y_pred = evaluate_model(model)

Epoch: 0 | Train loss: 1.13291 | Train acc: 0.35% | Val loss: 1.04898, Val acc: 0.64%
Epoch: 1 | Train loss: 1.01002 | Train acc: 0.66% | Val loss: 0.97090, Val acc: 0.72%
Epoch: 2 | Train loss: 0.95823 | Train acc: 0.70% | Val loss: 0.92601, Val acc: 0.72%
Epoch: 3 | Train loss: 0.96286 | Train acc: 0.64% | Val loss: 0.89495, Val acc: 0.72%
Epoch: 4 | Train loss: 0.93175 | Train acc: 0.64% | Val loss: 0.86333, Val acc: 0.72%
Epoch: 5 | Train loss: 0.88424 | Train acc: 0.66% | Val loss: 0.83263, Val acc: 0.72%
Epoch: 6 | Train loss: 0.85016 | Train acc: 0.66% | Val loss: 0.80585, Val acc: 0.72%
Epoch: 7 | Train loss: 0.82462 | Train acc: 0.64% | Val loss: 0.78180, Val acc: 0.80%
Epoch: 8 | Train loss: 0.79088 | Train acc: 0.91% | Val loss: 0.76002, Val acc: 0.88%
Epoch: 9 | Train loss: 0.75958 | Train acc: 0.91% | Val loss: 0.73725, Val acc: 0.86%


In [13]:
# Hyper-parameters of this run and the metrics of its final epoch.
parameters = {'batch_size': BATCH_SIZE, 'epochs': EPOCHS, 'learning_rate': LEARNING_RATE}
metrics = {
    'train_loss': round(train_losses[-1], 3),
    'train_acc': round(train_accs[-1], 3),
    'val_loss': round(val_losses[-1], 3),
    'val_acc': round(val_accs[-1], 3)
}

with start_iteration(iteration_name='NN v2') as iteration:
    iteration.log_parameters(parameters=parameters)
    iteration.log_metrics(metrics=metrics)

    # Both curves of a chart share one x axis: the epoch indices.
    iteration.log_chart(
        chart_name="Loss", chart_type="line",
        x_data=[list(range(len(train_losses)))],
        y_data=[train_losses, val_losses],
        y_data_names=['training loss', 'validation loss'],
        x_label="epochs", y_label="Loss",
        chart_title='Training vs validation loss',
        comparable=True)

    # The y axis of this chart shows accuracy, so label it accordingly
    # (it was previously labelled "Loss").
    iteration.log_chart(
        chart_name="Accuracy", chart_type="line",
        x_data=[list(range(len(train_accs)))],
        y_data=[train_accs, val_accs],
        y_data_names=['training acc', 'validation acc'],
        x_label="epochs", y_label="Accuracy",
        chart_title='Training vs validation accuracy',
        comparable=True)

    # NOTE(review): these image files are not created anywhere in this
    # notebook - presumably they already exist next to it; confirm.
    iteration.log_image_chart(name="Image chart 3",
                              image_path='./plot-3.png')
    iteration.log_image_chart(name="Image chart 4",
                              image_path='./plot-4.png')