In [1]:
import os
import torch
import pandas as pd
import numpy as np
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [3]:
from testing.system import MNISTDataModule, DigitClassifierSystem
from testing.integration import MNISTIntegrationDataset

# Why
The goal of this notebook is to understand the model's errors on the integration test and to gain experience with the PyTorch class system.

# What
* Load data using MNISTDataModule
* Make predictions using DigitClassifierSystem
* Collect everything into a DataFrame with the prediction and the per-class probabilities

In [4]:
# Set the working dir to the testing project so relative paths can be used for data loading.
root = "/workspaces/data-centric-deep-learning/course/week2/testing_project/"
# Compare against the `root` variable (not the literal string "root"); Path
# equality also ignores the trailing slash that os.getcwd() never returns.
if Path.cwd() != Path(root):
    os.chdir(root)

### Config

In [5]:
# Trained model checkpoint (relative to the project root set above)
checkpoint_path = "./checkpoints/mlp.ckpt"

# Integration-test folder: holds labels.csv and the processed digit images
test_dir = Path("./images/integration")
test_dir_images = test_dir.joinpath("digits-processed")

### Load system

In [6]:
# Restore the trained classifier from the checkpoint and switch it to evaluation
# mode. A freshly constructed nn.Module starts in training mode, where layers such
# as dropout or batch norm (if the model has any) would make predictions
# non-deterministic. `.eval()` returns the module itself, so chaining is safe.
system = DigitClassifierSystem.load_from_checkpoint(checkpoint_path).eval()

### Load data for the integration test

In [7]:
# labels.csv lists each image file name ("path") with its expected digit ("label")
labels_csv = test_dir / "labels.csv"
labels_df = pd.read_csv(labels_csv)
labels_df

Unnamed: 0,path,label
0,ts_00.png,0
1,ts_01.png,1
2,ts_02.png,2
3,ts_03.png,3
4,ts_04.png,4
5,ts_05.png,5
6,ts_06.png,6
7,ts_07.png,7
8,ts_08.png,8
9,ts_09.png,9


In [8]:
# Resolve every file name listed in labels.csv against the processed-digits folder.
image_paths = [test_dir_images / name for name in labels_df["path"]]

# Fail fast, naming the offending files, if any listed image is absent on disk.
missing_files = [p for p in image_paths if not p.exists()]
assert not missing_files, f"Missing image files: {missing_files}"

In [9]:
# Show the resolved image paths (one per row of labels_df)
image_paths

[PosixPath('images/integration/digits-processed/ts_00.png'),
 PosixPath('images/integration/digits-processed/ts_01.png'),
 PosixPath('images/integration/digits-processed/ts_02.png'),
 PosixPath('images/integration/digits-processed/ts_03.png'),
 PosixPath('images/integration/digits-processed/ts_04.png'),
 PosixPath('images/integration/digits-processed/ts_05.png'),
 PosixPath('images/integration/digits-processed/ts_06.png'),
 PosixPath('images/integration/digits-processed/ts_07.png'),
 PosixPath('images/integration/digits-processed/ts_08.png'),
 PosixPath('images/integration/digits-processed/ts_09.png')]

In [10]:
# Expected digits as a NumPy array, in the same row order as image_paths
labels = labels_df["label"].to_numpy()
labels

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
# Wire up the PyTorch data pipeline (Dataset -> DataLoader): the dataset receives
# the image paths, their labels and a ToTensor transform; the loader serves it
# in batches of 5.
to_tensor = transforms.ToTensor()
dataset = MNISTIntegrationDataset(image_paths, labels, transform=to_tensor)
loader = DataLoader(dataset=dataset, batch_size=5)

### Iterate over batches and save all outputs

In [12]:
# Collect per-batch outputs in lists, then stack each into a single array.
prob_chunks = []
pred_chunks = []

# Inference only, so gradient tracking is disabled for the whole pass.
with torch.no_grad():
    for X, y in loader:
        batch_logits = system.predict_step(X)
        # Predicted class = index of the largest logit in each row
        batch_preds = batch_logits.argmax(dim=1)
        # (author's open question) why don't we do this in the forward step?
        batch_probs = torch.softmax(batch_logits, dim=-1)
        prob_chunks.append(batch_probs.numpy())
        pred_chunks.append(batch_preds.numpy())

all_probs = np.concatenate(prob_chunks, axis=0)
all_preds = np.concatenate(pred_chunks, axis=0)

In [13]:
# Show the predicted class index for every integration image
all_preds

array([0, 7, 2, 5, 4, 5, 5, 2, 8, 3])

In [14]:
# One "prob_<class>" column per class index, rounded to 3 decimals for readability
prob_columns = pd.DataFrame(all_probs).add_prefix("prob_").round(3)

# labels + predicted class + per-class probabilities + correctness flag.
# assign() returns a new frame, so labels_df itself is left untouched.
diagnostic_df = (
    pd.concat([labels_df.assign(predicted=all_preds), prob_columns], axis=1)
    .assign(is_correct=lambda df: df["label"] == df["predicted"])
)

In [15]:
# Show label, prediction, per-class probabilities and correctness for each image
diagnostic_df

Unnamed: 0,path,label,predicted,prob_0,prob_1,prob_2,prob_3,prob_4,prob_5,prob_6,prob_7,prob_8,prob_9,is_correct
0,ts_00.png,0,0,0.997,0.0,0.0,0.0,0.0,0.002,0.0,0.0,0.001,0.0,True
1,ts_01.png,1,7,0.0,0.018,0.0,0.099,0.322,0.022,0.0,0.495,0.012,0.031,False
2,ts_02.png,2,2,0.009,0.012,0.939,0.039,0.0,0.001,0.0,0.0,0.0,0.0,True
3,ts_03.png,3,5,0.0,0.0,0.0,0.338,0.0,0.661,0.0,0.0,0.001,0.0,False
4,ts_04.png,4,4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,True
5,ts_05.png,5,5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,True
6,ts_06.png,6,5,0.0,0.0,0.0,0.0,0.0,0.633,0.353,0.0,0.014,0.0,False
7,ts_07.png,7,2,0.0,0.0,0.732,0.004,0.257,0.0,0.0,0.007,0.0,0.0,False
8,ts_08.png,8,8,0.001,0.0,0.002,0.047,0.0,0.002,0.0,0.0,0.949,0.0,True
9,ts_09.png,9,3,0.0,0.0,0.0,0.588,0.0,0.408,0.0,0.0,0.004,0.0,False


In [16]:
# Persist the diagnostics. The output folder is created first because to_csv
# does not create missing parent directories and would raise on a fresh checkout.
results_path = Path("logs/integration_results.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
diagnostic_df.to_csv(results_path, index=False)