# Test model
This notebook is a simple proof that the model can be loaded from MLflow and tested against the MDSist test dataset.

## Initial setup

In [6]:
from dotenv import load_dotenv

# Load environment variables from a .env file before the remaining imports run.
# NOTE(review): presumably mdsist.config and/or MLflow read these variables
# (e.g. data paths, tracking URI) — confirm before moving this call.
load_dotenv()

# Load packages
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

from mdsist.dataset import MdsistDataset
from mdsist.config import PROCESSED_DATA_DIR
from mdsist.trainer import Trainer

import mlflow
import mlflow.pytorch

import mdsist.util as util

In [7]:
# Notebook configuration
# Number of samples per batch fed to the model during evaluation
BATCH_SIZE = 64
# MLflow URI ("runs:/<run_id>/<artifact_path>") of the logged model to evaluate
MODEL_URI = "runs:/10cb51b288134c48835a8c0b9fe66eca/model_20240930190709"

## Load test dataset

In [8]:
# Preprocessing pipeline: tensor conversion followed by single-channel
# normalization with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Test split, read from the parquet file in the processed-data directory
test_dataset = MdsistDataset(
    PROCESSED_DATA_DIR / 'test.parquet',
    transform=transform,
)

# Batched iteration in a fixed order (shuffling disabled)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

## Download model from MLflow

In [9]:
# Pick the compute device, then fetch the logged model from MLflow
device = util.get_available_device()
model = mlflow.pytorch.load_model(MODEL_URI)

# Move the model to the chosen device; as the last expression of the cell,
# this also displays the module's architecture
model.to(device)

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1568, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

## Test that the model works
Finally, we validate that the model can run inference on the test dataset.

In [10]:
# Reuse the Trainer class purely as an evaluator: its `validate` method is run
# over the test loader and the returned statistics are kept in `test_stats`.
evaluator = Trainer(model, device=device)
test_stats = evaluator.validate(test_loader)

# Show the metrics (recorded output: accuracy, precision, recall, f1_score, loss)
print(test_stats)

TrainStats(accuracy=0.9878095238095238, precision=0.9882539457942343, recall=0.9875381146754026, f1_score=0.9877301554485743, loss=0.039559037438536476)
