# Deepchecks
This notebook explores the capabilities of Deepchecks for validating our dataset.

In [None]:
import os
import polars as pl

# Load every Food-101 parquet shard below DATA_ROOT into two polars frames:
# files whose name contains "train" feed the train split, files whose name
# contains "validation" feed the test split.
DATA_ROOT = "../data/raw/food101/data"

train_dfs = []
test_dfs = []

for base_dir, sub_dirs, files in os.walk(DATA_ROOT):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # directory list in place (which steers the traversal) and the file names
    # keeps the concatenation order stable, so the seeded shuffle below gives
    # the same row order on every machine.
    sub_dirs.sort()
    for filename in sorted(files):
        if not filename.endswith(".parquet"):
            continue
        shard_path = os.path.join(base_dir, filename)
        if "train" in filename:
            train_dfs.append(pl.read_parquet(shard_path))
        elif "validation" in filename:
            test_dfs.append(pl.read_parquet(shard_path))

# Fail early with an actionable message instead of letting pl.concat choke on
# an empty list when the data directory is missing or incomplete.
if not train_dfs or not test_dfs:
    raise FileNotFoundError(
        f"Expected train and validation parquet shards under {DATA_ROOT!r}; "
        f"found {len(train_dfs)} train and {len(test_dfs)} validation file(s)."
    )

train_data = pl.concat(train_dfs)
# fraction=1.0 keeps every row; shuffle + fixed seed gives a reproducible permutation.
train_data = train_data.sample(fraction=1.0, shuffle=True, seed=42)

test_data = pl.concat(test_dfs)
test_data = test_data.sample(fraction=1.0, shuffle=True, seed=42)

# Quick visual sanity check of the loaded training data.
display(train_data.head(3))

In [None]:
from deepchecks.vision.vision_data import VisionData
from src.labels import LABELS
from src.data.food_dataset import FoodDataset, dc_collate
from torch.utils.data import DataLoader

# Deepchecks expects an index -> class-name mapping; build it a single time.
label_map = dict(enumerate(LABELS))
print("Num classes:", len(LABELS))

# Train and test are wrapped identically, so the shared settings live in one place.
dataset_kwargs = {"class_names": LABELS, "image_col": "image", "label_col": "label"}
loader_kwargs = {"batch_size": 32, "num_workers": 0, "collate_fn": dc_collate}

# Polars DataFrames -> FoodDataset instances
train_dataset = FoodDataset(train_data, **dataset_kwargs)
test_dataset = FoodDataset(test_data, **dataset_kwargs)

# Batches are assembled by dc_collate (presumably into the dict layout
# Deepchecks consumes -- see src.data.food_dataset). Only the train loader
# reshuffles on each pass.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Hand the loaders to Deepchecks as classification datasets.
train_ds = VisionData(
    batch_loader=train_loader,
    task_type="classification",
    label_map=label_map,
)
test_ds = VisionData(
    batch_loader=test_loader,
    task_type="classification",
    label_map=label_map,
)

In [None]:
from deepchecks.vision.suites import train_test_validation

# Instantiate the train/test validation suite and run it on both splits;
# the outcome is kept in `result` for display and export in later cells.
suite = train_test_validation()
result = suite.run(train_dataset=train_ds, test_dataset=test_ds)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the suite result inline.
result

In [None]:
import os
from datetime import datetime, timezone

# Persist the suite result as a standalone HTML report with a UTC timestamp
# in the file name, so successive runs do not share a name.
REPORT_DIR = '../reports/figures'

# Create the output folder up front: the save would otherwise fail on a fresh
# checkout, and only after the (slow) suite run has already finished.
os.makedirs(REPORT_DIR, exist_ok=True)

report_stamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
result.save_as_html(f'{REPORT_DIR}/deepchecks{report_stamp}.html')