mryab · Q-c7 · Jan 22, 2025 · Jan 21, 2025
diff --git a/week02_management_and_testing/README.md b/week02_management_and_testing/README.md
@@ -0,0 +1,18 @@
+# Week 2: Experiment tracking and testing
+
+[//]: # (* Lecture:   [slides]&#40;./lecture.pdf&#41;)
+* Lecture: TBD
+* Seminar: see the [example_project](./example_project) directory
+* Homework: see [homework/README.md](homework/README.md)
+
+## Further reading
+* Tools for experiment tracking: [Aim](https://github.com/aimhubio/aim), [Comet](https://www.comet.ml/site/), [Neptune](https://neptune.ai/), [Sacred](https://github.com/IDSIA/sacred), [Weights and Biases](https://wandb.ai/), [ClearML](https://clear.ml/)
+* [DVC](https://dvc.org/) and [Pachyderm](https://www.pachyderm.com/) for artifact versioning
+* [Hydra documentation](https://hydra.cc/docs/intro/)
+* [Unittest](https://docs.python.org/3/library/unittest.html) built-in module
+* [Doctest](https://docs.python.org/3/library/doctest.html) built-in module (useful for testing docstrings!)
+* [Pytest](https://github.com/pytest-dev/pytest/) repository
+* Pytest plugins: [pytest-xdist](https://pypi.org/project/pytest-xdist/) for parallel execution, [pytest-cov](https://pytest-cov.readthedocs.io/en/latest/readme.html) for coverage reports.
+* [Hypothesis quick start guide](https://hypothesis.readthedocs.io/en/latest/quickstart.html) and [integration with pytest](https://hypothesis.readthedocs.io/en/latest/details.html#the-hypothesis-pytest-plugin)
+* [Full Stack Deep Learning "Troubleshooting & Testing" lecture](https://fullstackdeeplearning.com/course/2022/lecture-3-troubleshooting-and-testing/#4-resources)
+* [Made With ML MLOps Course, "Testing Machine Learning Systems: Code, Data and Models"](https://madewithml.com/courses/mlops/testing/)
diff --git a/week02_management_and_testing/example_project/compute_metrics.py b/week02_management_and_testing/example_project/compute_metrics.py
@@ -0,0 +1,54 @@
+import json
+from argparse import ArgumentParser
+
+import torch
+import torchvision.transforms as transforms
+from torchvision.datasets import CIFAR10
+from torchvision.models import resnet18
+
+from hparams import config
+
+
+def main(args):
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
+    ])
+
+    test_dataset = CIFAR10(root='CIFAR10/test',
+                           train=False,
+                           transform=transform,
+                           download=False,
+                           )
+
+    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                              batch_size=config["batch_size"])
+
+    device = torch.device("cuda")
+
+    model = resnet18(pretrained=False, num_classes=10)
+    model.load_state_dict(torch.load("model.pt"))
+    model.to(device)
+
+    correct = 0.0
+
+    for test_images, test_labels in test_loader:
+        test_images = test_images.to(device)
+        test_labels = test_labels.to(device)
+
+        with torch.inference_mode():
+            outputs = model(test_images)
+            preds = torch.argmax(outputs, 1)
+            correct += (preds == test_labels).sum()
+
+    accuracy = correct / len(test_dataset)
+
+    with open("final_metrics.json", "w+") as f:
+        json.dump({"accuracy": accuracy.item()}, f)
+        print("\n", file=f)
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser()
+    args = parser.parse_args()
+    main(args)
diff --git a/week02_management_and_testing/example_project/dvc.yaml b/week02_management_and_testing/example_project/dvc.yaml
@@ -0,0 +1,24 @@
+stages:  # pipeline: prepare_data -> train -> compute_metrics (linked through deps/outs below)
+  prepare_data:
+    cmd: python prepare_data.py
+    deps:
+    - prepare_data.py
+    outs:
+    - CIFAR10  # directory that prepare_data.py downloads the dataset into
+  train:
+    cmd: python train.py
+    deps:
+    - CIFAR10
+    - hparams.py
+    - train.py
+    outs:
+    - model.pt  # NOTE(review): train.py also writes run_id.txt, which is not tracked here
+  compute_metrics:
+    cmd: python compute_metrics.py
+    deps:  # NOTE(review): compute_metrics.py imports hparams.py, but it is not listed as a dep
+    - CIFAR10
+    - compute_metrics.py
+    - model.pt
+    metrics:
+    - final_metrics.json:
+        cache: false  # do not store the metrics file in the DVC cache
diff --git a/week02_management_and_testing/example_project/hparams.py b/week02_management_and_testing/example_project/hparams.py
@@ -0,0 +1,7 @@
+config = dict(
+    batch_size=64,
+    learning_rate=1e-5,
+    weight_decay=0.01,
+    epochs=2,
+    zero_init_residual=False,
+)
diff --git a/week02_management_and_testing/example_project/prepare_data.py b/week02_management_and_testing/example_project/prepare_data.py
@@ -0,0 +1,5 @@
+from torchvision.datasets import CIFAR10
+
+if __name__ == "__main__":
+    train_dataset = CIFAR10("CIFAR10/train", download=True)
+    test_dataset = CIFAR10("CIFAR10/test", download=True)
diff --git a/week02_management_and_testing/example_project/pyproject.toml b/week02_management_and_testing/example_project/pyproject.toml
@@ -0,0 +1,24 @@
+[project]
+name = "homework"
+version = "0.1.0"
+description = "Sample Text"
+authors = [{ name = "YZ", email = "spam@shit.com" }]  # PEP 621 requires inline tables here
+requires-python = ">=3.10"
+readme = "README.md"
+
+dependencies = [
+    "torch==2.1.2",
+    "torchvision==0.16.2",
+    "wandb>=0.13.10",
+    "tqdm==4.66.1",
+    "numpy==1.26.4",
+    "dvc==2.44.0",
+    "hydra-core==1.3.1",
+    "omegaconf==2.3.0",
+]
+
+[tool.uv]
+dev-dependencies = [
+    "pytest==7.4.4",
+    "pytest-cov==4.1.0",
+]
diff --git a/week02_management_and_testing/example_project/test_basic.py b/week02_management_and_testing/example_project/test_basic.py
@@ -0,0 +1,38 @@
+import torch
+import pytest
+
+from train import compute_accuracy
+
+def test_arange_elems():
+    arr = torch.arange(0, 10, dtype=torch.float)
+    assert torch.allclose(arr[-1], torch.tensor([9]).float())  #
+
+def test_div_zero():
+    a = torch.zeros(1,dtype=torch.long)
+    b = torch.ones(1,dtype=torch.long)
+
+    assert not torch.isfinite(b/a)
+
+
+def test_div_zero_python():
+    with pytest.raises(ZeroDivisionError):
+        1/0  #
+
+def test_accuracy():
+    preds = torch.randint(0,2,size=(100,))
+    targets = preds.clone()
+
+    assert compute_accuracy(preds, targets) == 1.0
+
+    preds = torch.tensor([1,2,3,0,0,0])
+    targets = torch.tensor([1,2,3,4,5,6])
+
+    assert compute_accuracy(preds, targets) == 0.5  # This is bad - why?
+
+@pytest.mark.parametrize("preds,targets,result",[
+    (torch.tensor([1,2,3]),torch.tensor([1,2,3]), 1.0),
+    (torch.tensor([1,2,3]),torch.tensor([0,0,0]), 0.0),
+    (torch.tensor([1,2,3]),torch.tensor([1,2,0]), 2/3),
+    ])
+def test_accuracy_parametrized(preds, targets, result):
+    assert torch.allclose(compute_accuracy(preds, targets), torch.tensor([result]), rtol=0, atol=1e-5)
diff --git a/week02_management_and_testing/example_project/train.py b/week02_management_and_testing/example_project/train.py
@@ -0,0 +1,91 @@
+import torch
+import torch.nn as nn
+import torchvision.transforms as transforms
+import wandb
+from torchvision.datasets import CIFAR10
+from torchvision.models import resnet18
+from tqdm import tqdm, trange
+
+from hparams import config
+
+def compute_accuracy(preds, targets):
+    result = (targets == preds).float().sum()
+    return result
+
+
+def main():
+    wandb.init(config=config, project="effdl_example", name="baseline")
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
+        transforms.Resize((224, 224)),
+    ])
+
+    train_dataset = CIFAR10(root='CIFAR10/train',
+                            train=True,
+                            transform=transform,
+                            download=False,
+                            )
+
+    test_dataset = CIFAR10(root='CIFAR10/test',
+                           train=False,
+                           transform=transform,
+                           download=False,
+                           )
+
+    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                               batch_size=config["batch_size"],
+                                               shuffle=True)
+
+    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                              batch_size=config["batch_size"])
+
+    device = torch.device("cuda")
+
+    model = resnet18(pretrained=False, num_classes=10, zero_init_residual=config["zero_init_residual"])
+    model.to(device)
+    wandb.watch(model)
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
+
+    for epoch in trange(config["epochs"]):
+        for i, (images, labels) in enumerate(tqdm(train_loader)):
+            images = images.to(device)
+            labels = labels.to(device)
+
+            outputs = model(images)
+            loss = criterion(outputs, labels)
+
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+
+            if i % 100 == 0:
+                all_preds = []
+                all_labels = []
+
+                for test_images, test_labels in test_loader:
+                    test_images = test_images.to(device)
+                    test_labels = test_labels.to(device)
+
+                    with torch.inference_mode():
+                        outputs = model(test_images)
+                        preds = torch.argmax(outputs, 1)
+
+                        all_preds.append(preds)
+                        all_labels.append(test_labels)
+
+                accuracy = compute_accuracy(torch.cat(all_preds), torch.cat(all_labels))
+
+                metrics = {'test_acc': accuracy, 'train_loss': loss}
+                wandb.log(metrics, step=epoch * len(train_dataset) + (i + 1) * config["batch_size"])
+    torch.save(model.state_dict(), "model.pt")
+
+    with open("run_id.txt", "w+") as f:
+        print(wandb.run.id, file=f)
+
+
+if __name__ == '__main__':
+    main()  # train only when run as a script; test_basic.py imports this module