# Task results

In [None]:
% cd ../

**Fetch and Load Dataset**

In [None]:
# ruff: noqa: E501
import os

os.makedirs("./data/")

!wget https://www-cs-toronto-edu.translate.goog/~kriz/cifar-10-python.tar.gz?_x_tr_sl=en&_x_tr_tl=es&_x_tr_hl=es&_x_tr_pto=tcb -O data/cifar-10-python.tar.gz
!tar -xvzf cifar-10-python.tar.gz?_x_tr_sl=en

**Load model**

In [None]:
# Load CLIP once; the three objects returned here are reused by the later cells.
from short_research_project.model.clip import load_clip

# The result is unpacked into the model, an image preprocessing callable
# (passed as `transform=` to the datasets) and a tokenizer (passed to eval_clip).
# "cuda" is presumably the target device, so this cell likely needs a
# CUDA-capable GPU — TODO confirm against load_clip's signature.
clip_model, preprocess, tokenizer = load_clip("cuda")

**Load data**

In [None]:
from short_research_project.dataset import CIFAR10Dataset
from short_research_project.utils import load_meta

# Training split: CIFAR-10 batch files 1 through 4.
# n_images=None presumably means "use every image" — TODO confirm in CIFAR10Dataset.
train_dataset = CIFAR10Dataset(
    [f"data/cifar-10-batches-py/data_batch_{batch_id}" for batch_id in range(1, 5)],
    transform=preprocess,
    n_images=None,
)

# Validation split: batch file 5 is held out from training.
eval_dataset = CIFAR10Dataset(
    ["data/cifar-10-batches-py/data_batch_5"],
    transform=preprocess,
    n_images=None,
)

# Test split: the dedicated test batch file.
test_dataset = CIFAR10Dataset(
    ["data/cifar-10-batches-py/test_batch"],
    transform=preprocess,
    n_images=None,
)

# Dataset metadata; later cells read the label names from it.
meta = load_meta("data/cifar-10-batches-py/batches.meta")

## 1. CLIP Zero-Shot

In [None]:
from loguru import logger

from short_research_project.evaluation import eval_clip

# The label names in the metadata are bytes; decode them to plain strings
# before handing them to eval_clip as the candidate classes.
classes = [raw_label.decode("utf-8") for raw_label in meta[b"label_names"]]
logger.info(f"Classes: {classes}")

# Zero-shot evaluation on the test split using the bare label names.
y_true, y_pred = eval_clip(
    clip_model=clip_model,
    tokenizer=tokenizer,
    dataset=test_dataset,
    classes=classes,
    batch_size=128,
    print_classification_report=True,
)

## 2. Linear probe

In [None]:
from short_research_project.model.linear_probe import LinearProbeModel
from short_research_project.train_linear_probe import train_linear_probe_model

# Create a fresh probe on the GPU and train it in a single expression, so that
# re-running the cell always starts from an untrained probe.
# input_dim=512 is presumably the CLIP embedding width (TODO confirm);
# num_classes=10 matches the ten CIFAR-10 labels.
linear_probe_model = train_linear_probe_model(
    clip_model=clip_model,
    linear_probe_model=LinearProbeModel(input_dim=512, num_classes=10).to("cuda"),
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    device="cuda",
    batch_size=256,
    epochs=10,
    learning_rate=0.001,
    log_every_n_steps=5,
)

# TODO: early stopping
# TODO: more layers
# TODO: evaluate in test set

## 3. Prompt Engineering

In [None]:
# Prompt-engineered descriptions, one per CIFAR-10 label. Entry i is assumed to
# describe label i of the metadata label names (same order as `classes`) —
# verify if the label order ever changes.
# Every prompt follows the same template: "a photo of <article> <label>, which is <category>".
prompted_classes = [
    "a photo of an airplane, which is a vehicle",
    "a photo of an automobile, which is a vehicle",
    "a photo of a bird, which is an animal",
    "a photo of a cat, which is an animal",
    "a photo of a deer, which is an animal",
    "a photo of a dog, which is an animal",
    "a photo of a frog, which is an animal",
    "a photo of a horse, which is an animal",
    "a photo of a ship, which is a vehicle",
    # Comma added before "which" so this prompt matches the template of the others.
    "a photo of a truck, which is a vehicle",
]

logger.info(f"Classes: {prompted_classes}")

# Same evaluation as the zero-shot section, but with the descriptive prompts
# in place of the bare label names.
y_true, y_pred = eval_clip(
    dataset=test_dataset,
    classes=prompted_classes,
    clip_model=clip_model,
    tokenizer=tokenizer,
    batch_size=128,
    print_classification_report=True,
)