In [1]:
'''

Будем решать задачу классификации изображений Fashion MNIST с помощью полносвяных нейронных сетей. Напишем простое решение, а потом улучшим его.

'''

'\n\nБудем решать задачу классификации изображений Fashion MNIST с помощью полносвяных нейронных сетей. Напишем простое решение, а потом улучшим его.\n\n'

In [1]:
import torch
print(torch.cuda.is_available())

True


In [1]:
import wandb
wandb.login(key="972e1a7e81b595b8a22b3c53552a91707540b820")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msamiralzgul[0m ([33mrncomp[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from dataclasses import dataclass
import numpy as np
from torch.optim.sgd import SGD
from torch.optim.lr_scheduler import ExponentialLR, LinearLR, StepLR
import tqdm
import wandb
from pathlib import Path
import tarfile
import http.client

@dataclass
class TrainConfig:
    lr: float = 0.1
    eval_every: int = 10
    total_iterations: int = 3000
    scheduler_type: str = "none"
    model_type: str = "batch_norm"
    optimizer_type: str = "sgd"
    gamma: float = 0.99  # Used for ExponentialLR if scheduler_type is "exp"

def set_seed(seed: int):
    torch.cuda.manual_seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

def get_datasets():
    train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=ToTensor())
    test_dataset = FashionMNIST(root='./data', train=False, download=True, transform=ToTensor())
    return train_dataset, test_dataset

class GenericModel(nn.Module):
    def __init__(self, num_classes=10, model_type="batch_norm"):
        super().__init__()
        hidden_dim = 512
        self.model_type = model_type
        self.net = nn.Sequential(
            nn.Linear(in_features=28*28, out_features=hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(num_features=hidden_dim) if model_type == "batch_norm" else nn.Identity(),
            nn.Linear(in_features=hidden_dim, out_features=num_classes),
        )

    def forward(self, x: torch.Tensor):
        x = x.reshape((-1, 28*28))
        return self.net(x)

def get_optimizer(model: nn.Module, config: TrainConfig):
    if config.optimizer_type == "sgd":
        return SGD(model.parameters(), lr=config.lr)
    elif config.optimizer_type == "adam":
        return Adam(model.parameters(), lr=config.lr)
    else:
        raise ValueError(f"Unknown optimizer type: {config.optimizer_type}")

def get_scheduler(optimizer: optim.Optimizer, config: TrainConfig):
    if config.scheduler_type == "exp":
        return ExponentialLR(optimizer, gamma=config.gamma)
    elif config.scheduler_type == "none":
        return None
    else:
        raise ValueError(f"Unknown scheduler type: {config.scheduler_type}")

def train_loop(model: nn.Module,
               X_train: torch.Tensor,
               y_train: torch.Tensor,
               X_val: torch.Tensor,
               y_val: torch.Tensor,
               config: TrainConfig,
               run_name: str | None = None):

    wandb.init(
        project="model_train_fashion_mnist",
        notes="version2",
        name=run_name,
        config=config
    )
    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)
    model.to(device).train()

    for i in tqdm.trange(config.total_iterations):
        optimizer.zero_grad()
        loss = F.cross_entropy(model(X_train.to(device)), y_train.to(device))
        loss.backward()
        optimizer.step()
        
        metrics = {"iteration": i, "loss_train": loss.detach().cpu().item()}

        if (i + 1) % config.eval_every == 0:
            with torch.no_grad():
                model.to(device).eval()
                loss_val = F.cross_entropy(model(X_val.to(device)), y_val.to(device))
                model.train()
                metrics.update({"loss_val": loss_val.detach().cpu().item()})

        if scheduler:
            scheduler.step()
            metrics.update({"lr": scheduler.get_last_lr()[0]})
        else:
            metrics.update({"lr": config.lr})

        wandb.log(metrics)
    
    wandb.finish()

# Main execution
seed = 0
set_seed(seed)
train_dataset, test_dataset = get_datasets()

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"используем {device}")

X_train = train_dataset.data.float().to(device)
y_train = train_dataset.targets.to(device)
X_test = test_dataset.data.float().to(device)
y_test = test_dataset.targets.to(device)

num_classes = y_train[0].item() + 1




используем cuda


In [31]:
config = TrainConfig(eval_every=20, lr=2, total_iterations=3000, scheduler_type="exp", model_type="batch_norm", optimizer_type="sgd")
model = GenericModel(num_classes=num_classes, model_type=config.model_type)
train_loop(model, X_train, y_train, X_test, y_test, config=config, run_name=f"base_model_classification")

100%|██████████| 3000/3000 [01:50<00:00, 27.17it/s]


0,1
iteration,▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
loss_train,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
loss_val,█▂▂▁▂▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
lr,█▇▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
iteration,2999.0
loss_train,0.30629
loss_val,0.65665
lr,0.0


In [32]:
calculate_accuracy(model(X_test.to(device)), y_test.to(device))

0.8702999949455261

In [33]:
config = TrainConfig(eval_every=20, lr = 2, total_iterations=3000, scheduler_type="none", model_type="batch_norm", optimizer_type="sgd")
model = GenericModel(num_classes=num_classes, model_type=config.model_type)
train_loop(model, X_train, y_train, X_test, y_test, config=config, run_name=f"base_model_classification")

100%|██████████| 3000/3000 [01:52<00:00, 26.65it/s]


0,1
iteration,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇█████
loss_train,██▆▅▆▅▇▄▄▃▃▃▃▃▂▃▃▄▃▃▆▂▂▃▂▂▁▂▂▂▂▂▂▁▂▁▁▁▁▁
loss_val,▁▁▄▅▅▄▅▅▅▅▅▄█▅▆▆▅▄▅▅▅▅▅▄▄▅▅▅▅▅▄▇▆▅▅▅▅▄▆▅
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
iteration,2999.0
loss_train,0.17259
loss_val,1.27039
lr,2.0


In [34]:
calculate_accuracy(model(X_test.to(device)), y_test.to(device))

0.8725999593734741