In [None]:
import sys

sys.path.append('..')

from torch.utils.data import DataLoader
import pandas as pd
from pathlib import Path
import numpy as np

from src.trainer import Trainer
from src.dataset import HumanPosesDataset
from sklearn.model_selection import train_test_split
import torch

In [None]:
import plotly.io as pio
# Make "browser" the default Plotly renderer for every figure shown in this session.
pio.renderers.default = "browser"

# Data preparation: augmentations, train/validation split and data loaders

In [None]:
from torchvision import transforms

# Per-channel normalisation constants shared by both pipelines
# (presumably statistics of the training images — verify).
mean = [0.4638, 0.4522, 0.4148]
std = [0.2222, 0.2198, 0.2176]

# Training pipeline: random geometric and colour augmentation on the PIL image,
# then tensor conversion, normalisation and random erasing on the tensor.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224, scale=(0.5, 1.0), ratio=(0.75, 1.33)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandAugment(num_ops=2, magnitude=9),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
        transforms.RandomErasing(p=0.25, scale=(0.02, 0.2), ratio=(0.3, 3.3), value='random'),
    ]
)

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [None]:
CSV_PATH = Path("../data/human_poses_data/train_answers.csv")
TRAIN_DIR = Path("../data/human_poses_data/img_train")

df = pd.read_csv(CSV_PATH)

# Hold out 20% of the image ids for validation, stratified by target_feature;
# the fixed random_state keeps the split repeatable.
train_ids, val_ids = train_test_split(
    df['img_id'].values, test_size=0.2, stratify=df['target_feature'], random_state=42
)

# Rows are selected by id membership, so each subset keeps the CSV row order.
train_df = df.loc[df['img_id'].isin(train_ids)].reset_index(drop=True)
val_df = df.loc[df['img_id'].isin(val_ids)].reset_index(drop=True)

# Both datasets read from the same image directory; only the transform differs.
train_dataset = HumanPosesDataset(data_df=train_df, img_dir=TRAIN_DIR, transform=train_transform)
val_dataset = HumanPosesDataset(data_df=val_df, img_dir=TRAIN_DIR, transform=val_transform)

# Training batches are shuffled; validation batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

In [None]:
# Number of distinct labels in the full (train + validation) answer table.
num_classes = np.unique(df['target_feature']).size
print(f"Количество классов: {num_classes}")

# Self-supervised pre-training: masked autoencoder on a TinyViT encoder

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"✅ Using device: {device}")

In [None]:
from src.models.tinyvit import TinyViT
from torch import nn

# Build a TinyViT ('11M' size) configured for 3 input channels and `num_classes` classes.
# NOTE(review): no later cell visible in this notebook reads `base_model` (the MAE stage
# builds its own `base`) — confirm whether this instance is still needed.
base_model = TinyViT(in_chans=3, num_classes=num_classes, model_size='11M')

class TinyViTEncoder(nn.Module):
    """Feature extractor built from the patch embedding and stages of a base model.

    The sub-modules are shared with ``base_model`` (not copied), so the keys of this
    module's ``state_dict`` start with ``patch_embed.`` and ``stages.``.
    """

    def __init__(self, base_model):
        """Keep references to ``base_model.patch_embed`` and ``base_model.stages``."""
        super().__init__()
        self.patch_embed, self.stages = base_model.patch_embed, base_model.stages

    def forward(self, x):
        """Apply the patch embedding, then every stage in order, and return the result."""
        features = self.patch_embed(x)
        for block in self.stages:
            features = block(features)
        return features

In [None]:
def patchify(imgs, patch_size):
    """Split a batch of images into flattened, non-overlapping square patches.

    Args:
        imgs: tensor of shape (B, C, H, W). H and W must both be multiples of
            ``patch_size``; they do not have to be equal.
        patch_size: side length of each square patch, in pixels.

    Returns:
        Tensor of shape (B, (H // patch_size) * (W // patch_size),
        patch_size * patch_size * C). Patches are ordered row-major over the patch
        grid; inside a patch the values are laid out as (row, column, channel).

    Raises:
        ValueError: if H or W is not divisible by ``patch_size``.
    """
    B, C, H, W = imgs.shape
    if H % patch_size or W % patch_size:
        raise ValueError(
            f"Image size ({H}, {W}) must be divisible by patch_size={patch_size}"
        )
    # Height and width get their own grid size so non-square inputs work too.
    h, w = H // patch_size, W // patch_size
    patches = imgs.reshape(B, C, h, patch_size, w, patch_size)
    # (B, C, h, ph, w, pw) -> (B, h, w, ph, pw, C) -> one flat vector per patch.
    patches = patches.permute(0, 2, 4, 3, 5, 1).reshape(B, h * w, patch_size * patch_size * C)
    return patches

def unpatchify(patches, patch_size, img_size):
    """Reassemble flattened patches into images (inverse of ``patchify``).

    Args:
        patches: tensor of shape (B, N, patch_size * patch_size * C), with patches in
            row-major grid order and values inside a patch laid out as
            (row, column, channel).
        patch_size: side length of each square patch, in pixels.
        img_size: side length of a square image, or an ``(H, W)`` pair.

    Returns:
        Tensor of shape (B, C, H, W). C is inferred from the patch vector length, so
        any channel count works (the previous version assumed 3).

    Raises:
        ValueError: if the image size is not divisible by ``patch_size``, or if the
            shape of ``patches`` does not match the requested image size.
    """
    B, N, D = patches.shape
    try:
        height, width = img_size
    except TypeError:
        # A single number means a square image.
        height = width = img_size
    if height % patch_size or width % patch_size:
        raise ValueError(
            f"Image size ({height}, {width}) must be divisible by patch_size={patch_size}"
        )
    h, w = height // patch_size, width // patch_size
    channels, leftover = divmod(D, patch_size * patch_size)
    if leftover or channels == 0 or h * w != N:
        raise ValueError(
            f"patches of shape {tuple(patches.shape)} do not match "
            f"img_size=({height}, {width}) with patch_size={patch_size}"
        )
    patches = patches.reshape(B, h, w, patch_size, patch_size, channels)
    # (B, h, w, ph, pw, C) -> (B, C, h, ph, w, pw) so rows and columns become contiguous.
    patches = patches.permute(0, 5, 1, 3, 2, 4)
    imgs = patches.reshape(B, channels, height, width)
    return imgs

In [None]:
class MAEWrapper(nn.Module):
    """Masked-autoencoder style reconstruction model wrapped around an image encoder.

    Encoder features are projected to the decoder width, padded with a learned mask
    token up to the full patch count, run through a small Transformer decoder and
    mapped back to flattened RGB pixel patches.

    Args:
        encoder: module mapping an image batch to a 4-D feature map
            (B, encoder_dim, H', W').
        encoder_dim: channel width of the encoder output.
        img_size: side length of the (square) input images.
        patch_size: side length of the reconstruction patches.
        decoder_dim: token width inside the decoder (must be divisible by the 8
            attention heads).
        decoder_depth: number of Transformer layers in the decoder.
        mask_ratio: fraction of patches treated as masked.
    """

    def __init__(self, encoder: nn.Module, encoder_dim: int,
                 img_size=224, patch_size=16,
                 decoder_dim=512, decoder_depth=4, mask_ratio=0.75):
        super().__init__()
        self.encoder = encoder
        self.mask_ratio = mask_ratio
        self.patch_size = patch_size
        self.img_size = img_size
        self.num_patches = (img_size // patch_size) ** 2

        # Learned placeholder for masked positions and learned decoder position embedding.
        self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_dim))
        self.decoder_pos_embed = nn.Parameter(torch.randn(1, self.num_patches, decoder_dim))

        self.encoder_to_decoder = nn.Linear(encoder_dim, decoder_dim, bias=False)

        # batch_first=True is required: the decoder is fed (B, N, D) tokens. With the
        # default (batch_first=False) dim 0 is treated as the sequence axis, so
        # attention would mix samples of the batch instead of patches of one image.
        self.decoder_blocks = nn.Sequential(*[
            nn.TransformerEncoderLayer(d_model=decoder_dim, nhead=8, dim_feedforward=2048,
                                       dropout=0.1, activation='gelu', batch_first=True)
            for _ in range(decoder_depth)
        ])

        # Predicts the flattened RGB pixels of one patch per token.
        self.decoder_pred = nn.Linear(decoder_dim, patch_size * patch_size * 3)

    def forward(self, imgs):
        """Reconstruct all patches of ``imgs`` and score the masked ones.

        Args:
            imgs: image batch of shape (B, 3, img_size, img_size).

        Returns:
            Tuple ``(loss, pred, mask)``: ``loss`` is the squared error summed over
            the masked patches and divided by the number of masked patches; ``pred``
            has shape (B, N, patch_size * patch_size * 3); ``mask`` has shape (B, N)
            with 1 for masked and 0 for kept patches.
        """
        patches = patchify(imgs, self.patch_size)
        B, N, D = patches.shape

        # Per-sample random permutation; the first len_keep ids count as "kept".
        len_keep = int(N * (1 - self.mask_ratio))
        noise = torch.rand(B, N, device=imgs.device)
        ids_shuffle = torch.argsort(noise, dim=1)
        ids_restore = torch.argsort(ids_shuffle, dim=1)
        ids_keep = ids_shuffle[:, :len_keep]

        # NOTE(review): the encoder receives the full, unmasked image, and its H'*W'
        # output tokens are then treated as if they were the len_keep kept patches.
        # The shapes only line up when H'*W' == len_keep — confirm this is intended.
        feats = self.encoder(imgs)
        B, C, H, W = feats.shape
        x = feats.flatten(2).transpose(1, 2)  # (B, H'*W', encoder_dim)

        x = self.encoder_to_decoder(x)

        # Append mask tokens, then undo the shuffle so tokens sit at patch positions.
        mask_tokens = self.mask_token.repeat(B, N - len_keep, 1)
        x_full = torch.cat([x, mask_tokens], dim=1)
        x_full = torch.gather(x_full, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[-1]))
        x_full = x_full + self.decoder_pos_embed

        x_dec = self.decoder_blocks(x_full)
        pred = self.decoder_pred(x_dec)

        target = patches
        # mask == 1 on masked patches, 0 on kept ones; only masked patches are scored.
        mask = torch.ones([B, N], device=imgs.device)
        mask.scatter_(1, ids_keep, 0)
        loss = ((pred - target) ** 2 * mask.unsqueeze(-1)).sum() / mask.sum()

        return loss, pred, mask

In [None]:
import torch
from torch.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm

def train_mae_epoch(model, dataloader, optimizer, scheduler=None, scaler=None,
                    device="cuda", patch_size=16, max_norm=1.0, desc="MAE Train Epoch"):
    """Run one epoch of MAE reconstruction training.

    Args:
        model: module whose forward returns ``(loss, preds, mask)``; only ``preds``
            is used here.
        dataloader: iterable yielding image batches (tensors only, no labels).
        optimizer: optimizer updating ``model``'s parameters.
        scheduler: optional LR scheduler, stepped once per batch.
        scaler: optional ``GradScaler``; passing one enables mixed precision.
        device: device (string or ``torch.device``) the batches are moved to.
        patch_size: patch side length used to build the reconstruction targets.
        max_norm: gradient-norm clipping threshold, or ``None`` to disable clipping.
        desc: label shown on the progress bar.

    Returns:
        Mean of the per-batch losses (0.0 if the dataloader yields no batches).
    """
    model.train()
    use_amp = scaler is not None
    # Autocast has to target the device the tensors live on, not a hard-coded 'cuda'.
    device_type = torch.device(device).type
    total_loss = 0.0
    num_batches = 0
    pbar = tqdm(dataloader, desc=desc)

    for imgs in pbar:
        imgs = imgs.to(device)

        # patchify works on the already-moved batch, so targets are on `device` too.
        targets = patchify(imgs, patch_size)

        optimizer.zero_grad(set_to_none=True)

        with autocast(device_type=device_type, enabled=use_amp):
            # NOTE(review): the masked loss returned by the model is discarded and a
            # plain MSE over all patches (masked and kept) is optimised instead —
            # confirm this is intended.
            _, preds, _ = model(imgs)
            loss = torch.nn.functional.mse_loss(preds, targets)

        if use_amp:
            scaler.scale(loss).backward()
            if max_norm is not None:
                # Gradients must be unscaled before their norm is clipped.
                scaler.unscale_(optimizer)
                clip_grad_norm_(model.parameters(), max_norm)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            if max_norm is not None:
                clip_grad_norm_(model.parameters(), max_norm)
            optimizer.step()

        # Per-batch stepping: the scheduler horizon must be given in optimizer steps.
        if scheduler is not None:
            scheduler.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1
        pbar.set_postfix({"loss": batch_loss})

    # Counting batches avoids needing len(dataloader) and guards the empty case.
    return total_loss / max(num_batches, 1)

In [None]:
from torch.utils.data import DataLoader
from torch.optim import AdamW
from src.mae_dataset import MAEDataset

# Image dataset for MAE pre-training, read from the training image folder at 224 px.
dataset = MAEDataset("../data/human_poses_data/img_train/", img_size=224)
loader = DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

In [None]:
# Fresh TinyViT whose patch embedding and stages become the MAE encoder.
base = TinyViT(model_size='11M')
encoder = TinyViTEncoder(base_model=base)
# encoder_dim must equal the channel width of the encoder output
# (448 is assumed for the '11M' variant — TODO confirm).
mae = MAEWrapper(encoder=encoder, encoder_dim=448)
mae = mae.cuda()

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 50

# NOTE(review): this criterion is not passed to the MAE training loop
# (train_mae_epoch computes its own MSE); kept so the notebook globals are unchanged.
criterion = nn.MSELoss()

optimizer = AdamW(mae.parameters(), lr=3e-4, weight_decay=1e-3)

# train_mae_epoch calls scheduler.step() after every batch, so the cosine horizon is
# expressed in optimizer steps. With T_max=NUM_EPOCH the learning rate would reach
# eta_min after only NUM_EPOCH batches and then start rising again.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=NUM_EPOCH * len(loader),
    eta_min=1e-6
)

# Passing a GradScaler to train_mae_epoch switches on mixed-precision training.
scaler = GradScaler()

In [None]:
for epoch in range(NUM_EPOCH):
    # One full pass over the MAE loader; the return value is the mean per-batch loss.
    avg_loss = train_mae_epoch(
        mae, loader, optimizer,
        scheduler=scheduler, scaler=scaler,
        device="cuda", patch_size=16, max_norm=1.0,
        desc=f"Epoch {epoch+1}",
    )
    print(f"[Epoch {epoch+1}] Loss: {avg_loss:.4f}")

    # Only the encoder weights are checkpointed, one file per epoch; the decoder is
    # not saved.
    torch.save(encoder.state_dict(), f"mae_encoder_epoch{epoch+1}.pth")


# Supervised fine-tuning of the MAE-pretrained TinyViT

In [None]:
# The classifier head size follows the number of labels found in the data instead of
# a hard-coded 16.
model = TinyViT(model_size="11M", num_classes=num_classes)

# map_location="cpu": the checkpoint was written from GPU tensors, while the freshly
# built model still lives on the CPU.
encoder_state = torch.load("mae_encoder_epoch50.pth", map_location="cpu")

# strict=False is needed because the checkpoint only holds patch_embed/stages weights,
# but it also hides genuine mismatches, so the outcome is reported explicitly.
load_result = model.load_state_dict(encoder_state, strict=False)
if load_result.unexpected_keys:
    print(f"WARNING: {len(load_result.unexpected_keys)} checkpoint tensors were not used: "
          f"{load_result.unexpected_keys[:5]}")
print(f"Loaded {len(encoder_state) - len(load_result.unexpected_keys)} pretrained tensors; "
      f"{len(load_result.missing_keys)} model tensors keep their fresh initialisation")
load_result

In [None]:
# Stage 1 trains everything except the pretrained patch embedding and stages, which
# are frozen here.
model.patch_embed.requires_grad_(False)
for stage in model.stages:
    stage.requires_grad_(False)

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 10

# Loss and AMP scaler do not depend on the optimiser, so they are created first.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

# Every parameter is handed to AdamW; the frozen ones receive no gradient and are
# therefore left untouched by the optimiser.
optimizer = AdamW(params=model.parameters(), lr=3e-4, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCH, eta_min=1e-6)

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn.
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=1.0)

# Stage 1 run, using the optimiser, scheduler and criterion defined in the cell above.
trainer = Trainer(
    experiment_name="ssl_vit_1_1",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
from src.utils import load_best_model

# Load the ssl_vit_1_1 checkpoint into `model` on `device`
# (presumably the best weights saved by the Trainer run above — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_1_best.pth', device)

In [None]:
# Unfreeze the whole network for the end-to-end fine-tuning stages that follow.
for param in model.parameters():
    param.requires_grad_(True)

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 50

# Split the parameters by identity instead of by substring of their name. The old
# `"head" not in n` test would silently drop from BOTH groups any encoder parameter
# whose name happens to contain "head"; its "norm_head" clause was also redundant.
head_params = list(model.head.parameters()) + list(model.norm_head.parameters())
head_param_ids = {id(p) for p in head_params}
encoder_params = [p for p in model.parameters() if id(p) not in head_param_ids]

# Discriminative learning rates: the head trains 10x faster than the encoder.
optimizer = AdamW([
    {'params': encoder_params, 'lr': 3e-4},
    {'params': head_params, 'lr': 3e-3}
], weight_decay=1e-3)

# OneCycleLR is sized in optimizer steps (epochs * steps_per_epoch); max_lr lists one
# peak per parameter group, in the same order as above.
# NOTE(review): this assumes Trainer steps the scheduler once per batch, whereas the
# CosineAnnealingLR stages use T_max=NUM_EPOCH (per-epoch stepping) — confirm.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=[3e-4, 3e-3],
    steps_per_epoch=len(train_loader),
    epochs=NUM_EPOCH,
    pct_start=0.1,
    anneal_strategy='cos',
    div_factor=10.0,
    final_div_factor=500
)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn.
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=1.0)

# Full-network run with the two-group optimiser and OneCycleLR schedule defined above.
trainer = Trainer(
    experiment_name="ssl_vit_1_2",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
# Load the ssl_vit_1_2 checkpoint into `model` on `device`
# (presumably the best weights of the preceding run — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_2_best.pth', device)

In [None]:
NUM_EPOCH = 100

# Split the parameters by identity instead of by substring of their name. The old
# `"head" not in n` test would silently drop from BOTH groups any encoder parameter
# whose name happens to contain "head"; its "norm_head" clause was also redundant.
head_params = list(model.head.parameters()) + list(model.norm_head.parameters())
head_param_ids = {id(p) for p in head_params}
encoder_params = [p for p in model.parameters() if id(p) not in head_param_ids]

# Lower learning rates than the OneCycle stage; the head still trains 3x faster.
optimizer = AdamW([
    {'params': encoder_params, 'lr': 1e-4},
    {'params': head_params, 'lr': 3e-4}
], weight_decay=1e-3)

# Cosine decay of both group learning rates to eta_min over NUM_EPOCH scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=NUM_EPOCH,
    eta_min=1e-6
)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn (alpha lowered to 0.75).
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=0.75)

# NOTE(review): experiment_name repeats the previous stage's "ssl_vit_1_2". If Trainer
# derives checkpoint or run names from it, this run presumably overwrites the previous
# stage's artefacts — confirm this is intended.
trainer = Trainer(
    experiment_name="ssl_vit_1_2",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
from src.utils import load_best_model

# Load the ssl_vit_1_2 checkpoint into `model` on `device`
# (presumably the best weights of the preceding run — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_2_best.pth', device)

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 75

# Loss and AMP scaler do not depend on the optimiser, so they are created first.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

# One learning rate for the whole network, cosine-annealed to 1e-7 over NUM_EPOCH
# scheduler steps.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCH, eta_min=1e-7)

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn (alpha lowered to 0.5).
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=0.5)

trainer = Trainer(
    experiment_name="ssl_vit_1_3",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
from src.utils import load_best_model

# Load the ssl_vit_1_3 checkpoint into `model` on `device`
# (presumably the best weights of the preceding run — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_3_best.pth', device)

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 75

# Loss and AMP scaler do not depend on the optimiser, so they are created first.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

# One learning rate for the whole network, cosine-annealed to 1e-7 over NUM_EPOCH
# scheduler steps.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCH, eta_min=1e-7)

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn (alpha lowered to 0.25).
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=0.25)

trainer = Trainer(
    experiment_name="ssl_vit_1_4",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
# Load the ssl_vit_1_4 checkpoint into `model` on `device` before building the submission
# (presumably the best weights of the preceding run — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_4_best.pth', device)

In [None]:
from src.test_dataset_for_tta import TestDataset, make_submission_with_tta
from torch.utils.data import DataLoader

TEST_DIR = Path("../data/human_poses_data/img_test")


def pil_collate(batch):
    """Collate (image, id) samples into two parallel lists without stacking.

    Args:
        batch: non-empty sequence of ``(image, id)`` pairs.

    Returns:
        Tuple ``(images, ids)`` of two lists, in the order the samples arrived.
    """
    images, ids = map(list, zip(*batch))
    return images, ids


# Collect the test images and derive each integer id from the file stem.
test_image_paths = [*TEST_DIR.glob("*.jpg")]
test_ids = [int(path.stem) for path in test_image_paths]

test_dataset = TestDataset(test_image_paths, test_ids)

# pil_collate keeps each batch as plain Python lists instead of stacking tensors.
test_loader = DataLoader(
    dataset=test_dataset,
    collate_fn=pil_collate,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

make_submission_with_tta(model, test_loader, device, train_dataset.index_to_class)

In [None]:
# Submit submission.csv to the competition through the Kaggle CLI.
# NOTE(review): "Message" looks like a placeholder description — consider naming the model/stage.
!kaggle competitions submit -c ml-intensive-yandex-academy-spring-2025 -f submission.csv -m "Message"

In [None]:
from src.utils import load_best_model

# Load the ssl_vit_1_4 checkpoint into `model` on `device` again, ahead of the next
# training stage.
load_best_model(model, 'checkpoints/ssl_vit_1_4_best.pth', device)

In [None]:
from torch.amp import GradScaler

NUM_EPOCH = 75

# Loss and AMP scaler do not depend on the optimiser, so they are created first.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
scaler = GradScaler()

# One learning rate for the whole network, cosine-annealed to 1e-7 over NUM_EPOCH
# scheduler steps.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCH, eta_min=1e-7)

In [None]:
from src.utils import MixupCutMixAugmenter

# Batch-level augmenter handed to the Trainer as batch_augment_fn (alpha stays at 0.25).
mixup_cutmix_fn = MixupCutMixAugmenter(p_mixup=0.5, alpha=0.25)

trainer = Trainer(
    experiment_name="ssl_vit_1_5",
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    criterion=criterion,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    batch_augment_fn=mixup_cutmix_fn,
    use_wandb=True,
    seed=42,
)

history = trainer.train()

In [None]:
# Load the ssl_vit_1_5 checkpoint into `model` on `device` before building the submission
# (presumably the best weights of the preceding run — confirm).
load_best_model(model, 'checkpoints/ssl_vit_1_5_best.pth', device)

In [None]:
from src.test_dataset_for_tta import TestDataset, make_submission_with_tta
from torch.utils.data import DataLoader

TEST_DIR = Path("../data/human_poses_data/img_test")


def pil_collate(batch):
    """Collate (image, id) samples into two parallel lists without stacking.

    Args:
        batch: non-empty sequence of ``(image, id)`` pairs.

    Returns:
        Tuple ``(images, ids)`` of two lists, in the order the samples arrived.
    """
    images, ids = map(list, zip(*batch))
    return images, ids


# Collect the test images and derive each integer id from the file stem.
test_image_paths = [*TEST_DIR.glob("*.jpg")]
test_ids = [int(path.stem) for path in test_image_paths]

test_dataset = TestDataset(test_image_paths, test_ids)

# pil_collate keeps each batch as plain Python lists instead of stacking tensors.
test_loader = DataLoader(
    dataset=test_dataset,
    collate_fn=pil_collate,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

make_submission_with_tta(model, test_loader, device, train_dataset.index_to_class)

In [None]:
# Submit submission.csv to the competition through the Kaggle CLI.
# NOTE(review): "Message" looks like a placeholder description — consider naming the model/stage.
!kaggle competitions submit -c ml-intensive-yandex-academy-spring-2025 -f submission.csv -m "Message"