In [1]:
import sys

from src.models.miniconvnext import MiniConvNeXt

sys.path.append('..')

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import numpy as np

from src.trainer import Trainer
from src.dataset import HumanPosesDataset
from sklearn.model_selection import train_test_split
import torchvision.transforms as T
import torch

In [2]:
import plotly.io as pio
pio.renderers.default = "browser" 

# Сначала посчитаю mean и std


In [20]:
CSV_PATH = Path("../data/human_poses_data/train_answers.csv")
TRAIN_DIR = Path("../data/human_poses_data/img_train")

df = pd.read_csv(CSV_PATH)
train_ids, val_ids = train_test_split(
    df['img_id'].values,
    test_size=0.2,
    stratify=df['target_feature'],
    random_state=42
)

train_df = df[df['img_id'].isin(train_ids)]
val_df = df[df['img_id'].isin(val_ids)]

In [4]:
from torchvision import transforms
from torch.utils.data import DataLoader, Subset

transform_for_stats = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_dataset_raw = HumanPosesDataset(
    root_dir=TRAIN_DIR.parent,
    mode='train',
    transform=transform_for_stats
)

train_idx = train_df.index.tolist()
train_dataset_raw = Subset(train_dataset_raw, train_idx[:256])

train_loader_raw = DataLoader(
    train_dataset_raw,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)


In [5]:
def compute_mean_std(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.

    for data, _ in tqdm(loader, desc="Computing mean/std"):
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples
    return mean, std

mean, std = compute_mean_std(train_loader_raw)
print("Mean:", mean)
print("Std:", std)

Computing mean/std: 100%|██████████| 8/8 [00:15<00:00,  1.88s/it]

Mean: tensor([0.4638, 0.4522, 0.4148])
Std: tensor([0.2222, 0.2198, 0.2176])





In [3]:
from torchvision import transforms

mean = [0.4638, 0.4522, 0.4148]
std = [0.2222, 0.2198, 0.2176]

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(0.75, 1.33)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2)
    ], p=0.3),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])


In [4]:
CSV_PATH = Path("../data/human_poses_data/train_answers.csv")
TRAIN_DIR = Path("../data/human_poses_data/img_train")

df = pd.read_csv(CSV_PATH)
train_ids, val_ids = train_test_split(
    df['img_id'].values,
    test_size=0.2,
    stratify=df['target_feature'],
    random_state=42
)

train_df = df[df['img_id'].isin(train_ids)]
val_df = df[df['img_id'].isin(val_ids)]

train_dataset = HumanPosesDataset(
    root_dir=TRAIN_DIR.parent, mode='train', transform=train_transform
)
val_dataset = HumanPosesDataset(
    root_dir=TRAIN_DIR.parent, mode='train', transform=val_transform
)

train_idx = train_df.index.tolist()
val_idx = val_df.index.tolist()

train_dataset = Subset(train_dataset, train_idx)
val_dataset = Subset(val_dataset, val_idx)


train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

Train dataset size: 9892
Validation dataset size: 2474


In [5]:
num_classes = len(np.unique(df['target_feature'])) 
print(f"Количество классов: {num_classes}")

Количество классов: 16


# Обучение

In [6]:
model = MiniConvNeXt(num_classes=num_classes)

In [7]:
NUM_EPOCH = 25

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCH)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

device = torch.device("mps" if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: mps


In [8]:
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCH,
    optimizer=optimizer,
    scheduler=scheduler,
    device = device,
    criterion=criterion,
    experiment_name="miniconvnext",
    use_wandb=True,
    seed=42
)

history = trainer.train()

Train 24:   0%|          | 0/155 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [9]:
from src.utils import load_best_model

model = load_best_model(model, "checkpoints/miniconvnext_epoch16.pth", device=device)

✅ Loaded model weights from checkpoints/miniconvnext_epoch16.pth


In [10]:
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=15,
    optimizer=optimizer,
    scheduler=scheduler,
    device = device,
    criterion=criterion,
    experiment_name="miniconvnext_2",
    use_wandb=True,
    seed=42
)

history = trainer.train()

Train 10:   0%|          | 0/155 [00:04<?, ?it/s]


KeyboardInterrupt: 

он переобучается, нужны более сильные аугментации