In [None]:
import warnings
# Silence every warning for the rest of the kernel session: with no category or
# message filter, the 'ignore' action applies to all warnings.
warnings.filterwarnings('ignore')

## Dataset

In [None]:
import torch

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves items straight from an indexable container.

    The container is stored by reference on ``self.data``; it is neither copied
    nor transformed.
    """

    def __init__(self, data) -> None:
        """Remember ``data``, the container the samples are read from."""
        self.data = data

    def __len__(self):
        """Return the number of samples, i.e. ``len`` of the wrapped container."""
        n_items = len(self.data)
        return n_items

    def __getitem__(self, idx):
        """Return whatever the wrapped container holds at position ``idx``."""
        sample = self.data[idx]
        return sample

## Model

In [None]:
import torch.nn as nn

class Model(nn.Module):
    """Placeholder network: it registers no layers and has no forward pass of its own."""

    def __init__(self):
        """Run the ``nn.Module`` initialisation only; no submodules are created."""
        nn.Module.__init__(self)

    def forward(self, x):
        """Hand ``x`` to the base-class ``forward``.

        ``nn.Module`` leaves ``forward`` unimplemented, so this raises
        ``NotImplementedError`` until a real implementation replaces it.
        """
        return nn.Module.forward(self, x)

## Fit

### Only train

In [None]:
import os
import torch
import numpy as np
from tqdm.auto import tqdm
from torch.utils.data import DataLoader

# --- Settings ---
batch_size = 16
num_epochs = 3
lr = 3e-4
models_dir = '../models'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Dataset ---
# TODO(review): `Dataset.__init__` in this notebook takes a required `data`
# argument; pass the training samples here before running the cell.
train_dataset = Dataset()
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,  # the incomplete final batch is skipped
    pin_memory=(device == 'cuda'),  # page-locked memory only helps host-to-GPU copies
)

# --- Model, optimizer, loss ---
model = Model().to(device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()

# Per-epoch history; one entry is appended per finished epoch.
fit_result = {
    'train_losses': [],
}

# --- Training ---
for epoch in range(1, num_epochs + 1):
    model.train()
    train_losses = []

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch} [Train]")
    # The loader is expected to yield (inputs, target) pairs.
    for model_inputs, target in progress_bar:
        model_inputs = model_inputs.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        logits = model(model_inputs)
        loss = loss_fn(logits, target)
        loss.backward()
        optimizer.step()

        # .item() synchronises with the device, so read the scalar once and reuse it.
        loss_value = loss.item()
        train_losses.append(loss_value)
        progress_bar.set_postfix(loss=loss_value)

    # Epoch loss is the mean of the per-batch losses (all batches are full: drop_last=True).
    train_loss = np.mean(train_losses)
    fit_result['train_losses'].append(train_loss)
    print(f"Epoch {epoch}: Train loss = {train_loss:.4f}")

# --- Save the model ---
os.makedirs(models_dir, exist_ok=True)
torch.save(model.state_dict(), f'{models_dir}/model.pt')
print(f"✅ Обучение завершено, модель сохранена в '{models_dir}/model.pt'")

### Train & Valid

In [None]:
import os
import torch
import numpy as np
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split

# --- Settings ---
batch_size = 16
num_epochs = 3
lr = 3e-4
val_ratio = 0.2
models_dir = '../models'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Datasets ---
# TODO(review): `Dataset.__init__` in this notebook takes a required `data`
# argument; pass the full labelled data here before running the cell.
full_dataset = Dataset()
all_indices = np.arange(len(full_dataset))
# Fixed random_state keeps the train/validation split identical across runs.
train_indices, val_indices = train_test_split(
    all_indices,
    test_size=val_ratio,
    random_state=42,
    shuffle=True,
)

train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,  # the incomplete final batch is skipped
    pin_memory=(device == 'cuda'),  # page-locked memory only helps host-to-GPU copies
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,  # every validation sample is evaluated
    pin_memory=(device == 'cuda'),
)

# --- Model, optimizer, loss ---
model = Model().to(device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()

# Per-epoch history; one entry per list is appended per finished epoch.
fit_result = {
    'train_losses': [],
    'val_losses': [],
    'val_accuracies': [],
}

best_val_loss = float('inf')

# Create the output directory once instead of before every checkpoint write.
os.makedirs(models_dir, exist_ok=True)

# --- Training ---
for epoch in range(1, num_epochs + 1):
    # --- Train ---
    model.train()
    train_losses = []

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch} [Train]")
    # The loader is expected to yield (inputs, target) pairs.
    for model_inputs, target in progress_bar:
        model_inputs = model_inputs.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        logits = model(model_inputs)
        loss = loss_fn(logits, target)
        loss.backward()
        optimizer.step()

        # .item() synchronises with the device, so read the scalar once and reuse it.
        loss_value = loss.item()
        train_losses.append(loss_value)
        progress_bar.set_postfix(loss=loss_value)

    train_loss = np.mean(train_losses)
    fit_result['train_losses'].append(train_loss)

    # --- Validation ---
    model.eval()
    val_loss_sum = 0.0
    correct = 0
    total = 0

    val_bar = tqdm(val_loader, desc=f"Epoch {epoch} [Val]")
    with torch.no_grad():
        for model_inputs, target in val_bar:
            model_inputs = model_inputs.to(device)
            target = target.to(device)

            logits = model(model_inputs)
            loss = loss_fn(logits, target)
            loss_value = loss.item()
            n_samples = target.size(0)

            # The loss is a per-batch mean and the last batch may be shorter
            # (drop_last=False), so weight each batch by its size to get an
            # exact per-sample average.
            val_loss_sum += loss_value * n_samples

            preds = logits.argmax(dim=1)
            correct += (preds == target).sum().item()
            total += n_samples

            val_bar.set_postfix(loss=loss_value)

    val_loss = val_loss_sum / total
    val_acc = correct / total

    fit_result['val_losses'].append(val_loss)
    fit_result['val_accuracies'].append(val_acc)

    print(f"Epoch {epoch}:")
    print(f"  Train loss: {train_loss:.4f}")
    print(f"  Val loss:   {val_loss:.4f}")
    print(f"  Val acc:    {val_acc:.4f}")

    # --- Save best model ---
    # The checkpoint is written to disk immediately, so it is not affected by
    # later epochs updating the live parameters.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), f'{models_dir}/best_model.pt')
        print(f"  ✅ New best model saved (val_loss={val_loss:.4f})")

# --- Final save (weights after the last epoch) ---
torch.save(model.state_dict(), f'{models_dir}/last_model.pt')
print("Обучение завершено.")

### Folds train & valid

In [None]:
import copy
import os
import torch
import numpy as np
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold

# --- Settings ---
batch_size = 16
num_epochs = 3
lr = 3e-4
n_splits = 5
models_dir = '../models'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Dataset ---
# TODO(review): `Dataset.__init__` in this notebook takes a required `data`
# argument; pass the full labelled data here before running the cell.
full_dataset = Dataset()

# --- Bookkeeping ---
best_models = []   # one best state_dict snapshot per fold
fit_results = []   # one per-epoch history dict per fold

# Create the output directory once instead of once per fold.
os.makedirs(models_dir, exist_ok=True)

# --- KFold ---
# Fixed random_state keeps the fold assignment identical across runs.
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset), start=1):
    print(f"\n===== Fold {fold}/{n_splits} =====")

    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)

    # pin_memory only helps host-to-GPU copies, so request it only when CUDA is used.
    train_loader = DataLoader(
        train_subset, batch_size=batch_size, shuffle=True, drop_last=True,
        pin_memory=(device == 'cuda'),
    )
    val_loader = DataLoader(
        val_subset, batch_size=batch_size, shuffle=False, drop_last=False,
        pin_memory=(device == 'cuda'),
    )

    # --- Model, optimizer, loss (fresh for every fold) ---
    model = Model().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    fit_result = {
        'train_losses': [],
        'val_losses': [],
        'val_accuracies': [],
    }

    best_val_loss = float('inf')
    best_model_state = None

    # --- Training ---
    for epoch in range(1, num_epochs + 1):
        # --- Train ---
        model.train()
        train_losses = []

        progress_bar = tqdm(train_loader, desc=f"Fold {fold} | Epoch {epoch} [Train]")
        # The loader is expected to yield (inputs, target) pairs.
        for model_inputs, target in progress_bar:
            model_inputs = model_inputs.to(device)
            target = target.to(device)

            optimizer.zero_grad()
            logits = model(model_inputs)
            loss = loss_fn(logits, target)
            loss.backward()
            optimizer.step()

            # .item() synchronises with the device, so read the scalar once and reuse it.
            loss_value = loss.item()
            train_losses.append(loss_value)
            progress_bar.set_postfix(loss=loss_value)

        train_loss = np.mean(train_losses)
        fit_result['train_losses'].append(train_loss)

        # --- Validation ---
        model.eval()
        val_loss_sum = 0.0
        correct = 0
        total = 0

        val_bar = tqdm(val_loader, desc=f"Fold {fold} | Epoch {epoch} [Val]")
        with torch.no_grad():
            for model_inputs, target in val_bar:
                model_inputs = model_inputs.to(device)
                target = target.to(device)

                logits = model(model_inputs)
                loss = loss_fn(logits, target)
                loss_value = loss.item()
                n_samples = target.size(0)

                # The loss is a per-batch mean and the last batch may be shorter
                # (drop_last=False), so weight each batch by its size to get an
                # exact per-sample average.
                val_loss_sum += loss_value * n_samples

                preds = logits.argmax(dim=1)
                correct += (preds == target).sum().item()
                total += n_samples

                val_bar.set_postfix(loss=loss_value)

        val_loss = val_loss_sum / total
        val_acc = correct / total

        fit_result['val_losses'].append(val_loss)
        fit_result['val_accuracies'].append(val_acc)

        print(f"Epoch {epoch}: Train loss = {train_loss:.4f} | Val loss = {val_loss:.4f} | Val acc = {val_acc:.4f}")

        # --- Keep the best model of this fold ---
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameter tensors, which
            # the optimizer keeps updating in place. Deep-copy them so the snapshot
            # really holds this epoch's weights rather than the final ones.
            best_model_state = copy.deepcopy(model.state_dict())
            print(f"  ✅ New best model for fold {fold} (val_loss={val_loss:.4f})")

    # --- Store the fold's history and best weights ---
    fit_results.append(fit_result)
    best_models.append(best_model_state)
    torch.save(best_model_state, f"{models_dir}/best_model_fold{fold}.pt")

print("\n✅ Обучение по фолдам завершено.")
print(f"Всего сохранено лучших моделей: {len(best_models)}")

## Submission

In [None]:
# Test-time loader: sample order is preserved (shuffle=False) and no sample is
# dropped (drop_last=False). `batch_size` is taken from the training cell above.
test_set = Dataset()
test_loader = DataLoader(
    test_set, batch_size=batch_size, shuffle=False, drop_last=False, pin_memory=True
)

In [None]:
# Run the current `model` over the test loader and collect its raw outputs.
# Each batch is moved to `device`, passed through the model, brought back to the
# CPU as a NumPy array, and the per-batch arrays are joined along axis 0.
model.eval()
with torch.no_grad():
    results = np.concatenate(
        [
            model(model_inputs.to(device=device)).cpu().numpy()
            for model_inputs in tqdm(test_loader)
        ],
        axis=0,
    )

In [None]:
# Ensemble over folds: rebuild one model per stored state dict, run each over the
# test loader, then average the raw outputs across folds.
best_model_preds = []

with torch.no_grad():
    for fold_idx, state_dict in enumerate(best_models, start=1):
        # Fresh model instance carrying this fold's saved weights.
        fold_model = Model().to(device=device)
        fold_model.load_state_dict(state_dict)
        fold_model.eval()

        # One NumPy array of outputs per test batch, in loader order.
        fold_results = [
            fold_model(model_inputs.to(device=device)).cpu().numpy()
            for model_inputs in tqdm(test_loader, desc=f"Fold {fold_idx} [Test]")
        ]

        best_model_preds.append(np.concatenate(fold_results, axis=0))

# Axis 0 of the stacked array indexes the fold; the mean over it is the ensemble output.
best_model_preds = np.stack(best_model_preds, axis=0)
results_best_models = np.mean(best_model_preds, axis=0)