In [1]:
!pip install torch torchvision numpy scikit-learn

Collecting torch
  Downloading torch-2.9.1-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.1-cp312-cp312-win_amd64.whl.metadata (5.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.9.1-cp312-cp312-win_amd64.whl (110.9 MB)
   ---------------------------------------- 0.0/110.9 MB ? eta -:--:--
   ---------------------------------------- 0.8/110.9 MB 6.6 MB/s eta 0:00:17
    --------------------------------------- 2.6/110.9 MB 7.9 MB/s eta 0:00:14
   - -------------------------------------- 3.9/110.9 MB 7.1 MB/s eta 0:00:16
   - -------------------------------------- 5.0/110.9 MB 6.6 MB/s eta 0:00:17
   -- ------------------------------------- 6.0/110.9 MB 6.1 MB/s eta 0:00:18
   -- ------------------------------------- 7.1/110.9 MB 6.0 MB/s eta 0:00:18
   -- ------------------------------------- 7.9/110.9 MB 5.9 MB/s eta 0:00:18
   --- -----------------------------

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

# Hyperparameters
batch_size = 128
epochs = 10
lr = 1e-3
mc_samples = 20  # number of stochastic forward passes for Monte Carlo Dropout

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Fix the PyTorch and NumPy random seeds.
torch.manual_seed(0)
np.random.seed(0)

In [29]:
class CNN(nn.Module):
    """Small convolutional classifier with dropout after every stage.

    Architecture: three stages of ``conv 3x3 (padding 1) -> ReLU ->
    2x2 max-pool -> dropout``, followed by a 256-unit hidden layer
    (ReLU + dropout) and a linear layer producing ``num_classes`` logits.
    ``fc1`` takes 128 * 4 * 4 inputs, i.e. a 4x4 feature map after the three
    poolings (which is what a 32x32 input yields).

    Args:
        dropout_p: drop probability of the single shared ``nn.Dropout``.
        num_classes: number of output logits.
    """

    def __init__(self, dropout_p=0.3, num_classes=10):
        super().__init__()

        # Convolutional feature extractor: 3 -> 32 -> 64 -> 128 channels.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        # Parameter-free layers reused after every stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(dropout_p)

        # Classifier head.
        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.dropout(self.pool(F.relu(conv(x))))

        flat = x.view(x.shape[0], -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

In [30]:
# CIFAR-10 images are only converted to tensors.
transform_cifar = transforms.Compose([
    transforms.ToTensor()
])

# MNIST images are resized to 32x32 and their single channel is expanded to
# three, so they have the input layout the CNN expects.
transform_mnist = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.expand(3, -1, -1))  # 1 channel -> 3 channels
])

# In-distribution (ID) data: CIFAR-10 train/test. Out-of-distribution (OOD) data: MNIST test.
train_id = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cifar)
test_id = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cifar)
test_ood = datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist)

# Only the training loader is shuffled; the evaluation loaders keep dataset order.
train_id_loader = DataLoader(train_id, batch_size=batch_size, shuffle=True)
test_id_loader = DataLoader(test_id, batch_size=batch_size, shuffle=False)
test_ood_loader = DataLoader(test_ood, batch_size=batch_size, shuffle=False)

100%|██████████| 170M/170M [00:53<00:00, 3.16MB/s] 
100%|██████████| 9.91M/9.91M [00:05<00:00, 1.88MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 230kB/s]
100%|██████████| 1.65M/1.65M [00:02<00:00, 695kB/s] 
100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]


In [31]:
def train(model, train_loader, epochs, lr):
    """Train ``model`` with Adam and cross-entropy, printing per-epoch stats.

    Args:
        model: network mapping a batch of inputs to class logits; it is moved
            to the module-level ``device``.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.

    Raises:
        ValueError: if ``train_loader`` yields no samples in an epoch.

    The printed loss is the mean over *samples*: each batch loss (already a
    batch mean) is weighted by its batch size, so a smaller last batch is not
    over-weighted and the loader does not need to implement ``len()``.
    The printed accuracy is computed on the training-mode outputs (dropout on).
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_function = nn.CrossEntropyLoss()

    for epoch in range(1, epochs + 1):
        model.train()
        loss_sum = 0.0   # sum of per-sample losses over the epoch
        correct = 0
        total = 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            logits = model(x)
            loss = loss_function(logits, y)
            loss.backward()
            optimizer.step()

            batch_n = y.size(0)
            # loss is the batch mean; scale back to a sum before accumulating.
            loss_sum += loss.item() * batch_n
            preds = logits.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += batch_n

        if total == 0:
            raise ValueError("train_loader yielded no samples")

        print(f'Epoch {epoch}; Train loss {loss_sum / total:.4f}; Accuracy {correct / total:.4f}')


# Build the classifier and train it on the in-distribution training loader.
model = CNN(dropout_p=0.3, num_classes=10)
train(model, train_id_loader, epochs=epochs, lr=lr)

Epoch 1; Train loss 1.7100; Accuracy 0.3718
Epoch 2; Train loss 1.3558; Accuracy 0.5068
Epoch 3; Train loss 1.2210; Accuracy 0.5638
Epoch 4; Train loss 1.1246; Accuracy 0.5964
Epoch 5; Train loss 1.0452; Accuracy 0.6298
Epoch 6; Train loss 0.9972; Accuracy 0.6499
Epoch 7; Train loss 0.9527; Accuracy 0.6627
Epoch 8; Train loss 0.9090; Accuracy 0.6794
Epoch 9; Train loss 0.8791; Accuracy 0.6899
Epoch 10; Train loss 0.8519; Accuracy 0.6993


In [32]:
def compute_ood_metrics(id_scores, ood_scores):
    """Print and return OOD-detection metrics for two score arrays.

    Args:
        id_scores: scores of in-distribution samples (CIFAR10), labelled 0.
        ood_scores: scores of out-of-distribution samples (MNIST), labelled 1.
            A higher score means the model is more "in doubt".

    Returns:
        ``(auroc, aupr, fpr95)`` where ``fpr95`` is the false-positive rate at
        the first ROC point whose true-positive rate is at least 0.95
        (1.0 if no such point exists).
    """
    labels = np.concatenate([np.zeros_like(id_scores), np.ones_like(ood_scores)])
    all_scores = np.concatenate([id_scores, ood_scores])

    auroc = roc_auc_score(labels, all_scores)
    aupr = average_precision_score(labels, all_scores)

    # FPR at 95% TPR: take the first ROC point that reaches the target TPR.
    fpr, tpr, _ = roc_curve(labels, all_scores)
    reached = np.flatnonzero(tpr >= 0.95)
    fpr95 = fpr[reached[0]] if reached.size > 0 else 1.0

    print(f'AUROC     {auroc:.3f}')
    print(f'AUPR      {aupr:.3f}')
    print(f'FPR@95TPR {fpr95:.3f}')

    return auroc, aupr, fpr95

In [33]:
def get_softmax_ood_scores(model, id_loader, ood_loader):
    """Score every sample with the softmax baseline: ``1 - max softmax prob``.

    The model is moved to the module-level ``device`` and put in eval mode.

    Args:
        model: classifier returning logits.
        id_loader: loader of in-distribution ``(inputs, labels)`` batches.
        ood_loader: loader of out-of-distribution ``(inputs, labels)`` batches.

    Returns:
        ``(id_scores, ood_scores)`` as 1-D NumPy arrays.
    """
    model.to(device)
    model.eval()

    def score_batches(loader):
        # One NumPy array of scores per batch; labels are ignored.
        collected = []
        for inputs, _ in loader:
            probs = F.softmax(model(inputs.to(device)), dim=1)
            confidence = probs.max(dim=1).values
            collected.append((1.0 - confidence).cpu().numpy())
        return collected

    with torch.no_grad():
        id_batches = score_batches(id_loader)
        ood_batches = score_batches(ood_loader)

    return np.concatenate(id_batches), np.concatenate(ood_batches)


# Score the ID and OOD test sets with the softmax baseline (1 - max softmax probability).
softmax_id_scores, softmax_ood_scores = get_softmax_ood_scores(model, test_id_loader, test_ood_loader)

print("Softmax baseline")
# Prints AUROC, AUPR and FPR@95TPR and keeps them for later use.
softmax_auroc, softmax_aupr, softmax_fpr95 = compute_ood_metrics(softmax_id_scores, softmax_ood_scores)

Softmax baseline
AUROC     0.652
AUPR      0.581
FPR@95TPR 0.715


In [34]:
def get_mcd_ood_entropy(model, x, T=20):
    """Monte Carlo Dropout OOD score for one batch.

    Runs ``T`` stochastic forward passes with dropout active, averages the
    softmax distributions p(y|x) and returns the entropy of that mean
    distribution (one value per sample).

    Only the dropout layers are switched to training mode; every other layer
    runs in eval mode, so layers such as BatchNorm neither use batch
    statistics nor update their running statistics. The training/eval flag of
    every module is restored before returning, so the call leaves the model's
    mode unchanged.

    Args:
        model: classifier returning logits; it is moved to the module-level
            ``device``. Dropout must be implemented with ``nn.Dropout*``
            modules to be activated here.
        x: input batch, expected to be on ``device`` already.
        T: number of stochastic forward passes (must be >= 1).

    Returns:
        Tensor of shape ``[B]`` with the predictive entropy of each sample.

    Raises:
        ValueError: if ``T`` is smaller than 1.
    """
    if T < 1:
        raise ValueError("T must be a positive integer")

    model.to(device)

    dropout_types = (
        nn.Dropout, nn.Dropout1d, nn.Dropout2d, nn.Dropout3d,
        nn.AlphaDropout, nn.FeatureAlphaDropout,
    )
    # Remember each module's mode so it can be restored afterwards.
    saved_modes = [(module, module.training) for module in model.modules()]

    model.eval()
    for module in model.modules():
        if isinstance(module, dropout_types):
            module.train()  # enable Dropout only

    try:
        with torch.no_grad():
            probs_T = torch.stack(
                [F.softmax(model(x), dim=1) for _ in range(T)], dim=0
            )                                         # [T, B, C]
            p_mean = probs_T.mean(dim=0)              # [B, C]
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    eps = 1e-8  # avoids log(0) for zero-probability classes
    entropy = -torch.sum(p_mean * torch.log(p_mean + eps), dim=1)
    return entropy


def get_mcd_ood_scores(model, id_loader, ood_loader, T=20):
    """Compute MC Dropout entropy scores for the ID and OOD loaders.

    Args:
        model: classifier returning logits; moved to the module-level ``device``.
        id_loader: loader of in-distribution ``(inputs, labels)`` batches.
        ood_loader: loader of out-of-distribution ``(inputs, labels)`` batches.
        T: number of stochastic forward passes per batch.

    Returns:
        ``(id_scores, ood_scores)`` as 1-D NumPy arrays.
    """
    model.to(device)

    # Labels are ignored; every batch yields one array of per-sample entropies.
    id_batches = [
        get_mcd_ood_entropy(model, inputs.to(device), T=T).cpu().numpy()
        for inputs, _ in id_loader
    ]
    ood_batches = [
        get_mcd_ood_entropy(model, inputs.to(device), T=T).cpu().numpy()
        for inputs, _ in ood_loader
    ]

    return np.concatenate(id_batches), np.concatenate(ood_batches)


# Score the ID and OOD test sets with MC Dropout entropy, using mc_samples forward passes per batch.
mcd_id_scores, mcd_ood_scores = get_mcd_ood_scores(model, test_id_loader, test_ood_loader, T=mc_samples)

print("Monte Carlo Dropout")
# Prints AUROC, AUPR and FPR@95TPR and keeps them for later use.
mcd_auroc, mcd_aupr, mcd_fpr95 = compute_ood_metrics(mcd_id_scores, mcd_ood_scores)

Monte Carlo Dropout
AUROC     0.724
AUPR      0.625
FPR@95TPR 0.593


In [35]:
def forward_features(model, x):
    """Return the penultimate-layer activations h(x).

    Follows the same path as the model's ``forward`` but stops before the
    final ``fc2`` layer: three conv -> ReLU -> pool -> dropout stages, then
    flatten, ``fc1``, ReLU and dropout.
    """
    for conv in (model.conv1, model.conv2, model.conv3):
        x = model.dropout(model.pool(F.relu(conv(x))))

    flat = x.view(x.size(0), -1)
    return model.dropout(F.relu(model.fc1(flat)))


def estimate_react_threshold(model, id_loader, percentile=90):
    """Estimate the ReAct clipping threshold ``c``.

    ``c`` is the ``percentile``-th percentile of all penultimate-layer
    activation values h(x) collected over the ID dataset. The model is moved
    to the module-level ``device`` and put in eval mode. The threshold is
    printed and returned.
    """
    model.to(device)
    model.eval()

    with torch.no_grad():
        # Each batch of features [B, D] is flattened to a 1-D array of values.
        flat_batches = [
            forward_features(model, inputs.to(device)).cpu().numpy().ravel()
            for inputs, _ in id_loader
        ]

    all_values = np.concatenate(flat_batches)
    c = np.percentile(all_values, percentile)
    print(f"ReAct threshold c (percentile {percentile}): {c:.4f}")
    return c


def forward_react_logits(model, x, c):
    """Forward pass with ReAct: clip h(x) from above at ``c``, then apply ``fc2``."""
    features = forward_features(model, x)
    return model.fc2(features.clamp(max=c))


def get_react_ood_scores(model, id_loader, ood_loader, c):
    """ReAct OOD scores: ``1 - max softmax prob`` after clipping activations.

    The model is moved to the module-level ``device`` and put in eval mode.

    Args:
        model: classifier exposing the layers used by ``forward_react_logits``.
        id_loader: loader of in-distribution ``(inputs, labels)`` batches.
        ood_loader: loader of out-of-distribution ``(inputs, labels)`` batches.
        c: upper clipping threshold for the penultimate activations.

    Returns:
        ``(id_scores, ood_scores)`` as 1-D NumPy arrays.
    """
    model.to(device)
    model.eval()

    def score_batches(loader):
        # One NumPy array of scores per batch; labels are ignored.
        collected = []
        for inputs, _ in loader:
            logits = forward_react_logits(model, inputs.to(device), c)
            confidence = F.softmax(logits, dim=1).max(dim=1).values
            collected.append((1.0 - confidence).cpu().numpy())
        return collected

    with torch.no_grad():
        id_batches = score_batches(id_loader)
        ood_batches = score_batches(ood_loader)

    return np.concatenate(id_batches), np.concatenate(ood_batches)

In [None]:
# Threshold estimated on the ID training split (train_id_loader is the most sensible choice)
react_c = estimate_react_threshold(model, train_id_loader, percentile=90)

# Compute the ReAct OOD scores
react_id_scores, react_ood_scores = get_react_ood_scores(
    model,
    test_id_loader,
    test_ood_loader,
    c=react_c
)

print("ReAct")
# Prints AUROC, AUPR and FPR@95TPR and keeps them for later use.
react_auroc, react_aupr, react_fpr95 = compute_ood_metrics(
    react_id_scores,
    react_ood_scores
)

ReAct threshold c (percentile 90): 0.9735
ReAct
AUROC     0.874
AUPR      0.831
FPR@95TPR 0.388
