МЕТОДЫ:
1. Conv Net + max-pooling
2. Conv Net + max pooling + dropout in fully connected layers
3. Conv Net + max pooling + dropout in all layers
4. Conv Net + maxout
5. Conv Net + maxout + dropout



In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import time
import warnings
warnings.filterwarnings('ignore')

# Module-level compute device used by train_model / evaluate_model:
# CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Используется устройство: {device}')

Используется устройство: cuda


In [2]:
def load_svhn_data(subset_size=10000, batch_size=128, num_workers=2, root='./data'):
    """Download SVHN and build the train/test data loaders.

    Images are converted to tensors and normalized per channel with
    mean 0.5 and std 0.5, which maps pixel values into [-1, 1].

    Args:
        subset_size: Number of randomly chosen training samples to keep.
            ``None`` keeps the full training split. Values larger than the
            split simply keep every sample (in shuffled order).
        batch_size: Batch size used by both loaders.
        num_workers: Worker processes used by both loaders.
        root: Directory the dataset is downloaded to / read from.

    Returns:
        ``(train_loader, test_loader)``. The training loader shuffles,
        the test loader does not.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # scale to [-1, 1]
    ])

    train_dataset = datasets.SVHN(root=root, split='train', download=True, transform=transform)
    test_dataset = datasets.SVHN(root=root, split='test', download=True, transform=transform)

    if subset_size is not None:
        # Random subset without replacement. Plain Python ints are used so
        # the dataset is never indexed with 0-d tensors.
        indices = torch.randperm(len(train_dataset))[:subset_size].tolist()
        train_dataset = torch.utils.data.Subset(train_dataset, indices)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers)

    print(f"Обучающих образцов: {len(train_dataset)}")
    print(f"Тестовых образцов: {len(test_dataset)}")

    return train_loader, test_loader

In [3]:
class ConvNetMaxPooling(nn.Module):
    """Baseline CNN: three conv(5x5) + ReLU + 2x2 max-pool stages, then two FC layers.

    The first fully connected layer expects ``128 * 4 * 4`` features, i.e.
    3-channel 32x32 inputs (each of the three pools halves the resolution).

    Args:
        dropout_rate: Drop probability. When greater than zero the same
            dropout module is applied after every pooling stage and after
            the hidden FC layer; ``0.0`` (default) disables dropout.
    """

    def __init__(self, dropout_rate=0.0):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

        self.dropout = nn.Dropout(dropout_rate)
        self.dropout_rate = dropout_rate

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for a ``(batch, 3, 32, 32)`` input.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                ``128 * 4 * 4`` features per sample.
        """
        use_dropout = self.dropout_rate > 0
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))
            if use_dropout:
                x = self.dropout(x)

        # Flatten per sample (keep the batch dimension fixed). With
        # ``view(-1, 128 * 4 * 4)`` a wrongly sized input would silently be
        # reinterpreted as a larger batch instead of failing in fc1.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        if use_dropout:
            x = self.dropout(x)
        return self.fc2(x)

In [4]:
class ConvNetMaxPoolingDropoutFC(nn.Module):
    """CNN with dropout applied only to the hidden fully connected layer.

    Architecture: three conv(5x5) + ReLU + 2x2 max-pool stages, then
    ``fc1`` (ReLU, dropout) and ``fc2``. ``fc1`` expects ``128 * 4 * 4``
    features, i.e. 3-channel 32x32 inputs.

    Args:
        dropout_rate: Drop probability of the dropout applied after ``fc1``.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for a ``(batch, 3, 32, 32)`` input.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                ``128 * 4 * 4`` features per sample.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # Flatten per sample so a wrongly sized input fails in fc1 instead of
        # being silently reshaped into a larger batch by ``view(-1, ...)``.
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)

In [5]:
class Maxout(nn.Module):
    """Fully connected maxout layer: y_i = max_{j=1..k} (w_ij . x + b_ij).

    Args:
        num_features: Size of the input feature vector.
        num_units: Number of output units.
        k: Number of affine pieces computed per output unit; the unit's
            output is the maximum over its ``k`` pieces.
    """

    def __init__(self, num_features, num_units, k=2):
        super().__init__()
        self.num_units = num_units
        self.k = k
        # A single Linear produces all num_units * k pieces at once; they are
        # regrouped and reduced with max in forward().
        self.linear = nn.Linear(num_features, num_units * k)

    def forward(self, x):
        """Map ``(B, num_features)`` to ``(B, num_units)``."""
        pieces = self.linear(x)  # (B, num_units * k)
        # Consecutive runs of k outputs belong to the same unit.
        grouped = pieces.view(-1, self.num_units, self.k)  # (B, num_units, k)
        best, _ = torch.max(grouped, dim=2)  # (B, num_units)
        return best

class ConvNetMaxout(nn.Module):
    """CNN whose non-linearities are maxout units instead of ReLU.

    Each convolution emits ``k`` times the nominal channel count; channels
    are grouped in runs of ``k`` and reduced with max, giving 32, 64 and 128
    feature maps after the three stages. Every stage is followed by a 2x2
    max-pool. The classifier is a ``Maxout`` layer (512 units) followed by a
    linear layer. The ``Maxout`` layer expects ``128 * 4 * 4`` features,
    which corresponds to 32x32 inputs.

    Args:
        num_classes: Number of output logits.
        k: Number of maxout pieces per unit.
        dropout_rate: Drop probability. Dropout modules are only created and
            applied (after each pool and after the maxout FC layer) when
            this is greater than zero.
    """

    def __init__(self, num_classes=10, k=2, dropout_rate=0.0):
        super().__init__()
        self.k = k
        self.dropout_rate = dropout_rate
        with_dropout = self.dropout_rate > 0

        # Stage 1: 3 -> 32*k channels -> maxout -> 32
        self.conv1 = nn.Conv2d(3, 32 * k, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2, 2)
        if with_dropout:
            self.dropout_conv1 = nn.Dropout(dropout_rate)

        # Stage 2: 32 -> 64*k channels -> maxout -> 64
        self.conv2 = nn.Conv2d(32, 64 * k, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2, 2)
        if with_dropout:
            self.dropout_conv2 = nn.Dropout(dropout_rate)

        # Stage 3: 64 -> 128*k channels -> maxout -> 128
        self.conv3 = nn.Conv2d(64, 128 * k, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(2, 2)
        if with_dropout:
            self.dropout_conv3 = nn.Dropout(dropout_rate)

        # After pool3 the map is 128 x 4 x 4 for a 32x32 input.
        self.fc1 = Maxout(num_features=128 * 4 * 4, num_units=512, k=k)
        if with_dropout:
            self.dropout_fc = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(512, num_classes)

    def _conv_maxout(self, conv, width, x):
        """Apply ``conv`` and take the max over each group of ``k`` channels."""
        y = conv(x)  # (B, width * k, H, W)
        grouped = y.view(y.size(0), width, self.k, y.size(2), y.size(3))  # (B, width, k, H, W)
        reduced, _ = torch.max(grouped, dim=2)  # (B, width, H, W)
        return reduced

    def forward(self, x):
        """Return logits of shape ``(B, num_classes)``."""
        drop = self.dropout_rate > 0

        x = self.pool1(self._conv_maxout(self.conv1, 32, x))
        if drop:
            x = self.dropout_conv1(x)

        x = self.pool2(self._conv_maxout(self.conv2, 64, x))
        if drop:
            x = self.dropout_conv2(x)

        x = self.pool3(self._conv_maxout(self.conv3, 128, x))
        if drop:
            x = self.dropout_conv3(x)

        # Classifier head
        x = x.view(x.size(0), -1)  # (B, 128*4*4)
        x = self.fc1(x)  # maxout -> (B, 512)
        if drop:
            x = self.dropout_fc(x)
        return self.fc2(x)  # (B, num_classes)

In [6]:
class ConvNetMaxPoolingDropoutAll(nn.Module):
    """CNN with dropout after every layer and an optional max-norm constraint.

    Architecture: three conv(5x5) + ReLU + max-pool(3, stride 2) stages with
    96, 128 and 256 channels, then two 2048-unit ReLU FC layers and a
    10-way output layer. ``fc1`` expects ``256 * 4 * 4`` features, i.e.
    3-channel 32x32 inputs.

    Dropout is configured with retention probabilities 0.9, 0.75, 0.75
    (after the three pooling stages) and 0.5, 0.5 (after the two hidden FC
    layers); ``nn.Dropout`` takes the *drop* probability, hence 0.1, 0.25,
    0.25, 0.5, 0.5 below.
    NOTE(review): the original comment listed six retention values
    (0.9, 0.75, 0.75, 0.5, 0.5, 0.5) but only five dropout layers exist --
    confirm the intended placement.

    Args:
        use_dropout: When ``False`` all dropout layers are skipped in
            ``forward`` (the modules are still created).
    """

    def __init__(self, use_dropout=True):
        super().__init__()
        self.use_dropout = use_dropout

        self.conv1 = nn.Conv2d(3, 96, kernel_size=5, stride=1, padding=2)
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=1)
        self.dropout1 = nn.Dropout(0.1)  # retain 0.9 -> drop 0.1

        self.conv2 = nn.Conv2d(96, 128, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(3, stride=2, padding=1)
        self.dropout2 = nn.Dropout(0.25)  # retain 0.75 -> drop 0.25

        self.conv3 = nn.Conv2d(128, 256, kernel_size=5, stride=1, padding=2)
        self.pool3 = nn.MaxPool2d(3, stride=2, padding=1)
        self.dropout3 = nn.Dropout(0.25)  # retain 0.75 -> drop 0.25

        # Three stride-2 pools reduce a 32x32 input to 4x4.
        self.fc1 = nn.Linear(256 * 4 * 4, 2048)
        self.dropout4 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(2048, 2048)
        self.dropout5 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(2048, 10)

    def _drop(self, layer, x):
        """Apply ``layer`` only when dropout is enabled for this model."""
        return layer(x) if self.use_dropout else x

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for a ``(batch, 3, 32, 32)`` input.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                ``256 * 4 * 4`` features per sample.
        """
        x = self._drop(self.dropout1, self.pool1(F.relu(self.conv1(x))))
        x = self._drop(self.dropout2, self.pool2(F.relu(self.conv2(x))))
        x = self._drop(self.dropout3, self.pool3(F.relu(self.conv3(x))))

        # Flatten per sample so a wrongly sized input fails in fc1 instead of
        # being silently reshaped into a larger batch by ``view(-1, ...)``.
        x = torch.flatten(x, 1)
        x = self._drop(self.dropout4, F.relu(self.fc1(x)))
        x = self._drop(self.dropout5, F.relu(self.fc2(x)))
        return self.fc3(x)

    def apply_max_norm_constraint(self, c=4.0):
        """Rescale weights so every output unit's incoming L2 norm is at most ``c``.

        For each ``nn.Conv2d`` / ``nn.Linear`` module the weight is viewed as
        ``(out_units, fan_in)``; rows whose norm exceeds ``c`` are scaled
        down onto the norm ball, other rows are left unchanged. Weights are
        modified in place; biases are not touched.

        Args:
            c: Upper bound on the per-unit weight norm.
        """
        with torch.no_grad():
            for module in self.modules():
                if not isinstance(module, (nn.Conv2d, nn.Linear)):
                    continue
                weight = module.weight
                # One norm per output unit / filter: reduce over every
                # dimension except the first. (Reducing over dim=1 only is
                # correct for Linear but not for 4-D conv kernels.)
                norms = weight.flatten(1).norm(2, dim=1)
                # Zero-norm rows give c / 0 = inf, clamped to 1 (no change).
                scale = torch.clamp(c / norms, max=1.0)
                weight.mul_(scale.view(-1, *([1] * (weight.dim() - 1))))

In [7]:
def train_model(model, train_loader, num_epochs=10, use_max_norm=False):
    """Train ``model`` with SGD + momentum and return the per-epoch mean losses.

    The model is moved to the module-level ``device`` and put in training
    mode. Optimization uses cross-entropy loss, SGD (lr 0.01, momentum 0.95,
    weight decay 1e-4) and a step schedule that halves the learning rate
    every 5 epochs. Progress is printed every second epoch.

    Args:
        model: Network to train.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        num_epochs: Number of passes over ``train_loader``.
        use_max_norm: When true, ``model.apply_max_norm_constraint(c=4.0)``
            is called after every optimizer step, so the model must provide
            that method.

    Returns:
        List with the average batch loss of each epoch.
    """
    model = model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.95, weight_decay=1e-4)
    lr_schedule = optim.lr_scheduler.StepLR(sgd, step_size=5, gamma=0.5)

    model.train()
    train_losses = []

    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            sgd.zero_grad()
            batch_loss = loss_fn(model(inputs), labels)
            batch_loss.backward()
            sgd.step()

            # Project the weights back onto the norm ball right after the update.
            if use_max_norm:
                model.apply_max_norm_constraint(c=4.0)

            loss_sum += batch_loss.item()

        avg_loss = loss_sum / len(train_loader)
        train_losses.append(avg_loss)
        lr_schedule.step()

        is_even_epoch = (epoch + 1) % 2 == 0
        if is_even_epoch:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

    return train_losses

In [8]:
def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and return ``(accuracy, avg_loss)``.

    The model is moved to the module-level ``device`` and switched to eval
    mode; gradients are disabled during the pass.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple of the accuracy in percent and the cross-entropy loss averaged
        over batches (not over samples).
    """
    model = model.to(device)
    model.eval()
    loss_fn = nn.CrossEntropyLoss()

    hits = 0
    seen = 0
    loss_sum = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss_sum += loss_fn(logits, labels).item()

            # Predicted class = index of the largest logit in each row.
            guesses = logits.max(dim=1).indices
            seen += labels.size(0)
            hits += (guesses == labels).sum().item()

    accuracy = 100 * hits / seen
    return accuracy, loss_sum / len(test_loader)

In [9]:
def main():
    """Train and evaluate every model variant on SVHN and collect the results.

    Each experiment prints its title, trains a freshly built model, evaluates
    it on the test loader and prints the test accuracy and loss.

    Returns:
        Dict mapping an experiment key to ``{'accuracy': ..., 'loss': ...}``.
    """
    train_loader, test_loader = load_svhn_data()
    results = {}

    # (title, results key, model factory, epochs, use max-norm constraint)
    experiments = [
        ("Conv Net + max-pooling", 'ConvNet_MaxPooling_NoDropout',
         lambda: ConvNetMaxPooling(dropout_rate=0.0), 10, False),
        ("Conv Net + max-pooling + dropout в FC слоях", 'ConvNet_MaxPooling_DropoutFC',
         lambda: ConvNetMaxPoolingDropoutFC(dropout_rate=0.5), 10, False),
        ("Conv Net + MaxPool + Dropout in all layers", 'ConvNet_MaxPooling_DropoutAll',
         lambda: ConvNetMaxPoolingDropoutAll(), 15, True),
        ("Conv Net + maxout", 'ConvNet_Maxout',
         lambda: ConvNetMaxout(), 10, False),
        # Stored under its own key and evaluated with its own model; the
        # previous code evaluated the non-dropout maxout model here and
        # overwrote the 'ConvNet_Maxout' entry.
        ("Conv Net + maxout + Dropout", 'ConvNet_Maxout_Dropout',
         lambda: ConvNetMaxout(dropout_rate=0.5), 10, False),
    ]

    for title, key, build_model, num_epochs, use_max_norm in experiments:
        print(title)
        model = build_model()
        train_model(model, train_loader, num_epochs=num_epochs, use_max_norm=use_max_norm)
        acc, loss = evaluate_model(model, test_loader)
        results[key] = {'accuracy': acc, 'loss': loss}
        print(f"Точность на тесте: {acc:.2f}%, Потеря: {loss:.4f}")

    return results

# Run all experiments when executed as a script; the returned dict is kept in
# the module-level name `results`.
if __name__ == "__main__":
    results = main()



100%|██████████| 182M/182M [00:04<00:00, 42.1MB/s]
100%|██████████| 64.3M/64.3M [00:01<00:00, 32.6MB/s]


Обучающих образцов: 10000
Тестовых образцов: 26032
Conv Net + max-pooling
Epoch [2/10], Loss: 2.2366
Epoch [4/10], Loss: 1.9664
Epoch [6/10], Loss: 0.6675
Epoch [8/10], Loss: 0.4467
Epoch [10/10], Loss: 0.3139
Точность на тесте: 82.58%, Потеря: 0.6234
Conv Net + max-pooling + dropout в FC слоях
Epoch [2/10], Loss: 2.2370
Epoch [4/10], Loss: 1.9554
Epoch [6/10], Loss: 0.7910
Epoch [8/10], Loss: 0.5334
Epoch [10/10], Loss: 0.3824
Точность на тесте: 84.79%, Потеря: 0.5261
Conv Net + MaxPool + Dropout in all layers
Epoch [2/15], Loss: 2.2394
Epoch [4/15], Loss: 2.2103
Epoch [6/15], Loss: 1.8301
Epoch [8/15], Loss: 1.1419
Epoch [10/15], Loss: 0.7740
Epoch [12/15], Loss: 0.5598
Epoch [14/15], Loss: 0.4562
Точность на тесте: 86.26%, Потеря: 0.4676
Conv Net + maxout
Epoch [2/10], Loss: 2.1032
Epoch [4/10], Loss: 0.6602
Epoch [6/10], Loss: 0.2609
Epoch [8/10], Loss: 0.1245
Epoch [10/10], Loss: 0.0439
Точность на тесте: 86.42%, Потеря: 0.6795
Conv Net + maxout + Dropout
Epoch [2/10], Loss: 2.234

In [11]:
# Stacked Sparse Autoencoders - многослойные разреженные автоэнкодеры
# class SparseAutoencoder(nn.Module):
#     #Использует KL-дивергенцию для разреженности.
#     def __init__(self, input_dim, hidden_dim, sparsity_target=0.05, sparsity_weight=0.1):
#         super(SparseAutoencoder, self).__init__()
#         self.input_dim = input_dim
#         self.hidden_dim = hidden_dim
#         self.sparsity_target = sparsity_target
#         self.sparsity_weight = sparsity_weight

#         self.encoder = nn.Linear(input_dim, hidden_dim)
#         self.decoder = nn.Linear(hidden_dim, input_dim)

#     def forward(self, x):
#         encoded = F.relu(self.encoder(x))
#         decoded = self.decoder(encoded)
#         return encoded, decoded

#     def kl_divergence(self, rho_hat):
#         #Вычисляет KL-дивергенцию для разреженности.
#         #rho_hat - средняя активация скрытых единиц
#         rho = torch.full_like(rho_hat, self.sparsity_target)
#         kl = self.sparsity_target * torch.log(self.sparsity_target / (rho_hat + 1e-8)) + \
#              (1 - self.sparsity_target) * torch.log((1 - self.sparsity_target) / (1 - rho_hat + 1e-8))
#         return kl.sum()

# class StackedSparseAutoencoder(nn.Module):
#     #предобучениe признаков -> классификатор
#     def __init__(self, input_dim=3072, hidden_dims=[512, 256], num_classes=10, use_dropout=False):
#         super(StackedSparseAutoencoder, self).__init__()
#         self.use_dropout = use_dropout

#         layers = []
#         dims = [input_dim] + hidden_dims

#         for i in range(len(dims) - 1):
#             layers.append(nn.Linear(dims[i], dims[i+1]))
#             layers.append(nn.ReLU())
#             if use_dropout:
#                 layers.append(nn.Dropout(0.5))

#         self.encoder = nn.Sequential(*layers)

#         self.classifier = nn.Linear(hidden_dims[-1], num_classes)

#     def forward(self, x):
#         x = x.view(x.size(0), -1)  # Flatten
#         features = self.encoder(x)
#         output = self.classifier(features)
#         return output

    #print("Stacked Sparse Autoencoders БЕЗ dropout")
    # model14 = StackedSparseAutoencoder(input_dim=32*32*3, hidden_dims=[512, 256], use_dropout=False)
    # train_losses14 = train_model(model14, train_loader, num_epochs=10)
    # acc14, loss14 = evaluate_model(model14, test_loader)
    # results['StackedSparseAE_NoDropout'] = {'accuracy': acc14, 'loss': loss14}
    # print(f"Точность на тесте: {acc14:.2f}%, Потеря: {loss14:.4f}")

    # print("Stacked Sparse Autoencoders С dropout")
    # model15 = StackedSparseAutoencoder(input_dim=32*32*3, hidden_dims=[512, 256], use_dropout=True)
    # train_losses15 = train_model(model15, train_loader, num_epochs=10)
    # acc15, loss15 = evaluate_model(model15, test_loader)
    # results['StackedSparseAE_WithDropout'] = {'accuracy': acc15, 'loss': loss15}
    # print(f"Точность на тесте: {acc15:.2f}%, Потеря: {loss15:.4f}")