# Задание 3: Эксперименты с регуляризацией

## 3.1. Сравнение техник регуляризации

### Импорт библиотек

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms.v2 as v2
import matplotlib.pyplot as plt
import numpy as np

### Подготовка данных

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128

transform = v2.Compose([
    v2.ToTensor(),
    v2.Normalize((0.1307,), (0.3081,))
])

train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=False, transform=transform)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=False, transform=transform)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

### Модель

In [11]:
class BaseModel(nn.Module):
    def __init__(self, use_dropout=False, dropout_rate=0.5, use_batchnorm=False):
        super().__init__()
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        
        if use_batchnorm:
            self.bn1 = nn.BatchNorm2d(32)
            self.bn2 = nn.BatchNorm2d(64)
        
        self.fc1 = nn.Linear(64*5*5, 128)
        self.fc2 = nn.Linear(128, 10)
        
        if use_dropout:
            self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.conv1(x)
        if self.use_batchnorm:
            x = self.bn1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        if self.use_dropout:
            x = self.dropout(x)

        x = self.conv2(x)
        if self.use_batchnorm:
            x = self.bn2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        if self.use_dropout:
            x = self.dropout(x)
        
        # Классификатор
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        if self.use_dropout:
            x = self.dropout(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### Функция для обучения

In [12]:
def train_model(config, num_epochs=15):
    model = BaseModel(
        use_dropout=config['dropout'], 
        dropout_rate=config['dropout_rate'],
        use_batchnorm=config['batchnorm']
    ).to(device)
    
    # L2 регуляризация, если включена
    optimizer = torch.optim.Adam(
        model.parameters(), 
        lr=0.001, 
        weight_decay=config['weight_decay']
    )
    
    train_losses = []
    val_accuracies = []
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            y_pred = model(data)
            loss = F.nll_loss(y_pred, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        
        model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                y_pred = model(data)
                pred = y_pred.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
        
        accuracy = 100. * correct / len(test_loader.dataset)
        train_losses.append(epoch_loss / len(train_loader))
        val_accuracies.append(accuracy)
        
        print(f"Config: {config['name']} | Epoch {epoch+1}/{num_epochs} | Loss: {train_losses[-1]:.4f} | Accuracy: {accuracy:.2f}%")
    
    # Сбор весов для анализа
    weights = []
    for name, param in model.named_parameters():
        if 'weight' in name and 'bn' not in name:  # Исключаем BatchNorm
            weights.append(param.data.cpu().numpy().flatten())
    weights = np.concatenate(weights)
    
    return {
        'config': config,
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'final_accuracy': val_accuracies[-1],
        'weights': weights
    }

### Запуск экспериментов

In [16]:
experiments = [
    # Без регуляризации
    {'name': 'Без регуляризации', 'dropout': False, 'dropout_rate': 0, 'batchnorm': False, 'weight_decay': 0},
    
    # Dropout
    {'name': 'Dropout 0.1', 'dropout': True, 'dropout_rate': 0.1, 'batchnorm': False, 'weight_decay': 0},
    {'name': 'Dropout 0.3', 'dropout': True, 'dropout_rate': 0.3, 'batchnorm': False, 'weight_decay': 0},
    {'name': 'Dropout 0.5', 'dropout': True, 'dropout_rate': 0.5, 'batchnorm': False, 'weight_decay': 0},
    
    # BatchNorm
    {'name': 'BatchNorm', 'dropout': False, 'dropout_rate': 0, 'batchnorm': True, 'weight_decay': 0},
    
    # Dropout + BatchNorm
    {'name': 'Dropout 0.3 + BatchNorm', 'dropout': True, 'dropout_rate': 0.3, 'batchnorm': True, 'weight_decay': 0},
    
    # L2 регуляризация
    {'name': 'L2 (weight_decay=0.01)', 'dropout': False, 'dropout_rate': 0, 'batchnorm': False, 'weight_decay': 0.01}
]

# Запуск экспериментов
results = []
for exp_config in experiments:
    print(f"\n=== Эксперимент: {exp_config['name']} ===")
    result = train_model(exp_config)
    results.append(result)

print("\n=== Результаты экспериментов: ===")
for res in results:
    print(f"{res['config']['name']}: Точность {res['final_accuracy']:.2f}%")


=== Эксперимент: Без регуляризации ===
Config: Без регуляризации | Epoch 1/15 | Loss: 0.1753 | Accuracy: 98.43%
Config: Без регуляризации | Epoch 2/15 | Loss: 0.0481 | Accuracy: 98.67%
Config: Без регуляризации | Epoch 3/15 | Loss: 0.0323 | Accuracy: 99.04%
Config: Без регуляризации | Epoch 4/15 | Loss: 0.0235 | Accuracy: 98.72%
Config: Без регуляризации | Epoch 5/15 | Loss: 0.0189 | Accuracy: 99.22%
Config: Без регуляризации | Epoch 6/15 | Loss: 0.0133 | Accuracy: 99.10%
Config: Без регуляризации | Epoch 7/15 | Loss: 0.0122 | Accuracy: 99.13%
Config: Без регуляризации | Epoch 8/15 | Loss: 0.0094 | Accuracy: 98.97%
Config: Без регуляризации | Epoch 9/15 | Loss: 0.0093 | Accuracy: 99.17%
Config: Без регуляризации | Epoch 10/15 | Loss: 0.0071 | Accuracy: 99.12%
Config: Без регуляризации | Epoch 11/15 | Loss: 0.0065 | Accuracy: 99.12%
Config: Без регуляризации | Epoch 12/15 | Loss: 0.0047 | Accuracy: 99.08%
Config: Без регуляризации | Epoch 13/15 | Loss: 0.0044 | Accuracy: 99.09%
Config:

## 3.2. Адаптивная регуляризация

### Импорт библиотек

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms.v2 as v2

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from collections import defaultdict
import os

### Подготовка данных

In [32]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Гиперпаратры
config = {
    'batch_size': 128,
    'num_epochs': 15,
    'base_lr': 0.001,
    'weight_decay': 0.001,  #  L2
    'dropout_range': (0.1, 0.5),  # Для адаптивного dropout
    'bn_momentums': [0.1, 0.5, 0.9]  # Для экспериментов с BatchNorm
}

def prepare_data(batch_size):
    """
    Загрузка и подготовка данных
    """
    transform = v2.Compose([
        v2.ToTensor(),
        v2.Normalize((0.1307,), (0.3081,))
    ])
    
    train_set = torchvision.datasets.MNIST(
        root='./data', train=True, download=True, transform=transform)
    test_set = torchvision.datasets.MNIST(
        root='./data', train=False, download=True, transform=transform)
    
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False)
    
    return train_loader, test_loader

train_loader, test_loader = prepare_data(config['batch_size'])

### Измененная адаптивная модель

In [33]:
class AdaptiveBaseModel(nn.Module):
    def __init__(self, reg_config):
        super().__init__()
        self.reg_config = reg_config
        
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            self._make_bn_layer(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
            self._make_dropout_layer(),
            
            nn.Conv2d(32, 64, 3, 1),
            self._make_bn_layer(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            self._make_dropout_layer()
        )
        
        self.fc_layers = nn.Sequential(
            nn.Linear(64*5*5, 128),
            nn.ReLU(),
            self._make_dropout_layer(fc=True),
            nn.Linear(128, 10)
        )
    
    def _make_bn_layer(self, num_features):
        """
        Создание BatchNorm слоя с заданным momentum
        """
        if self.reg_config['use_bn']:
            return nn.BatchNorm2d(num_features, momentum=self.reg_config['bn_momentum'])
        return nn.Identity()
    
    def _make_dropout_layer(self, fc=False):
        """
        Создание Dropout слоя с адаптивным коэффициентом
        """
        if not self.reg_config['use_dropout']:
            return nn.Identity()
        
        rate = self.reg_config['dropout_rate']
        if fc and self.reg_config['fc_dropout_factor'] > 1:
            rate *= self.reg_config['fc_dropout_factor']
        
        return nn.Dropout(rate)
    
    def update_dropout(self, new_rate):
        """
        Адаптивное обновление dropout rate
        """
        if self.reg_config['use_dropout']:
            self.reg_config['dropout_rate'] = new_rate
    
    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.fc_layers(x)
        return F.log_softmax(x, dim=1)
    
    def get_weights_stats(self):
        """
        Сбор статистик по весам для дальнейшего анализа
        """
        stats = defaultdict(list)
        for name, param in self.named_parameters():
            if 'weight' in name:
                w = param.data.cpu().numpy()
                stats['mean'].append(np.mean(w))
                stats['std'].append(np.std(w))
                stats['layer_names'].append(name)
        return stats

### Создание цикла обучения

In [55]:
def train_epoch(model, loader, optimizer, epoch, reg_config):
    model.train()
    total_loss = 0
    
    if reg_config['adaptive_dropout']:
        progress = epoch / config['num_epochs']
        new_rate = reg_config['dropout_range'][0] + (
            reg_config['dropout_range'][1] - reg_config['dropout_range'][0]) * progress
        model.update_dropout(new_rate)
    
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        y_pred = model(data)
        loss = F.nll_loss(y_pred, target)
        
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    return total_loss / len(loader)

def evaluate(model, loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            y_pred = model(data)
            pred = y_pred.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    return 100. * correct / len(loader.dataset)

def train_model(reg_config):
    model = AdaptiveBaseModel(reg_config).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), 
        lr=config['base_lr'], 
        weight_decay=reg_config.get('weight_decay', 0)
    )
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max', patience=2, factor=0.5, verbose=True)
    
    history = {
        'train_loss': [],
        'val_acc': [],
        'weights_stats': []
    }
    
    for epoch in range(1, config['num_epochs'] + 1):
        train_loss = train_epoch(model, train_loader, optimizer, epoch, reg_config)
        val_acc = evaluate(model, test_loader)
        scheduler.step(val_acc)
        
        history['train_loss'].append(train_loss)
        history['val_acc'].append(val_acc)
        history['weights_stats'].append(model.get_weights_stats())

        message = f"|\tEpoch {epoch}/{config['num_epochs']}\t|\tLoss: {train_loss:.4f}\t|\tVal Accuracy: {val_acc:.2f}%"
        if reg_config['adaptive_dropout']:
            dropout_message = f"\t|\tТекущий dropout: {model.reg_config['dropout_rate']:.3f}\t|"
            print(message + dropout_message)
        else:
            print(message)
    
    return {
        'config': reg_config,
        'final_acc': history['val_acc'][-1],
        'history': history,
        'model': model
    }

### Подготовка конфигов

In [56]:
def get_experiment_configs():
    """
    Генерация конфигураций для экспериментов
    """
    base_config = {
        'use_bn': False,
        'use_dropout': False,
        'adaptive_dropout': False,
        'dropout_rate': 0.0,  
        'dropout_range': (0.0, 0.0),
        'fc_dropout_factor': 1.0,
        'bn_momentum': 0.1,  #
        'l2_lambda': 0.0,
        'weight_decay': 0.0 
    }
    
    experiments = []
    
    # Адаптивный Dropout
    experiments.append({
        **base_config,
        'name': 'Adaptive Dropout',
        'use_dropout': True,
        'adaptive_dropout': True,
        'dropout_range': (0.1, 0.5)
    })
    
    # BatchNorm вариации
    for momentum in config['bn_momentums']:
        experiments.append({
            **base_config,
            'name': f'BN momentum={momentum}',
            'use_bn': True,
            'bn_momentum': momentum
        })
    
    # Комбинированные подходы
    experiments.append({
        **base_config,
        'name': 'Адаптивный DO + BN (лучший)',
        'use_bn': True,
        'bn_momentum': 0.5,
        'use_dropout': True,
        'adaptive_dropout': True,
        'dropout_range': (0.1, 0.5)})
    
    experiments.append({
        **base_config,
        'name': 'L2 + Адаптивный DO + BN',
        'use_bn': True,
        'bn_momentum': 0.5,
        'use_dropout': True,
        'adaptive_dropout': True,
        'dropout_range': (0.1, 0.5),
        'weight_decay': config['weight_decay']})
    
    return experiments

### Обучение моделей

In [57]:
experiment_configs = get_experiment_configs()
results = []

for exp_config in experiment_configs:
    print(f"\n=== Эксперимент: {exp_config['name']} ===")
    result = train_model(exp_config)
    results.append(result)
    print(f"* Точность: {result['final_acc']:.2f}%")


=== Эксперимент: Adaptive Dropout ===
|	Epoch 1/15	|	Loss: 0.1791	|	Val Accuracy: 98.46%	|	Текущий dropout: 0.127	|
|	Epoch 2/15	|	Loss: 0.0479	|	Val Accuracy: 98.70%	|	Текущий dropout: 0.153	|
|	Epoch 3/15	|	Loss: 0.0339	|	Val Accuracy: 98.81%	|	Текущий dropout: 0.180	|
|	Epoch 4/15	|	Loss: 0.0235	|	Val Accuracy: 98.99%	|	Текущий dropout: 0.207	|
|	Epoch 5/15	|	Loss: 0.0184	|	Val Accuracy: 98.88%	|	Текущий dropout: 0.233	|
|	Epoch 6/15	|	Loss: 0.0147	|	Val Accuracy: 99.23%	|	Текущий dropout: 0.260	|
|	Epoch 7/15	|	Loss: 0.0120	|	Val Accuracy: 99.13%	|	Текущий dropout: 0.287	|
|	Epoch 8/15	|	Loss: 0.0102	|	Val Accuracy: 99.24%	|	Текущий dropout: 0.313	|
|	Epoch 9/15	|	Loss: 0.0088	|	Val Accuracy: 99.08%	|	Текущий dropout: 0.340	|
|	Epoch 10/15	|	Loss: 0.0072	|	Val Accuracy: 98.97%	|	Текущий dropout: 0.367	|
|	Epoch 11/15	|	Loss: 0.0067	|	Val Accuracy: 99.20%	|	Текущий dropout: 0.393	|
|	Epoch 12/15	|	Loss: 0.0016	|	Val Accuracy: 99.39%	|	Текущий dropout: 0.420	|
|	Epoch 13/15	|	Loss: 

### Анализ результатов

Исходя из функции потерь и точности, можно сделать вывод, что лучше с поставленной задачей справилась модель с адаптивным dropout.