<a href="https://colab.research.google.com/github/nataliashalaeva/ml/blob/main/lab_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Выбор и загрузка набора данных. Загрузить датасет MNIST, содержащий
изображения рукописных цифр, используя torchvision.datasets.MNIST. Преобразовать
данные в формат тензоров и нормализуйте их с помощью torchvision.transforms.
Организовать данные в пакеты (batch) для обучения, используя torch.utils.data.DataLoade

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.43MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 157kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.50MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.12MB/s]


2. Определение архитектуры модели. Создать класс модели нейронной сети на
основе torch.nn.Module. Определить слои модели в конструкторе (__init__) и опишите
прямой проход данных (forward) через слои

In [None]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

3. Выбор функции потерь и оптимизатора. Выберать функцию потерь (например,
nn.CrossEntropyLoss) и оптимизатор (например, torch.optim.Adam) для обучения модели.

In [None]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)


Using device: cpu


4. Обучение модели. Написать цикл обучения, который перебирает пакеты данных,
выполняет прямой проход, вычисляет потери, выполняет обратное распространение
ошибки и обновляет параметры модели.

In [None]:
import time

num_epochs = 3
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx + 1) % 200 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{batch_idx + 1}/{len(train_loader)}], Loss: {running_loss / 200:.4f}')
            running_loss = 0.0

end_time = time.time()
training_time = end_time - start_time
print(f"Training finished in {training_time:.2f} seconds")


Epoch [1/3], Step [200/938], Loss: 2.3131
Epoch [1/3], Step [400/938], Loss: 2.3140
Epoch [1/3], Step [600/938], Loss: 2.3132
Epoch [1/3], Step [800/938], Loss: 2.3129
Epoch [2/3], Step [200/938], Loss: 2.3143
Epoch [2/3], Step [400/938], Loss: 2.3131
Epoch [2/3], Step [600/938], Loss: 2.3131
Epoch [2/3], Step [800/938], Loss: 2.3134
Epoch [3/3], Step [200/938], Loss: 2.3133
Epoch [3/3], Step [400/938], Loss: 2.3125
Epoch [3/3], Step [600/938], Loss: 2.3138
Epoch [3/3], Step [800/938], Loss: 2.3130
Training finished in 45.76 seconds


In [None]:
class MyNeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.0):
        super(MyNeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [None]:
import torch.nn.functional as F
input_size = 10
hidden_size = 20
output_size = 5
dropout_rate = 0.2

model = MyNeuralNet(input_size, hidden_size, output_size, dropout_rate)
input_data = torch.randn(1, input_size)
output_data = model(input_data)
print("Размер входных данных:", input_data.shape)
print("Размер выходных данных:", output_data.shape)
print(model)


Размер входных данных: torch.Size([1, 10])
Размер выходных данных: torch.Size([1, 5])
MyNeuralNet(
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=5, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


5. Оценка производительности. Оценить точность (accuracy) модели на тестовом
наборе данных.

In [17]:

correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Точность модели на тестовом наборе: {accuracy:.2f}%')

Точность модели на тестовом наборе: 97.25%


6. Эксперименты с архитектурой и гиперпараметрами. Изменить архитектуру
модели (например, добавьте больше сверточных слоев, увеличьте количество фильтров) и
измените гиперпараметры обучения (например, скорость обучения, размер пакета), чтобы
увидеть, как это повлияет на производительность. Провести несколько экспериментов и
зафиксировать результаты.


In [None]:
class DeeperCNN(nn.Module):
    def __init__(self):
        super(DeeperCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.maxpool3(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.fc2(x)
        return x

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

batch_size = 64
learning_rate = 0.001
num_epochs = 3

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


def train_and_evaluate(model, learning_rate, batch_size, num_epochs, model_name):
    print(f"Training {model_name} with lr={learning_rate}, batch_size={batch_size}, epochs={num_epochs}")
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 200 == 199:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0
    print('Finished Training')

    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the 10000 test images: {accuracy:.2f}%')
    return accuracy

results = {}

model1 = SimpleCNN()
accuracy1 = train_and_evaluate(model1, learning_rate, batch_size, num_epochs, "SimpleCNN Default")
results["SimpleCNN Default"] = accuracy1

model2 = SimpleCNN()
accuracy2 = train_and_evaluate(model2, 0.0005, batch_size, num_epochs, "SimpleCNN Smaller LR")
results["SimpleCNN Smaller LR"] = accuracy2

model3 = SimpleCNN()
accuracy3 = train_and_evaluate(model3, learning_rate, 128, num_epochs, "SimpleCNN Larger Batch")
results["SimpleCNN Larger Batch"] = accuracy3

model4 = DeeperCNN()
accuracy4 = train_and_evaluate(model4, learning_rate, batch_size, num_epochs, "DeeperCNN Default")
results["DeeperCNN Default"] = accuracy4

model5 = DeeperCNN()
accuracy5 = train_and_evaluate(model5, 0.0005, batch_size, num_epochs, "DeeperCNN Smaller LR")
results["DeeperCNN Smaller LR"] = accuracy5

print("\nResults:")
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2f}%")

Using device: cpu
Training SimpleCNN Default with lr=0.001, batch_size=64, epochs=3
[1,   200] loss: 0.542
[1,   400] loss: 0.136
[1,   600] loss: 0.089
[1,   800] loss: 0.077
[2,   200] loss: 0.061
[2,   400] loss: 0.050
[2,   600] loss: 0.053
[2,   800] loss: 0.048
[3,   200] loss: 0.035
[3,   400] loss: 0.039
[3,   600] loss: 0.040
[3,   800] loss: 0.036
Finished Training
Accuracy of the network on the 10000 test images: 98.72%
Training SimpleCNN Smaller LR with lr=0.0005, batch_size=64, epochs=3
[1,   200] loss: 0.805
[1,   400] loss: 0.192
[1,   600] loss: 0.132
[1,   800] loss: 0.105
[2,   200] loss: 0.078
[2,   400] loss: 0.075
[2,   600] loss: 0.070
[2,   800] loss: 0.064
[3,   200] loss: 0.053
[3,   400] loss: 0.049
[3,   600] loss: 0.050
[3,   800] loss: 0.052
Finished Training
Accuracy of the network on the 10000 test images: 98.46%
Training SimpleCNN Larger Batch with lr=0.001, batch_size=128, epochs=3
[1,   200] loss: 0.476
[1,   400] loss: 0.130
[1,   600] loss: 0.093
[1,

7. Эксперименты с функциями активации. Заменить функцию активации на
другие и повторите обучение. Сравнить влияние различных функций активации на
производительность.

In [None]:
class SimpleNN(nn.Module):
    def __init__(self, activation_fn):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.activation1 = activation_fn
        self.fc2 = nn.Linear(256, 128)
        self.activation2 = activation_fn
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.activation1(self.fc1(x))
        x = self.activation2(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
activations = {
    "ReLU": nn.ReLU(),
    "LeakyReLU": nn.LeakyReLU(0.01),
    "ELU": nn.ELU(),
    "Sigmoid": nn.Sigmoid(),
    "Tanh": nn.Tanh(),
    "Swish": nn.SiLU(),
}

results = {}

for name, activation in activations.items():
    print(f"\nТест: {name}")
    model = SimpleNN(activation).to(device)
    accuracy = train_and_evaluate(model, learning_rate=0.001, batch_size=64, num_epochs=3, model_name=name)
    results[name] = accuracy

print("\nResults::")
for name, acc in results.items():
    print(f"{name}: {acc:.2f}%")



Тест: ReLU
Training ReLU with lr=0.001, batch_size=64, epochs=3
[1,   200] loss: 0.632
[1,   400] loss: 0.356
[1,   600] loss: 0.269
[1,   800] loss: 0.226
[2,   200] loss: 0.172
[2,   400] loss: 0.160
[2,   600] loss: 0.152
[2,   800] loss: 0.147
[3,   200] loss: 0.116
[3,   400] loss: 0.115
[3,   600] loss: 0.121
[3,   800] loss: 0.110
Finished Training
Accuracy of the network on the 10000 test images: 95.92%

Тест: LeakyReLU
Training LeakyReLU with lr=0.001, batch_size=64, epochs=3
[1,   200] loss: 0.636
[1,   400] loss: 0.331
[1,   600] loss: 0.249
[1,   800] loss: 0.214
[2,   200] loss: 0.169
[2,   400] loss: 0.143
[2,   600] loss: 0.148
[2,   800] loss: 0.141
[3,   200] loss: 0.114
[3,   400] loss: 0.105
[3,   600] loss: 0.110
[3,   800] loss: 0.107
Finished Training
Accuracy of the network on the 10000 test images: 96.58%

Тест: ELU
Training ELU with lr=0.001, batch_size=64, epochs=3
[1,   200] loss: 0.554
[1,   400] loss: 0.306
[1,   600] loss: 0.244
[1,   800] loss: 0.210
[2,