In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Normalize, Lambda
import matplotlib.pyplot as plt

# 1. Грузим данные

In [None]:
# Load both FashionMNIST splits from ./data (downloading them when missing).
# The train split is built first, then the test split; each gets its own
# ToTensor transform instance.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

In [None]:
# Human-readable names for the ten FashionMNIST class indices (0..9).
labels_map = dict(enumerate((
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
)))

# Draw a 3x3 grid of randomly chosen training samples, each titled with its class name.
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(labels_map[label])
    ax.axis("off")
    # squeeze() drops size-1 dimensions so imshow receives a 2-D array
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

In [None]:
X_sample, y_sample = training_data[0]
print(X_sample)
print(y_sample)

In [None]:
X_sample.shape

# Создаем собственный датасет

In [None]:
import pandas as pd
import numpy as np


class CustomImageDataset(Dataset):
    """Map-style dataset backed by a CSV file of flattened 28x28 images.

    Each CSV row is one sample: column 0 holds the label and the remaining
    columns hold the pixel values, which are reshaped to 28x28 and cast to uint8.

    Args:
        data_file: path or file-like object handed to ``pandas.read_csv``.
        transform: optional callable applied to the 28x28 uint8 image array.
        target_transform: optional callable applied to the label.
    """

    def __init__(self, data_file, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform
        # The whole table is read eagerly and kept in memory.
        self.data = pd.read_csv(data_file)

    def __len__(self):
        """Number of samples, i.e. number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` after the optional transforms."""
        pixels = self.data.iloc[idx, 1:].to_numpy()
        image = pixels.astype(np.uint8).reshape(28, 28)
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [None]:
!wget -q https://media.githubusercontent.com/media/fpleoni/fashion_mnist/master/fashion-mnist_train.csv
!wget -q https://media.githubusercontent.com/media/fpleoni/fashion_mnist/master/fashion-mnist_test.csv

In [None]:
# Target transform: turn an integer class label into a one-hot float vector of
# length 10 by scattering a 1 into position `y` of a zero vector.
one_hot_target = Lambda(
    lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1)
)

# Both CSV-backed splits use the same preprocessing: images go through ToTensor,
# labels are one-hot encoded.
train = CustomImageDataset(
    './fashion-mnist_train.csv',
    transform=ToTensor(),
    target_transform=one_hot_target,
)
test = CustomImageDataset(
    './fashion-mnist_test.csv',
    transform=ToTensor(),
    target_transform=one_hot_target,
)

In [None]:
X_sample, y_sample = train[0]

In [None]:
X_sample

In [None]:
X_sample.shape

In [None]:
y_sample

In [None]:
y_sample.shape

# Познаем даталоадеры

In [None]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test, batch_size=64, shuffle=True)

In [None]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")

# 2. Собираем нейросеть

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

In [None]:
from torch import nn


class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then 784 -> 512 -> 512 -> 10 with ReLU in between.

    ``forward`` returns raw logits; no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        hidden_width = 512
        # Layers are created in forward order so parameter initialisation
        # consumes the random generator in the same sequence as before.
        layers = [
            nn.Linear(28 * 28, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample in ``x`` and return its 10 unnormalised class scores."""
        return self.linear_relu_stack(self.flatten(x))

In [None]:
model = NeuralNetwork().to(device)
print(model)

In [None]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

# Смотрим детально в слои сети

In [None]:
input_image = torch.rand(3, 28, 28)
print(input_image.size())

In [None]:
input_image

In [None]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

In [None]:
flat_image

In [None]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

In [None]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

In [None]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

In [None]:
logits.shape

In [None]:
logits

In [None]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
pred_probab

In [None]:
pred_probab.argmax(dim=1)

In [None]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

# 3. Тренируем сеть

#### Инициализируем модель

In [None]:
model = NeuralNetwork()

#### Инициализируем параметры

In [None]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

#### Инициализируем функцию потерь

In [None]:
loss_fn = nn.CrossEntropyLoss()

#### Инициализируем оптимизатор

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

#### Описываем функции тренировки и тестирования модели

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset`` attribute
            (only its length is used, for the progress message).
        model: the module to optimise; it is switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: optimizer already bound to ``model``'s parameters.

    Prints the current loss every 100 batches. Returns ``None``.
    """
    size = len(dataloader.dataset)

    # Batches are moved to wherever the model's parameters live, so the loop
    # works the same for a CPU model and for one that was moved with .to(device).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    # Make sure layers such as dropout / batch-norm behave in training mode,
    # even if an evaluation pass left the model in eval mode.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        if device is not None:
            X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y.argmax(1)).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Full training schedule: each epoch runs one optimisation pass over the training
# loader followed by an evaluation pass over the test loader.
# NOTE(review): this reassigns `epochs`, replacing the value of 5 set in the
# hyperparameter cell earlier in the notebook.
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

# 4. Изучаем свертки

![](https://distillery.com/wp-content/uploads/3-1.png)

![](https://wikidocs.net/images/page/164365/Fig_04_cnn_filter.png)

![](https://blog.kakaocdn.net/dn/lgBRU/btqWWtkNJhM/DsJwc0PXrbhS0n33Vqzgm1/img.gif)

![](https://www.mobiquity.com/hs-fs/hubfs/CNN03.gif?width=640&name=CNN03.gif)

![](https://i.stack.imgur.com/9bi5k.gif)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import cv2
import sklearn
import PIL
import torch
import torchvision


def show_image(image, figsize=(16, 9), reverse=True):
    """Display ``image`` in a new figure with the axes hidden.

    Args:
        image: array-like accepted by ``imshow``; must support ``image[..., ::-1]``
            when ``reverse`` is true.
        figsize: figure size passed to matplotlib.
        reverse: when true, the last axis is reversed before display
            (presumably to turn OpenCV's BGR channel order into RGB — confirm
            against callers).
    """
    _, ax = plt.subplots(figsize=figsize)
    pixels = image[..., ::-1] if reverse else image
    ax.imshow(pixels)
    ax.axis('off')
    plt.show()
    
    
def show_grayscale_image(image, figsize=(16, 9)):
    """Display ``image`` with the gray colormap in a new figure, axes hidden.

    Args:
        image: anything ``imshow`` accepts (the notebook passes both numpy
            arrays and PIL images).
        figsize: figure size passed to matplotlib.
    """
    _, ax = plt.subplots(figsize=figsize)
    ax.imshow(image, cmap='gray')
    ax.axis('off')
    plt.show()

In [None]:
layer = torch.nn.Conv2d(1, 1, kernel_size=(3, 3))
layer.weight

In [None]:
# вес доступен через .weight.data
layer.weight.data = torch.ones_like(layer.weight.data) 
layer.weight.data /= torch.sum(layer.weight.data)
layer.weight.data

In [None]:
layer.bias.data = torch.zeros_like(layer.bias.data)
layer.bias.data

In [None]:
layer.train()
layer.eval()

In [None]:
input_tensor = torch.arange(9).reshape(1, 1, 3, 3)
input_tensor.shape

Размерности тензора `(1, 1, 3, 3)`: 1 — размер батча, 1 — количество каналов (1 для чёрно-белого изображения, 3 для цветного), 3 — высота, 3 — ширина.

In [None]:
input_tensor

In [None]:
output_tensor = layer(input_tensor.float())

In [None]:
output_tensor

## Какие бывают фильтры

In [None]:
from urllib.request import urlopen


# Download the demo photo and decode it directly into a single-channel (grayscale) array.
# The response is used as a context manager so the HTTP connection is closed as
# soon as the bytes have been read.
with urlopen('https://images.unsplash.com/photo-1608848461950-0fe51dfc41cb?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxleHBsb3JlLWZlZWR8M3x8fGVufDB8fHx8&w=1000&q=80') as req:
    arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
# cv2.IMREAD_GRAYSCALE is the named constant for the flag value 0 used previously.
img = cv2.imdecode(arr, cv2.IMREAD_GRAYSCALE)
# imdecode reports an undecodable buffer by returning None instead of raising;
# fail here rather than on a confusing attribute error in a later cell.
if img is None:
    raise ValueError('Downloaded data could not be decoded as an image')

In [None]:
img.shape

In [None]:
show_grayscale_image(img)

In [None]:
to_tensor = torchvision.transforms.ToTensor()
img_tensor = to_tensor(img)
img_tensor

In [None]:
img_tensor.shape

In [None]:
# Give the image the 4-D layout (batch, channels, height, width) that Conv2d expects.
# Height and width are taken from the tensor itself instead of being hard-coded,
# so the cell keeps working if the downloaded image has a different size; it is
# also safe to re-run, because an already 4-D tensor is reshaped to the same shape.
# (assumes a single-channel image, i.e. img_tensor is (1, H, W) or (1, 1, H, W))
img_tensor = img_tensor.reshape(1, 1, *img_tensor.shape[-2:])

In [None]:
kernel_size = 50
layer = torch.nn.Conv2d(1, 1, kernel_size=kernel_size)

# Replace the random weights with a uniform averaging (box-blur) kernel:
# every weight is equal and the weights sum to 1.
layer.weight.data = torch.ones_like(layer.weight.data)
layer.weight.data /= torch.sum(layer.weight.data)

# Zero the randomly initialised bias as well (as done for the 3x3 demo layer),
# otherwise every output pixel is shifted by a random constant.
layer.bias.data = torch.zeros_like(layer.bias.data)

In [None]:
layer.weight.data

In [None]:
output_tensor = layer(img_tensor)
output_tensor.shape

In [None]:
import torchvision.transforms as transforms

# callable that converts a tensor into a PIL image
to_pil_image = transforms.ToPILImage()
# squeeze(0) removes the leading (batch) dimension before the conversion
output_img = to_pil_image(output_tensor.squeeze(0))

In [None]:
show_grayscale_image(output_img)

In [None]:
def show_image_with_kernel(img_input, kernel):
    """Convolve a single-channel image batch with ``kernel`` and display the result.

    Args:
        img_input: float tensor shaped (batch, 1, height, width); the result is
            passed through ``squeeze(0)`` before display, so a batch of one is expected.
        kernel: tensor holding one 2-D filter; any shape whose last two
            dimensions are the kernel height and width and that reshapes to
            ``(1, 1, height, width)`` is accepted (e.g. ``(3, 3)`` or ``(1, 1, 3, 3)``).

    The convolution has no bias term, so the displayed image is the pure filter
    response of ``kernel`` and is the same on every call.
    """
    kernel_h, kernel_w = kernel.shape[-2:]
    # bias=False: a freshly constructed Conv2d would otherwise add a randomly
    # initialised offset to every output pixel.
    layer = torch.nn.Conv2d(1, 1, kernel_size=(kernel_h, kernel_w), bias=False)
    # Use the kernel that was passed in (not a notebook-level global), cast to
    # the layer's dtype so integer kernels work too.
    layer.weight.data = kernel.reshape(1, 1, kernel_h, kernel_w).to(layer.weight.dtype)
    output_tensor = layer(img_input)
    output_img = to_pil_image(output_tensor.squeeze(0))
    show_grayscale_image(output_img)

In [None]:
our_kernel = torch.tensor([[0, -1, 0], 
                           [-1, 4, -1], 
                           [0, -1, 0]], dtype=torch.float32)
our_kernel = our_kernel.reshape(1, 1, 3, 3)
show_image_with_kernel(img_tensor, our_kernel)

In [None]:
our_kernel = torch.tensor([[-1, -1, -1], 
                           [-1, 8, -1], 
                           [-1, -1, -1]], dtype=torch.float32)
our_kernel = our_kernel.reshape(1, 1, 3, 3)
show_image_with_kernel(img_tensor, our_kernel)

In [None]:
our_kernel = torch.tensor([[0, -1, 0], 
                           [-1, 5, -1], 
                           [0, -1, 0]], dtype=torch.float32)
our_kernel = our_kernel.reshape(1, 1, 3, 3)
show_image_with_kernel(img_tensor, our_kernel)

In [None]:
our_kernel = torch.tensor([[1, 0, 0], 
                           [0, 1, 0], 
                           [0, 0, 1]], dtype=torch.float32)
our_kernel = our_kernel.reshape(1, 1, 3, 3)
show_image_with_kernel(img_tensor, our_kernel)

In [None]:
our_kernel = torch.tensor([[1, 0, 1], 
                           [0, 1, 0], 
                           [1, 0, 1]], dtype=torch.float32)
our_kernel = our_kernel.reshape(1, 1, 3, 3)
show_image_with_kernel(img_tensor, our_kernel)

![](https://miro.medium.com/v2/resize:fit:738/0*4tHPDjvQXX9obrhK.gif)

![](https://qph.cf2.quoracdn.net/main-qimg-c05ec72225e19614f0cc10385d27525c)

![](https://blog.ca.meron.dev/images/Convolution.gif)

![](https://i.stack.imgur.com/ptDPZ.gif)

![](https://lukeguerdan.com/assets/img/TCNfigureA.png)

![](https://theaisummer.com/static/490be17ee7f19b78003c3fdf5a6bbafc/83b75/receptive-field-in-convolutional-networks.png)

![](https://i.ytimg.com/vi/KuXjwB4LzSA/maxresdefault.jpg)

![](https://saturncloud.io/images/blog/a-comprehensive-guide-to-convolutional-neural-networks-the-eli5-way.jpg)

![](https://learnopencv.com/wp-content/uploads/2023/01/tensorflow-keras-cnn-vgg-architecture-1024x611.png)

![](https://nico-curti.github.io/NumPyNet/NumPyNet/images/maxpool.gif)

Визуализация свёрточных слоёв: https://distill.pub/2017/feature-visualization/

# Теперь пора писать свою сверточную нейросеть

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [None]:
batch_size = 4

# dataset задаёт данные
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
# dataloader подгружает их
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def imshow(img):
    """Display a normalised (channels, height, width) image tensor.

    The tensor is mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalisation) and its axes are reordered to
    (height, width, channels) for matplotlib.
    """
    unnormalized = img / 2 + 0.5
    channels_last = unnormalized.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()


# take one batch of images (random, because the training loader shuffles)
dataiter = iter(trainloader)
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
# Print one class name per image actually present in the batch, instead of
# assuming the batch always holds exactly 4 images.
print('  '.join('{}'.format(classes[labels[j]]) for j in range(len(labels))))

In [None]:
def train_model(net, criterion, optimizer, trainloader, num_epochs=5):
    """Train ``net`` for ``num_epochs`` passes over ``trainloader``.

    Args:
        net: the model to optimise.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: optimizer already bound to ``net``'s parameters.
        trainloader: iterable yielding ``(inputs, labels)`` pairs.
        num_epochs: number of passes over ``trainloader``.

    Returns:
        The same ``net`` object, after training.

    Every 2000 iterations the mean loss of those iterations is printed; a blank
    line is printed after each epoch and 'Finished Training' at the end.
    """
    report_every = 2000
    for epoch in range(num_epochs):
        running_loss = 0.0
        for step, (inputs, labels) in enumerate(trainloader, start=1):
            # Standard step: clear old gradients, forward, backward, update.
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % report_every == 0:
                print('Epoch {0}/{1}, iteration {2}, loss: {3:.3f}'.format(epoch + 1, num_epochs, step,
                                                                          running_loss / report_every))
                running_loss = 0.0
        print()

    print('Finished Training')

    return net

In [None]:
def all_accuracy(net, testloader):
    """Print the overall accuracy (in percent) of ``net`` on ``testloader``.

    Args:
        net: model returning one row of class scores per input sample.
        testloader: iterable yielding ``(images, labels)`` batches.

    The predicted class is the index of the largest score. Note that the printed
    message always says "10000 test images", whatever the actual number of samples.
    """
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in testloader:
            predicted = torch.max(net(images).data, 1)[1]
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy of the network on the 10000 test images: {} %'.format(accuracy))

In [None]:
def class_accuracy(net, testloader):
    """Print the accuracy (in percent) of ``net`` separately for every class.

    Args:
        net: model returning one row of class scores per input sample.
        testloader: iterable yielding ``(images, labels)`` batches of any size,
            where ``labels`` holds integer class indices.

    Class names are read from the notebook-level ``classes`` sequence, whose
    length also determines the number of classes. A class that never occurs in
    ``testloader`` is reported as 'n/a' instead of causing a division by zero.
    """
    num_classes = len(classes)
    class_correct = [0.0] * num_classes
    class_total = [0.0] * num_classes
    with torch.no_grad():
        for images, labels in testloader:
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            # Flatten to plain Python lists so batches of any size (including
            # a final short batch or a batch of one) are handled uniformly.
            hits = (predicted == labels).reshape(-1).tolist()
            for label, hit in zip(labels.reshape(-1).tolist(), hits):
                class_correct[label] += hit
                class_total[label] += 1

    for i in range(num_classes):
        if class_total[i] == 0:
            print('Accuracy of {} : n/a (no test samples)'.format(classes[i]))
            continue
        print('Accuracy of {} : {} %'.format(
            classes[i], 100 * class_correct[i] / class_total[i]))

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class FeedForwardNet(nn.Module):
    """Fully connected baseline: 3*32*32 inputs -> 128 -> 32 -> 10 with ReLU between layers.

    ``forward`` returns raw class scores (no softmax).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 10)

    def forward(self, x):
        """Flatten ``x`` to rows of 3*32*32 values and return 10 scores per row."""
        flat = x.view(-1, 3 * 32 * 32)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
    

net = FeedForwardNet()

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
net = train_model(net, criterion, optimizer, trainloader, num_epochs=5)

In [None]:
all_accuracy(net, testloader)

In [None]:
class_accuracy(net, testloader)

## Свёрточная сеть для классификации

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    conv(3->6, 5x5) -> ReLU -> pool(2x2) -> conv(6->16, 5x5) -> ReLU -> pool(2x2)
    -> flatten to 16*5*5 -> 120 -> 84 -> 10. ``forward`` returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # A single pooling module is reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return 10 class scores per sample; features are viewed as rows of 16*5*5 values."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


net = Net()

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
net = train_model(net, criterion, optimizer, trainloader, num_epochs=5)

In [None]:
# сохраним сеть
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

In [None]:
dataiter = iter(testloader)
images, labels = next(dataiter)

imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('{}'.format(classes[labels[j]]) for j in range(batch_size)))

In [None]:
# загрузим сеть
net_loaded = Net()
net_loaded.load_state_dict(torch.load(PATH))

In [None]:
# Classify the batch with the reloaded network; the predicted class is the
# index of the largest score in each row.
outputs = net_loaded(images)
_, predicted = torch.max(outputs, 1)

# Print one class name per prediction instead of assuming exactly 4 images per batch.
print('Predicted: ', ' '.join('{}'.format(classes[predicted[j]]) for j in range(len(predicted))))

In [None]:
all_accuracy(net, testloader)

In [None]:
class_accuracy(net, testloader)