In [1]:
import torch

## Тензоры и операции над ними

In [2]:
t = torch.tensor([1],
                 dtype=torch.float32)

In [3]:
a = torch.tensor([[1, 2]], dtype=torch.float32)
b = torch.tensor([[3, 4]], dtype=torch.float32)

In [4]:
a.shape

torch.Size([1, 2])

### Арифметика

In [5]:
a + b

tensor([[4., 6.]])

In [6]:
a * b

tensor([[3., 8.]])

In [7]:
a @ b.T

tensor([[11.]])

In [8]:
torch.matmul(a, b.T)

tensor([[11.]])

In [9]:
torch.sum(a)

tensor(3.)

## Автоматическое дифференцирование

In [10]:
def foo(x: torch.Tensor) -> torch.Tensor:
    """Return the sum of absolute deviations of ``x`` from the constant 2.

    Args:
        x: Input tensor.

    Returns:
        A zero-dimensional tensor holding ``sum(|x - 2|)`` over all elements.
    """
    shifted = x - 2
    return shifted.abs().sum()

In [11]:
tunable_a = torch.tensor([[1.0, 2.0]], requires_grad=True)

In [12]:
tunable_a

tensor([[1., 2.]], requires_grad=True)

In [13]:
r = foo(tunable_a)

In [14]:
r

tensor(1., grad_fn=<SumBackward0>)

In [15]:
r.backward()

In [16]:
tunable_a.grad

tensor([[-1.,  0.]])

## Градиентный спуск

In [17]:
def mismatch(expected: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Return the sum of squared element-wise differences ``expected - x``.

    Args:
        expected: Reference tensor.
        x: Tensor being compared against the reference.

    Returns:
        A zero-dimensional tensor with ``sum((expected - x) ** 2)``.
    """
    residual = expected - x
    return residual.pow(2).sum()

In [18]:
etalon = torch.tensor([[1.0, 2.0]], dtype=torch.float32)
x = torch.tensor([[1.5, 4.0]],
                 requires_grad=True,
                 dtype=torch.float32)

In [19]:
mismatch(etalon, x)

tensor(4.2500, grad_fn=<SumBackward0>)

In [20]:
q = mismatch(etalon, x)

In [21]:
q.backward()

In [22]:
etalon

tensor([[1., 2.]])

In [23]:
x.grad

tensor([[1., 4.]])

In [24]:
# NOT `x = x - x.grad * 0.1`: that assignment would rebind the name x to a
# new tensor produced by the subtraction instead of changing the existing one.
# Writing through .data replaces the values held by the same tensor object x,
# taking one step of size 0.1 against the gradient.
x.data = x.data - x.grad.data * 0.1

In [25]:
x

tensor([[1.4000, 3.6000]], requires_grad=True)

In [26]:
x.grad

tensor([[1., 4.]])

In [27]:
def gradient_descent(etalon, x, n_steps, alpha=0.1):
    """Minimise ``mismatch(etalon, x)`` over ``x`` with plain gradient descent.

    On every iteration the current ``x`` and the loss value are printed, the
    loss is backpropagated into ``x.grad``, ``x`` is moved by
    ``-alpha * x.grad`` and the gradient is reset to zero.

    Args:
        etalon: Target tensor passed to ``mismatch`` as the expected value.
        x: Tensor to optimise; ``backward()`` must be able to populate
            ``x.grad`` (i.e. it was created with ``requires_grad=True``).
        n_steps: Number of descent iterations.
        alpha: Step size (learning rate).

    Returns:
        The same tensor object ``x``, updated in place.
    """
    for _ in range(n_steps):
        q = mismatch(etalon, x)
        print(x, '->', q.item())
        # Compute the gradient of the loss with respect to x (d mismatch / dx).
        q.backward()
        # The update must not be recorded by autograd, so it runs under
        # no_grad() rather than through the discouraged `.data` attribute.
        with torch.no_grad():
            # Step in the direction opposite to the gradient.
            x.sub_(x.grad * alpha)
            # backward() accumulates into x.grad; clear it for the next step.
            x.grad.zero_()
    return x

In [28]:
etalon = torch.tensor([[1.0, 2.0]], dtype=torch.float32)
x = torch.tensor([[1.5, 4.0]],
                 requires_grad=True,
                 dtype=torch.float32)

gradient_descent(etalon, x, 10)

tensor([[1.5000, 4.0000]], requires_grad=True) -> 4.25
tensor([[1.4000, 3.6000]], requires_grad=True) -> 2.7199997901916504
tensor([[1.3200, 3.2800]], requires_grad=True) -> 1.740799903869629
tensor([[1.2560, 3.0240]], requires_grad=True) -> 1.1141119003295898
tensor([[1.2048, 2.8192]], requires_grad=True) -> 0.7130317091941833
tensor([[1.1638, 2.6554]], requires_grad=True) -> 0.4563402235507965
tensor([[1.1311, 2.5243]], requires_grad=True) -> 0.29205769300460815
tensor([[1.1049, 2.4194]], requires_grad=True) -> 0.18691684305667877
tensor([[1.0839, 2.3355]], requires_grad=True) -> 0.1196267157793045
tensor([[1.0671, 2.2684]], requires_grad=True) -> 0.0765610784292221


tensor([[1.0537, 2.2147]], requires_grad=True)

In [29]:
class MyGDOptimizer:
    """Minimal gradient-descent optimizer over a collection of tensors.

    Args:
        what_to_tune: Iterable of tensors to adjust. It is copied into a list
            so that a one-shot iterable (e.g. a generator) can still be
            traversed on every call to ``step``.
        alpha: Step size (learning rate).
    """

    def __init__(self, what_to_tune, alpha=0.1):
        # Tensors that will be adjusted; materialised so repeated step()
        # calls see all of them even if a generator was passed in.
        self.parameters = list(what_to_tune)
        self.alpha = alpha

    def step(self):
        """Move every parameter by ``-alpha * grad`` and zero its gradient.

        Parameters whose ``.grad`` is ``None`` (no gradient has been computed
        for them) are left untouched.
        """
        # The update itself must not be tracked by autograd.
        with torch.no_grad():
            for parameter in self.parameters:
                if parameter.grad is None:
                    continue
                parameter.sub_(parameter.grad * self.alpha)
                # Gradients accumulate across backward() calls; reset them.
                parameter.grad.zero_()
             

In [30]:
def gradient_descent2(etalon, x, n_steps, optimizer):
    """Run ``n_steps`` optimisation iterations on ``mismatch(etalon, x)``.

    Each iteration prints the current ``x`` with its loss value,
    backpropagates the loss and then calls ``optimizer.step()``. This function
    never touches ``x`` or ``x.grad`` directly; applying the update is left
    entirely to the optimizer.

    Args:
        etalon: Target tensor passed to ``mismatch`` as the expected value.
        x: Tensor whose loss is evaluated and backpropagated.
        n_steps: Number of iterations.
        optimizer: Object exposing a ``step()`` method.

    Returns:
        The tensor ``x`` that was passed in.
    """
    for _iteration in range(n_steps):
        loss = mismatch(etalon, x)
        # Report the current point together with its loss value.
        print(x, '->', loss.item())
        # Compute the gradient of the loss with respect to x (d mismatch / dx).
        loss.backward()
        # Delegate the actual parameter update to the optimizer.
        optimizer.step()
    return x

In [31]:
etalon = torch.tensor([[1.0, 2.0]], dtype=torch.float32)
x = torch.tensor([[1.5, 4.0]],
                 requires_grad=True,
                 dtype=torch.float32)
gradient_descent2(etalon, x, 10, MyGDOptimizer([x], 0.1))

tensor([[1.5000, 4.0000]], requires_grad=True) -> 4.25
tensor([[1.4000, 3.6000]], requires_grad=True) -> 2.7199997901916504
tensor([[1.3200, 3.2800]], requires_grad=True) -> 1.740799903869629
tensor([[1.2560, 3.0240]], requires_grad=True) -> 1.1141119003295898
tensor([[1.2048, 2.8192]], requires_grad=True) -> 0.7130317091941833
tensor([[1.1638, 2.6554]], requires_grad=True) -> 0.4563402235507965
tensor([[1.1311, 2.5243]], requires_grad=True) -> 0.29205769300460815
tensor([[1.1049, 2.4194]], requires_grad=True) -> 0.18691684305667877
tensor([[1.0839, 2.3355]], requires_grad=True) -> 0.1196267157793045
tensor([[1.0671, 2.2684]], requires_grad=True) -> 0.0765610784292221


tensor([[1.0537, 2.2147]], requires_grad=True)

In [32]:
# Evaluate the loss for the current values of etalon and x.
q = mismatch(etalon, x)

## Логистическая регрессия "на коленке"

## Многослойная нейронная сеть