In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn

In [2]:
class MyModel(nn.Module):
    def __init__(self,):
        super().__init__()

        self.layer1 = nn.Linear(1, 100)
        self.activ = nn.ReLU()
        self.layer2 = nn.Linear(100, 1)
        self.do = nn.Dropout(0.1)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.activ(x)
        x = self.do(x)
        x = self.layer2(x)
                        
        return x

In [3]:
x = (torch.rand(1000)-0.4)*3
y = x**3 + torch.randn(1)*0.2

In [4]:
model = MyModel()
model.train() 
optim = torch.optim.Adam(model.parameters())
loss_func = nn.MSELoss()

In [5]:
n_epochs = 10
for epoch in range(n_epochs):
    for i, (val, t) in enumerate(zip(x, y)):
        optim.zero_grad()
        predict = model(val.unsqueeze(dim=0))
        loss = loss_func(predict, t)
        loss.backward()
        optim.step()
    print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')

  return F.mse_loss(input, target, reduction=self.reduction)


epoch: 0, step: 999, loss: 0.05419119447469711
epoch: 1, step: 999, loss: 0.0014200137229636312
epoch: 2, step: 999, loss: 0.006938278209418058
epoch: 3, step: 999, loss: 0.006856575608253479
epoch: 4, step: 999, loss: 0.00036289350828155875
epoch: 5, step: 999, loss: 0.0002590734220575541
epoch: 6, step: 999, loss: 0.005557742901146412
epoch: 7, step: 999, loss: 0.001379823312163353
epoch: 8, step: 999, loss: 0.002425295999273658
epoch: 9, step: 999, loss: 0.0037736091762781143


In [6]:
class LinearLayer:
    def __init__(self, n_inp, n_out, activation='sigmoid'):
        self.w = np.random.randn(n_out, n_inp) * 0.1
        self.b = np.random.randn(n_out, 1) * 0.1
        if activation == 'sigmoid':
            self.activ = sigmoid
        if activation == 'relu':
            self.activ = relu
        elif activation == 'None':
            self.activ = None
        else:
            raise Exception(f'Unknown activation "{activation}"')
        self._clear_state()

    def _clear_state(self):
        self.lin = None
        self.inp = None
        self.d_w = None
        self.d_b = None

    def forward(self, x):
        self.inp = x
        self.lin = np.dot(self.w, x) + self.b
        activ = self.activ(self.lin) if self.activ is not None else self.lin

        return activ

    def backward(self, grad): # grad = d L / d z    Dout 
        # grad * dz / d lin
        if self.activ == sigmoid:
            grad_lin = sigmoid_backward(grad, self.lin) 
        elif self.activ == relu:
            grad_lin = relu_backward(grad, self.lin)
        else:
            grad_lin = grad
        # grad_lin * d lin / d w 
        m = self.inp.shape[1]
        self.d_w = np.dot(grad_lin, self.inp.T) / m    # d_in dOut
        # grad_lin * d lin / d b 
        self.d_b = np.sum(grad_lin, axis=1, keepdims=True) / m

        grad = np.dot(self.w.T, grad_lin)

        return grad

In [7]:
class SGDMomentum1:
    def __init__(self, model: LinearLayer, lr=0.001, momentum=0.99):
        self.lr = lr
        self.m = momentum
        self.model = model

        self.vel_w = np.zeros_like(model.w)
        self.vel_b = np.zeros_like(model.b)

    def step(self):
        self.vel_w = self.m * self.vel_w - self.lr * self.model.d_w
        self.vel_b = self.m * self.vel_b - self.lr * self.model.d_b

        self.model.w += self.vel_w
        self.model.b += self.vel_b

    def zero_grad(self):
        self.model.d_w = np.zeros_like(self.model.d_w)
        self.model.d_b = np.zeros_like(self.model.d_b)

Adam Optimizer

In [8]:
class AdamOptim():
    def __init__(self, eta=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.m_dw, self.v_dw = 0, 0
        self.m_db, self.v_db = 0, 0
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.eta = eta
    def update(self, t, w, b, dw, db):
        ## dw, db из текущей минибатча
        ## momentum beta 1
        # *** веса *** #
        self.m_dw = self.beta1*self.m_dw + (1-self.beta1)*dw
        # *** смещения *** #
        self.m_db = self.beta1*self.m_db + (1-self.beta1)*db

        ## rms beta 2
        # *** веса *** #
        self.v_dw = self.beta2*self.v_dw + (1-self.beta2)*(dw**2)
        # *** смещения *** #
        self.v_db = self.beta2*self.v_db + (1-self.beta2)*(db)

        ## коррекция смещения
        m_dw_corr = self.m_dw/(1-self.beta1**t)
        m_db_corr = self.m_db/(1-self.beta1**t)
        v_dw_corr = self.v_dw/(1-self.beta2**t)
        v_db_corr = self.v_db/(1-self.beta2**t)

        ## обновить веса и смещения
        w = w - self.eta*(m_dw_corr/(np.sqrt(v_dw_corr)+self.epsilon))
        b = b - self.eta*(m_db_corr/(np.sqrt(v_db_corr)+self.epsilon))
        return w, b

Решение квадратного уравнения

In [5]:
import torch
from torch import nn
from torch.utils.data import Dataset

class QuadraticEquationDataset(Dataset):
    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        a = torch.randn(1)
        b = torch.randn(1)
        c = torch.randn(1)

        D = b**2 - 4*a*c
        if D < 0:
            x1 = float('nan')
            x2 = float('nan')
        else:
            x1 = (-b + torch.sqrt(D)) / (2*a)
            x2 = (-b - torch.sqrt(D)) / (2*a)

        return torch.Tensor([a, b, c]), torch.Tensor([x1, x2])

In [44]:
from sklearn.model_selection import train_test_split

dataset = QuadraticEquationDataset(num_samples=1000)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)


In [45]:
trainloader = torch.utils.data.DataLoader(train_dataset, 
                                          #batch_size=32,
                                          shuffle=True, 
                                          num_workers=2, 
                                          )

testloader = torch.utils.data.DataLoader(test_dataset, 
                                          #batch_size=32,
                                          shuffle=True, 
                                          num_workers=2, 
                                          )

In [46]:
class QuadraticEquationModel(nn.Module):
    def __init__(self):
        super(QuadraticEquationModel, self).__init__()
        self.linear1 = nn.Linear(3, 16)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(16, 8)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(8, 2)

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu1(x)
        x = self.linear2(x)
        x = self.relu2(x)
        x = self.linear3(x)
        return x

In [42]:
model = QuadraticEquationModel()
optim = torch.optim.SGD(model.parameters(), lr=0.01)
loss_func = nn.MSELoss()

In [47]:


num_epochs = 10


for epoch in range(num_epochs):
    for i, (inputs, t) in enumerate(trainloader, 0):
        optim.zero_grad()
        predict = model(inputs)
        print(predict)
        loss = loss_func(predict, t)
        print(loss)
        loss.backward()
        optim.step()
    print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')

tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
epoch: 0, step: 7, loss: nan
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, nan]], grad_fn=<AddmmBackward0>)
tensor(nan, grad_fn=<MseLossBackward0>)
tensor([[nan, 

In [25]:
model.eval()
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        outputs = model(inputs)
        predicted = outputs
        print(predicted, labels)

tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[ 0.5180, -0.8559]])
tensor([[nan, nan]]) tensor([[ 6.2646, -0.0524]])
tensor([[nan, nan]]) tensor([[-84.3136,   2.8203]])
tensor([[nan, nan]]) tensor([[  0.1294, -14.5525]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[ 1.5542, -1.0139]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[ 1.2038, -1.1941]])
tensor([[nan, nan]]) tensor([[ 1.2616, -0.5858]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[ 0.3271, -1.4031]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[-1.1750,  1.3107]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[0.0014, 1.1258]])
tensor([[nan, nan]]) tensor([[nan, nan]])
tensor([[nan, nan]]) tensor([[-0.0892,  4.2295]])
tensor([[nan, nan]]) tensor(