In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import random

def set_random_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True

# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

In [2]:
def dec2bin(x, n=7):
    """
    creating a binary list of integer non-negative x
    """
    u = 2 ** n - 1
    x = int(x)
    assert x >= 0, 'Input value x must be non-negative'
    assert x <= u, f'Input value x with n = {n} must be less than {u}'
    
    y = []
    if x == 0:
        for i in range(n):
            y.append(0)
    else:
        while x != 1:
            y.append(x % 2)
            x = x // 2
        y.append(x)
        delta = n - len(y)
        for i in range(delta):
            y.append(0)
        y.reverse()
    return y

In [3]:
def get_all_X(n_inputs):
    """
    for given number of variables returns
    all binary combinations of these variables
    """
    X = []
    for i in range(2**n_inputs):
        X += [dec2bin(i, n=n_inputs)]
    X = np.array(X)
    return X

In [4]:
def random_boolfunc(n_inputs):
    """
    for given number of variables returns
    the truth table, where output is random binary vector
    """
    X = get_all_X(n_inputs)
    y = np.random.randint(0, 2, size=(2**n_inputs, 1))
    return X, y
   
def _and(n_inputs=2):
    """
    for given number of variables returns
    the truth table of AND logical gate
    """
    X = get_all_X(n_inputs)
    y = np.append(
        np.zeros(shape=(2 ** n_inputs - 1, 1), dtype=int),
        np.array([[1]]),
        axis=0
    )
    return X, y
    
def _or(n_inputs=2):
    """
    for given number of variables returns
    the truth table of OR logical gate
    """
    X = get_all_X(n_inputs)
    y = np.append(
        np.array([[0]]),
        np.ones(shape=(2 ** n_inputs - 1, 1), dtype=int),
        axis=0
    )
    return X, y

def _xor(n_inputs=2):
    """
    for given number of variables returns
    the truth table of XOR logical gate
    """
    X = get_all_X(n_inputs)
    y = (np.sum(X, axis=1) % 2).reshape(-1, 1)
    return X, y

def to_dataframe(X, y):
    """
    for the truth table in form of two arrays 
    X [2 ** n_inputs, n_inputs] and y [2 ** n_inputs, 1]
    combine it to the form of Pandas DataFrame
    """
    data=np.concatenate((X, y), axis=1)
    n_inputs = X.shape[1]
    return pd.DataFrame(data=data, columns=[f'x{i}' for i in range(n_inputs, 0, -1)] + ['y'])

In [5]:
%%time

n = 2
# set_random_seed(23) # just for get a complex random function
# X, y = random_boolfunc(n_inputs=n)
X, y = _xor(2)
to_dataframe(X, y)

CPU times: user 1.21 ms, sys: 522 µs, total: 1.73 ms
Wall time: 1.68 ms


Unnamed: 0,x2,x1,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [6]:
X, y = torch.Tensor(X), torch.Tensor(y)

In [7]:
class Neuron(nn.Module):
    def __init__(self, n_inputs, activation=nn.Sigmoid()):
        super().__init__()
        self.fc = nn.Linear(n_inputs, 1)
        self.activation = activation
    
    def forward(self, x):
        x = self.fc(x)
        x = self.activation(x)
        return x

In [8]:
def train(model, X, y, epochs, device, optimizer='Adam', 
          criterion=torch.nn.BCELoss(), verbose=True):
    
    len_dataset = X.shape[0]
    dataset = torch.utils.data.TensorDataset(X, y)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=len_dataset, shuffle=True)
    
    if optimizer=='Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    else:
        print('other optimizers not supported yet')
    
    log_epoch = epochs // 10
    for i in range(1, epochs + 1):
        epoch_loss = 0
        for batch in dataloader:
            X, y = batch
            optimizer.zero_grad()
            output = model(X.to(device))
            loss = criterion(output, y.to(device))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.cpu().item()
        epoch_loss /= len(dataloader)
        if verbose and i % log_epoch == 0:
            print(f'epoch {i}: loss {epoch_loss:.2f}')

def step(x):
    return torch.heaviside(x - 0.5, torch.tensor([[1.]]))

def check(model, X, y, device):
    out = model(X.to(device))
    step_out = step(out.to('cpu'))
    mask = (step_out != y)[:,0]  
    
    if any(mask):
        wrong_ans_numbers = [i for i in range(mask.shape[0]) if mask[i].item()]
        print(f'{type(model).__name__} gives wrong answers for samples {wrong_ans_numbers}')
        return X[mask], y[mask]
    else:
        print('Success!')
        return (None, None)

In [9]:
model = Neuron(n_inputs=n).to(device)

train(model, X, y, device=device, epochs=5000)


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch 500: loss 0.69
epoch 1000: loss 0.69
epoch 1500: loss 0.69
epoch 2000: loss 0.69
epoch 2500: loss 0.69
epoch 3000: loss 0.69
epoch 3500: loss 0.69
epoch 4000: loss 0.69
epoch 4500: loss 0.69
epoch 5000: loss 0.69


In [10]:
X_new, y_new = check(model, X, y, device=device)

Neuron gives wrong answers for samples [0, 3]


In [11]:
model2 = Neuron(n_inputs=n).to(device)

train(model2, X_new, y_new, device=device, epochs=5000)

epoch 500: loss 0.03
epoch 1000: loss 0.01
epoch 1500: loss 0.01
epoch 2000: loss 0.00
epoch 2500: loss 0.00
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [12]:
X_new2, y_new2 = check(model2, X_new, y_new, device=device)

Success!


In [13]:
from copy import deepcopy

# https://github.com/pytorch/pytorch/issues/36459#:~:text=Here%27s%20my%20definition%3A

class BigModel(nn.Module):
    def __init__(self, base_models : list, activation=nn.Sigmoid()):
        super().__init__()
        self.base_models = nn.ModuleList(deepcopy(base_models))
        
        # for model in self.base_models:
        #     for param in model.parameters():
        #         param.requires_grad = False
        
        self.n_hidden = len(base_models)
        self.fc = nn.Linear(self.n_hidden, 1)
        self.activation = activation
        self.merge = lambda x: torch.cat(x, dim=1)
    
    def forward(self, x):
        hidden_out = self.merge([module(x) for module in self.base_models])
        out = self.activation(self.fc(hidden_out))
        
        return out

In [14]:
big_model = BigModel([model, model2]).to(device)

train(big_model, X, y, device=device, epochs=5000)

epoch 500: loss 0.22
epoch 1000: loss 0.03
epoch 1500: loss 0.01
epoch 2000: loss 0.01
epoch 2500: loss 0.00
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [15]:
check(big_model, X, y, device=device)

Success!


(None, None)

In [16]:
%%time
counter = {'success':0, 'fail':0}

for _ in range(10):

    mlp = nn.Sequential(
        nn.Linear(n,2),
        nn.Sigmoid(),
        nn.Linear(2,1),
        nn.Sigmoid()
    ).to(device)

    train(mlp, X, y, device=device, epochs=5000, verbose=False)

    if check(mlp, X, y, device=device)[0] is None:
        counter['success'] += 1
    else:
        counter['fail'] += 1

print(counter)

Sequential gives wrong answers for samples [2, 3]
Sequential gives wrong answers for samples [3]
Sequential gives wrong answers for samples [1, 3]
Success!
Success!
Success!
Success!
Sequential gives wrong answers for samples [3]
Success!
Success!
{'success': 6, 'fail': 4}
CPU times: user 23.4 s, sys: 1.24 ms, total: 23.4 s
Wall time: 23.4 s


In [17]:
n = 4
# set_random_seed(23) # just for get a complex random function
# X, y = random_boolfunc(n_inputs=n)
X, y = _xor(n)
display(to_dataframe(X, y))
X, y = torch.Tensor(X), torch.Tensor(y)


Unnamed: 0,x4,x3,x2,x1,y
0,0,0,0,0,0
1,0,0,0,1,1
2,0,0,1,0,1
3,0,0,1,1,0
4,0,1,0,0,1
5,0,1,0,1,0
6,0,1,1,0,0
7,0,1,1,1,1
8,1,0,0,0,1
9,1,0,0,1,0


In [18]:
neurons = []
current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

X_new, y_new = check(current_neuron, X, y, device=device)

if X_new is None:
    result_model = current_neuron
    print(result_model)

else:
    neurons.append(deepcopy(current_neuron))
    
    while X_new is not None:
        
        current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

        train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

        X_new, y_new = check(current_neuron, X_new, y_new, device=device)
        
        neurons.append(deepcopy(current_neuron))
    
    # neurons.append(Neuron(n_inputs=n, activation=nn.ReLU()))
    # neurons.append(Neuron(n_inputs=n))
    
    big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)
    
    train(big_model, X, y, device=device, epochs=100000, verbose=False)
    
    check(big_model, X, y, device=device)
    
    result_model = deepcopy(big_model)
    
        
        


Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [7, 11, 13, 14]


In [19]:
check(result_model, X, y, device=device)

BigModel gives wrong answers for samples [7, 11, 13, 14]


(tensor([[0., 1., 1., 1.],
         [1., 0., 1., 1.],
         [1., 1., 0., 1.],
         [1., 1., 1., 0.]]),
 tensor([[1.],
         [1.],
         [1.],
         [1.]]))

In [20]:
result_model

BigModel(
  (base_models): ModuleList(
    (0): Neuron(
      (fc): Linear(in_features=4, out_features=1, bias=True)
      (activation): Sigmoid()
    )
    (1): Neuron(
      (fc): Linear(in_features=4, out_features=1, bias=True)
      (activation): Sigmoid()
    )
  )
  (fc): Linear(in_features=2, out_features=1, bias=True)
  (activation): Sigmoid()
)

In [21]:
result_model(X[15:16])

tensor([[0.3637]], grad_fn=<SigmoidBackward0>)

# Эксперимент 1. XOR(4) vs Algorithm
Учим XOR(4) по алгоритму. З модели, каждая учится 10 раз
* из 2 предобученных нейронов
* 2 предобученных + 1 случайный (экстра-нейрон)
* 2 предобученных + 2 случайных
* 2 предобученных + 3 случайных

## Только 2 предобученных

In [22]:
counter_0_extra = {'success':0, 'fail':0}

for _ in range(10):

    neurons = []
    current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

    train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

    X_new, y_new = check(current_neuron, X, y, device=device)

    if X_new is None:
        result_model = current_neuron
        print(result_model)

    else:
        neurons.append(deepcopy(current_neuron))

        while X_new is not None:

            current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

            train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

            X_new, y_new = check(current_neuron, X_new, y_new, device=device)

            neurons.append(deepcopy(current_neuron))

        # neurons.append(Neuron(n_inputs=n, activation=nn.ReLU()))
        # neurons.append(Neuron(n_inputs=n))

        big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)

        train(big_model, X, y, device=device, epochs=100000, verbose=False)

        check(big_model, X, y, device=device)

        result_model = deepcopy(big_model)

    if check(result_model, X, y, device=device)[0] is None:
        counter_0_extra['success'] += 1
    else:
        counter_0_extra['fail'] += 1

print(counter_0_extra)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [7, 11, 13, 14]
BigModel gives wrong answers for samples [7, 11, 13, 14]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives w

## 2 предобученных + 1 экстра

In [23]:
counter_1_extra = {'success':0, 'fail':0}

for _ in range(10):

    neurons = []
    current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

    train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

    X_new, y_new = check(current_neuron, X, y, device=device)

    if X_new is None:
        result_model = current_neuron
        print(result_model)

    else:
        neurons.append(deepcopy(current_neuron))

        while X_new is not None:

            current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

            train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

            X_new, y_new = check(current_neuron, X_new, y_new, device=device)

            neurons.append(deepcopy(current_neuron))

        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
        # neurons.append(Neuron(n_inputs=n))

        big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)

        train(big_model, X, y, device=device, epochs=100000, verbose=False)

        check(big_model, X, y, device=device)

        result_model = deepcopy(big_model)

    if check(result_model, X, y, device=device)[0] is None:
        counter_1_extra['success'] += 1
    else:
        counter_1_extra['fail'] += 1

print(counter_1_extra)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for samples [15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [7, 11, 13, 14]
BigModel gives wrong answers for samples [7, 11, 13, 14]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answe

## 2 Предобученных + 2 экстра

In [24]:
counter_2_extra = {'success':0, 'fail':0}

for _ in range(10):

    neurons = []
    current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

    train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

    X_new, y_new = check(current_neuron, X, y, device=device)

    if X_new is None:
        result_model = current_neuron
        print(result_model)

    else:
        neurons.append(deepcopy(current_neuron))

        while X_new is not None:

            current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

            train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

            X_new, y_new = check(current_neuron, X_new, y_new, device=device)

            neurons.append(deepcopy(current_neuron))

        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))

        big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)

        train(big_model, X, y, device=device, epochs=100000, verbose=False)

        check(big_model, X, y, device=device)

        result_model = deepcopy(big_model)

    if check(result_model, X, y, device=device)[0] is None:
        counter_2_extra['success'] += 1
    else:
        counter_2_extra['fail'] += 1

print(counter_2_extra)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
BigModel gives wrong answers for samples [15]
BigModel gives wrong answers for

## 2 Предобученных + 3 экстра

In [25]:
counter_3_extra = {'success':0, 'fail':0}

for _ in range(10):

    neurons = []
    current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

    train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

    X_new, y_new = check(current_neuron, X, y, device=device)

    if X_new is None:
        result_model = current_neuron
        print(result_model)

    else:
        neurons.append(deepcopy(current_neuron))

        while X_new is not None:

            current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

            train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

            X_new, y_new = check(current_neuron, X_new, y_new, device=device)

            neurons.append(deepcopy(current_neuron))

        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
        neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))

        big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)

        train(big_model, X, y, device=device, epochs=100000, verbose=False)

        check(big_model, X, y, device=device)

        result_model = deepcopy(big_model)

    if check(result_model, X, y, device=device)[0] is None:
        counter_3_extra['success'] += 1
    else:
        counter_3_extra['fail'] += 1

print(counter_3_extra)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
Success!
Success!
{'success': 10, 'fail': 0}


## Результаты, выводы

* 2 + 0: `{'success': 0, 'fail': 10}`
* 2 + 1: `{'success': 3, 'fail': 7}`
* 2 + 2: `{'success': 9, 'fail': 1}`
* 2 + 3: `{'success': 10, 'fail': 0}`  


# Эксперимент 2. Дают ли буст предобученные нейроны, или все дело только в архитектуре?
Учим большую модель из 3 случайных нейронов и модель с 2 предобученными и 1 экстра

## Большая модель из 3 случайных скрытых нейронов

In [26]:
neurons = []
current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

X_new, y_new = check(current_neuron, X, y, device=device)

if X_new is None:
    result_model = current_neuron
    print(result_model)

else:
    neurons.append(deepcopy(current_neuron))
    
    while X_new is not None:
        
        current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

        train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

        X_new, y_new = check(current_neuron, X_new, y_new, device=device)
        
        neurons.append(deepcopy(current_neuron))
    
    # neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
    # neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
    
    big_model = BigModel([Neuron(n_inputs=n, activation=nn.Sigmoid())]*3, activation=nn.Sigmoid()).to(device) # <--- !!!
    
    train(big_model, X, y, device=device, epochs=100000, verbose=True)
    
    check(big_model, X, y, device=device)
    
    result_model = deepcopy(big_model)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
epoch 10000: loss 0.69
epoch 20000: loss 0.69
epoch 30000: loss 0.69
epoch 40000: loss 0.69
epoch 50000: loss 0.69
epoch 60000: loss 0.69
epoch 70000: loss 0.69
epoch 80000: loss 0.69
epoch 90000: loss 0.69
epoch 100000: loss 0.69
BigModel gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]


## Большая модель с 2 предобученными и 1 случайным нейронами

In [30]:
neurons = []
current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

X_new, y_new = check(current_neuron, X, y, device=device)

if X_new is None:
    result_model = current_neuron
    print(result_model)

else:
    neurons.append(deepcopy(current_neuron))
    
    while X_new is not None:
        
        current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

        train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

        X_new, y_new = check(current_neuron, X_new, y_new, device=device)
        
        neurons.append(deepcopy(current_neuron))
    
    neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
    # neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
    
    big_model = BigModel(neurons, activation=nn.Sigmoid()).to(device)
    
    train(big_model, X, y, device=device, epochs=100000, verbose=True)
    
    check(big_model, X, y, device=device)
    
    result_model = deepcopy(big_model)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
epoch 10000: loss 0.16
epoch 20000: loss 0.00
epoch 30000: loss 0.00
epoch 40000: loss 0.00
epoch 50000: loss 0.00
epoch 60000: loss 0.00
epoch 70000: loss 0.00
epoch 80000: loss 0.00
epoch 90000: loss 0.00
epoch 100000: loss 0.00
Success!


# Попытка 10 раз обучить модель с 3 случайными нейронами

In [31]:
counter_3_random = {'success':0, 'fail':0}

for _ in range(10):

    neurons = []
    current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

    train(current_neuron, X, y, device=device, epochs=5000, verbose=False)

    X_new, y_new = check(current_neuron, X, y, device=device)

    if X_new is None:
        result_model = current_neuron
        print(result_model)

    else:
        neurons.append(deepcopy(current_neuron))

        while X_new is not None:

            current_neuron = Neuron(n_inputs=n, activation=nn.Sigmoid()).to(device)

            train(current_neuron, X_new, y_new, device=device, epochs=5000, verbose=False)

            X_new, y_new = check(current_neuron, X_new, y_new, device=device)

            neurons.append(deepcopy(current_neuron))

        # neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))
        # neurons.append(Neuron(n_inputs=n, activation=nn.Sigmoid()))

        big_model = BigModel([Neuron(n_inputs=n, activation=nn.Sigmoid())]*3, activation=nn.Sigmoid()).to(device) # <--- !!!

        train(big_model, X, y, device=device, epochs=100000, verbose=True)

        check(big_model, X, y, device=device)

        result_model = deepcopy(big_model)
    
    if check(result_model, X, y, device=device)[0] is None:
        counter_3_random['success'] += 1
    else:
        counter_3_random['fail'] += 1

print(counter_3_random)

Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
epoch 10000: loss 0.69
epoch 20000: loss 0.69
epoch 30000: loss 0.69
epoch 40000: loss 0.69
epoch 50000: loss 0.69
epoch 60000: loss 0.69
epoch 70000: loss 0.69
epoch 80000: loss 0.65
epoch 90000: loss 0.65
epoch 100000: loss 0.65
BigModel gives wrong answers for samples [3, 5, 6, 9, 10, 12, 15]
BigModel gives wrong answers for samples [3, 5, 6, 9, 10, 12, 15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
epoch 10000: loss 0.69
epoch 20000: loss 0.69
epoch 30000: loss 0.65
epoch 40000: loss 0.65
epoch 50000: loss 0.65
epoch 60000: loss 0.65
epoch 70000: loss 0.65
epoch 80000: loss 0.65
epoch 90000: loss 0.65
epoch 100000: loss 0.65
BigModel gives wrong answers for samples [3, 5, 6, 9, 10, 12, 15]
BigModel gives wrong answers for samples [3, 5, 6, 9, 10, 12, 15]
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
Success!
epoch 10000: loss 0.69
epoch 20000: loss 0.69


## Результаты, выводы
Большая модель из 3-х случайных скрытых нейронов не обучается, а если в ней 2 предобученных и 1 случайный - то обучается