In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import random

def set_random_seed(seed):
    """
    seed every random number generator used in this notebook
    (Python `random`, NumPy, PyTorch CPU and CUDA) and switch cuDNN
    to its deterministic configuration, so that re-running with the
    same seed repeats the same results
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # the cuDNN autotuner may select different algorithms from run to run,
    # so it has to be disabled together with the deterministic flag
    torch.backends.cudnn.benchmark = False

In [2]:
def dec2bin(x, n=7):
    """
    creating a binary list of integer non-negative x

    x is converted with int() and returned as its n-bit binary
    representation: a list of n ints (0 or 1), most significant bit
    first, padded with leading zeros

    raises AssertionError if x is negative or does not fit in n bits
    """
    u = 2 ** n - 1  # largest value that fits in n bits
    x = int(x)
    assert x >= 0, 'Input value x must be non-negative'
    assert x <= u, f'Input value x with n = {n} must be at most {u}'

    # read the bits from position n - 1 (most significant) down to 0;
    # this also covers x == 0 without a special case
    return [(x >> i) & 1 for i in range(n - 1, -1, -1)]

In [3]:
def get_all_X(n_inputs):
    """
    build the input half of a truth table: a NumPy array with one row
    per binary combination of n_inputs variables, rows ordered by the
    integer value they encode (0 first)
    """
    rows = [dec2bin(value, n=n_inputs) for value in range(2 ** n_inputs)]
    return np.array(rows)

In [4]:
def random_boolfunc(n_inputs):
    """
    truth table of a random Boolean function of n_inputs variables:
    returns all input combinations together with a column of outputs
    drawn by np.random.randint from {0, 1}
    """
    inputs = get_all_X(n_inputs)
    n_rows = 2 ** n_inputs
    outputs = np.random.randint(0, 2, size=(n_rows, 1))
    return inputs, outputs
   
def _and(n_inputs=2):
    """
    truth table of the AND gate over n_inputs variables:
    the output column is 0 everywhere except the last row
    """
    inputs = get_all_X(n_inputs)
    outputs = np.zeros(shape=(2 ** n_inputs, 1), dtype=int)
    outputs[-1, 0] = 1  # last row is the all-ones combination
    return inputs, outputs
    
def _or(n_inputs=2):
    """
    truth table of the OR gate over n_inputs variables:
    the output column is 1 everywhere except the first row
    """
    inputs = get_all_X(n_inputs)
    outputs = np.ones(shape=(2 ** n_inputs, 1), dtype=int)
    outputs[0, 0] = 0  # first row is the all-zeros combination
    return inputs, outputs

def _xor(n_inputs=2):
    """
    truth table of the XOR gate over n_inputs variables:
    the output is the parity (sum modulo 2) of each input row
    """
    inputs = get_all_X(n_inputs)
    parity = np.sum(inputs, axis=1) % 2
    outputs = parity.reshape(-1, 1)  # column vector, one output per row
    return inputs, outputs

def to_dataframe(X, y):
    """
    join a truth table given as two arrays,
    X [2 ** n_inputs, n_inputs] and y [2 ** n_inputs, 1],
    into one Pandas DataFrame whose input columns are named
    x{n_inputs} ... x1 (left to right) followed by the column y
    """
    table = np.concatenate((X, y), axis=1)
    n_inputs = X.shape[1]
    column_names = [f'x{n_inputs - k}' for k in range(n_inputs)]
    column_names.append('y')
    return pd.DataFrame(data=table, columns=column_names)

In [76]:
%%time

n = 2
set_random_seed(23) # just for get a complex random function
# X, y = random_boolfunc(n_inputs=n)
X, y = _xor(2)
to_dataframe(X, y)

Wall time: 2.99 ms


Unnamed: 0,x2,x1,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [7]:
# Convert the truth table to torch tensors (default tensor type) for training.
X = torch.Tensor(X)
y = torch.Tensor(y)

In [8]:
class Neuron(nn.Module):
    """
    single artificial neuron: a linear layer with n_inputs inputs and
    one output, followed by an activation function

    activation is any callable module applied to the linear output;
    when omitted, every instance gets its own nn.Sigmoid()
    """
    def __init__(self, n_inputs, activation=None):
        super().__init__()
        self.fc = nn.Linear(n_inputs, 1)
        # build the default here instead of in the signature: a default
        # argument is created once and would be shared by all instances
        self.activation = nn.Sigmoid() if activation is None else activation

    def forward(self, x):
        """apply the linear layer and then the activation to x"""
        return self.activation(self.fc(x))

In [9]:
def train(model, X, y, optimizer, criterion, epochs, verbose=True):
    """
    full-batch training loop: for each of `epochs` iterations run the
    model on X, compute criterion(output, y), backpropagate and take
    one optimizer step

    when verbose is true the loss is printed about ten times per run
    (every epochs // 10 iterations, or every iteration for short runs)
    """
    # epochs // 10 is 0 for epochs < 10, which would make the modulo
    # below raise ZeroDivisionError; log every epoch in that case
    log_epoch = max(1, epochs // 10)
    for i in range(1, epochs + 1):
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        if verbose and i % log_epoch == 0:
            print(f'epoch {i}: loss {loss.item():.2f}')

def step(x):
    """
    threshold x at 0.5: returns 1 where x >= 0.5 and 0 elsewhere,
    as a tensor with the dtype of x - 0.5
    """
    shifted = x - 0.5
    # torch.heaviside needs `values` to share the input's dtype (and device),
    # so derive both from the input instead of hard-coding a CPU float32 tensor
    at_threshold = torch.tensor([[1.]], dtype=shifted.dtype, device=shifted.device)
    return torch.heaviside(shifted, at_threshold)

def check(model, X, y):
    """
    compare the thresholded model output with the targets y

    prints 'Success!' and returns (None, None) when every sample is
    classified correctly; otherwise prints the indices of the wrong
    samples and returns the misclassified subset (X[mask], y[mask])
    """
    # evaluation only: no autograd graph is needed for the forward pass
    with torch.no_grad():
        out = model(X)
    step_out = step(out)
    mask = (step_out != y)[:, 0]  # True for every misclassified sample

    if mask.any():
        wrong_ans_numbers = [i for i, wrong in enumerate(mask.tolist()) if wrong]
        print(f'{type(model).__name__} gives wrong answers for samples {wrong_ans_numbers}')
        return X[mask], y[mask]

    print('Success!')
    return (None, None)

In [54]:
# Baseline: a single sigmoid neuron trained on the whole truth table.
model = Neuron(n_inputs=n)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

train(model, X, y, optimizer, criterion, epochs=5000)


epoch 500: loss 0.69
epoch 1000: loss 0.69
epoch 1500: loss 0.69
epoch 2000: loss 0.69
epoch 2500: loss 0.69
epoch 3000: loss 0.69
epoch 3500: loss 0.69
epoch 4000: loss 0.69
epoch 4500: loss 0.69
epoch 5000: loss 0.69


In [55]:
# Collect the samples the single neuron misclassifies; check returns
# (None, None) when there are none.
X_new, y_new = check(model, X, y)

Neuron gives wrong answers for samples [0, 3]


In [56]:
# Second neuron: trained only on the samples the first neuron got wrong.
model2 = Neuron(n_inputs=n)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.01)

train(model2, X_new, y_new, optimizer, criterion, epochs=5000)

epoch 500: loss 0.00
epoch 1000: loss 0.00
epoch 1500: loss 0.00
epoch 2000: loss 0.00
epoch 2500: loss 0.00
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [57]:
# Evaluate the second neuron on the residual samples only (not on the full table).
X_new2, y_new2 = check(model2, X_new, y_new)

Success!


In [58]:
from copy import deepcopy

# https://github.com/pytorch/pytorch/issues/36459#:~:text=Here%27s%20my%20definition%3A

class BigModel(nn.Module):
    """
    network built from already-constructed base models: every base
    model is applied to the same input, their outputs are concatenated
    along dim 1 and passed through a linear layer and a sigmoid

    base_models : non-empty list of modules; the linear layer has
        len(base_models) inputs, so each base model is expected to
        contribute one output column per sample
    freeze_base : when True the copied base models are excluded from
        training (requires_grad = False); by default they are trained
        together with the output layer

    raises ValueError if base_models is empty
    """
    def __init__(self, base_models : list, freeze_base : bool = False):
        super().__init__()
        if len(base_models) == 0:
            raise ValueError('base_models must contain at least one model')

        # deep copy, so training BigModel never modifies the caller's models
        self.base_models = nn.ModuleList(deepcopy(base_models))

        if freeze_base:
            for param in self.base_models.parameters():
                param.requires_grad = False

        self.n_hidden = len(base_models)
        self.fc = nn.Linear(self.n_hidden, 1)
        self.activation = nn.Sigmoid()

    def merge(self, x):
        """
        concatenate a list of tensors along dim 1; a method rather than
        a lambda attribute so that the module can still be pickled
        """
        return torch.cat(x, dim=1)

    def forward(self, x):
        """run all base models on x and combine their outputs"""
        hidden_out = self.merge([module(x) for module in self.base_models])
        out = self.activation(self.fc(hidden_out))

        return out

In [84]:
# Combine both trained neurons into one network and train it on the full truth table.
big_model = BigModel([model, model2])

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(big_model.parameters(), lr=0.01)

train(big_model, X, y, optimizer, criterion, epochs=5000)

epoch 500: loss 0.29
epoch 1000: loss 0.04
epoch 1500: loss 0.02
epoch 2000: loss 0.01
epoch 2500: loss 0.01
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [85]:
# Evaluate the combined model on the full truth table.
check(big_model, X, y)

Success!


(None, None)

In [17]:
# NOTE: if the weights of the base neurons are not trained (kept frozen), the combined model does not learn at all

In [70]:
# Reference experiment: how often does an ordinary 2-hidden-unit MLP,
# trained from a random initialisation, reproduce the truth table?
n_trials = 10
counter = {'success': 0, 'fail': 0}

for trial in range(n_trials):

    # a different but fixed seed per trial: the initialisations still vary
    # between trials, yet the tally is the same on every re-run
    set_random_seed(trial)

    mlp = nn.Sequential(
        nn.Linear(n, 2),
        nn.Sigmoid(),
        nn.Linear(2, 1),
        nn.Sigmoid()
    )

    optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01)
    criterion = torch.nn.BCELoss()

    train(mlp, X, y, optimizer, criterion, epochs=5000, verbose=False)

    # check returns (None, None) only when every sample is classified correctly
    X_wrong, _ = check(mlp, X, y)
    outcome = 'success' if X_wrong is None else 'fail'
    counter[outcome] += 1

print(counter)

Success!
Success!
Success!
Sequential gives wrong answers for samples [2, 3]
Sequential gives wrong answers for samples [1, 3]
Success!
Sequential gives wrong answers for samples [2, 3]
Success!
Sequential gives wrong answers for samples [3]
Success!
{'success': 6, 'fail': 4}


In [21]:
# Show the success/fail tally of the MLP trials.
# NOTE(review): the saved output of this cell differs from the tally printed by
# the trial loop, so it appears to come from an earlier execution — re-run to confirm.
counter

{'success': 7, 'fail': 3}

In [102]:
# Repeat the experiment with a 4-input XOR (parity) function.
n = 4
set_random_seed(23)  # seed originally picked so that random_boolfunc yields a complex function
# X, y = random_boolfunc(n_inputs=n)
X, y = _xor(n)
display(to_dataframe(X, y))

# convert the truth table to torch tensors for training
X = torch.Tensor(X)
y = torch.Tensor(y)

Unnamed: 0,x4,x3,x2,x1,y
0,0,0,0,0,0
1,0,0,0,1,1
2,0,0,1,0,1
3,0,0,1,1,0
4,0,1,0,0,1
5,0,1,0,1,0
6,0,1,1,0,0
7,0,1,1,1,1
8,1,0,0,0,1
9,1,0,0,1,0


In [130]:
def _fit_neuron(X_fit, y_fit, n_inputs, epochs=5000, lr=0.01):
    """
    train a fresh Neuron on (X_fit, y_fit) and return it together with
    the samples it still misclassifies ((None, None) when there are none)
    """
    neuron = Neuron(n_inputs=n_inputs)
    optimizer = torch.optim.Adam(neuron.parameters(), lr=lr)
    criterion = torch.nn.BCELoss()
    train(neuron, X_fit, y_fit, optimizer, criterion, epochs=epochs)
    X_wrong, y_wrong = check(neuron, X_fit, y_fit)
    return neuron, X_wrong, y_wrong


# Constructive procedure: train one neuron on the full table, then keep
# training new neurons on whatever the previous one misclassified, and
# finally combine all of them in a BigModel trained on the full table.
neurons = []
current_neuron, X_new, y_new = _fit_neuron(X, y, n)

if X_new is None:
    # a single neuron already reproduces the whole truth table
    result_model = current_neuron
    print(result_model)

else:
    neurons.append(deepcopy(current_neuron))

    # safety cap so that a neuron which never improves cannot hang the kernel
    max_neurons = 2 * len(X)

    while X_new is not None:
        if len(neurons) >= max_neurons:
            print(f'Stopped after {len(neurons)} neurons without fitting all residual samples')
            break

        current_neuron, X_new, y_new = _fit_neuron(X_new, y_new, n)

        neurons.append(deepcopy(current_neuron))

    # Experiment: optionally add extra untrained neurons.
    # neurons.append(Neuron(n_inputs=n))
    # neurons.append(Neuron(n_inputs=n))

    big_model = BigModel(neurons)

    optimizer = torch.optim.Adam(big_model.parameters(), lr=0.01)
    criterion = torch.nn.BCELoss()

    train(big_model, X, y, optimizer, criterion, epochs=20000)

    check(big_model, X, y)

    result_model = deepcopy(big_model)
    
# print(result_model)
        
        


epoch 500: loss 0.69
epoch 1000: loss 0.69
epoch 1500: loss 0.69
epoch 2000: loss 0.69
epoch 2500: loss 0.69
epoch 3000: loss 0.69
epoch 3500: loss 0.69
epoch 4000: loss 0.69
epoch 4500: loss 0.69
epoch 5000: loss 0.69
Neuron gives wrong answers for samples [0, 3, 5, 6, 9, 10, 12, 15]
epoch 500: loss 0.03
epoch 1000: loss 0.01
epoch 1500: loss 0.01
epoch 2000: loss 0.00
epoch 2500: loss 0.00
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00
Success!
epoch 2000: loss 0.45
epoch 4000: loss 0.45
epoch 6000: loss 0.45
epoch 8000: loss 0.45
epoch 10000: loss 0.45
epoch 12000: loss 0.45
epoch 14000: loss 0.45
epoch 16000: loss 0.45
epoch 18000: loss 0.45
epoch 20000: loss 0.45
BigModel gives wrong answers for samples [7, 11, 13, 14]


In [131]:
# Re-evaluate the final model on the full truth table; on failure the
# misclassified inputs and targets are returned and displayed.
check(result_model, X, y)

BigModel gives wrong answers for samples [7, 11, 13, 14]


(tensor([[0., 1., 1., 1.],
         [1., 0., 1., 1.],
         [1., 1., 0., 1.],
         [1., 1., 1., 0.]]),
 tensor([[1.],
         [1.],
         [1.],
         [1.]]))

In [132]:
# Display the architecture of the final model.
result_model

BigModel(
  (base_models): ModuleList(
    (0): Neuron(
      (fc): Linear(in_features=4, out_features=1, bias=True)
      (activation): Sigmoid()
    )
    (1): Neuron(
      (fc): Linear(in_features=4, out_features=1, bias=True)
      (activation): Sigmoid()
    )
  )
  (fc): Linear(in_features=2, out_features=1, bias=True)
  (activation): Sigmoid()
)