In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import random

def set_random_seed(seed):
    """
    seeds every random number generator used in this notebook
    (python `random`, numpy, torch on CPU and on all CUDA devices)
    and switches cuDNN to deterministic behaviour

    seed: integer passed unchanged to each generator
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # cover every visible GPU, not only the current one;
    # this call is silently ignored when CUDA is not available
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # the cuDNN autotuner may pick different kernels from run to run
    torch.backends.cudnn.benchmark = False

In [2]:
def dec2bin(x, n=7):
    """
    creating a binary list of integer non-negative x

    x: value to convert; it is passed through int() first
    n: length of the returned list (number of bits)
    returns: list of n zeros/ones, most significant bit first
    raises: AssertionError if x is negative or does not fit into n bits
    """
    u = 2 ** n - 1
    x = int(x)
    assert x >= 0, 'Input value x must be non-negative'
    assert x <= u, f'Input value x with n = {n} must not exceed {u}'

    # read the bits from the highest position down to the lowest;
    # this also covers x == 0 without a separate branch
    return [(x >> shift) & 1 for shift in range(n - 1, -1, -1)]

In [3]:
def get_all_X(n_inputs):
    """
    builds the input part of a truth table:
    one row per binary combination of n_inputs variables,
    rows ordered by the integer they encode (0 .. 2**n_inputs - 1)
    """
    rows = [dec2bin(value, n=n_inputs) for value in range(2 ** n_inputs)]
    return np.array(rows)

In [4]:
def random_boolfunc(n_inputs):
    """
    truth table of a randomly drawn boolean function:
    X holds every binary combination of n_inputs variables,
    y is a random 0/1 column with one entry per row of X
    """
    inputs = get_all_X(n_inputs)
    n_rows = 2 ** n_inputs
    outputs = np.random.randint(0, 2, size=(n_rows, 1))
    return inputs, outputs
   
def _and(n_inputs=2):
    """
    truth table of the AND logical gate over n_inputs variables:
    the output column is 0 everywhere except in the last row
    """
    inputs = get_all_X(n_inputs)
    outputs = np.zeros(shape=(2 ** n_inputs, 1), dtype=int)
    outputs[-1, 0] = 1
    return inputs, outputs
    
def _or(n_inputs=2):
    """
    truth table of the OR logical gate over n_inputs variables:
    the output column is 1 everywhere except in the first row
    """
    inputs = get_all_X(n_inputs)
    outputs = np.ones(shape=(2 ** n_inputs, 1), dtype=int)
    outputs[0, 0] = 0
    return inputs, outputs

def _xor(n_inputs=2):
    """
    truth table of the XOR logical gate over n_inputs variables:
    the output is the parity (sum modulo 2) of each input row
    """
    inputs = get_all_X(n_inputs)
    parity = inputs.sum(axis=1) % 2
    return inputs, parity.reshape(-1, 1)

def to_dataframe(X, y):
    """
    turns a truth table given as two arrays,
    X [2 ** n_inputs, n_inputs] and y [2 ** n_inputs, 1],
    into a single Pandas DataFrame whose input columns are
    named x{n_inputs} ... x1 and whose last column is y
    """
    table = np.concatenate((X, y), axis=1)
    n_inputs = X.shape[1]
    input_names = [f'x{n_inputs - k}' for k in range(n_inputs)]
    return pd.DataFrame(data=table, columns=input_names + ['y'])

In [5]:
# Number of boolean input variables; the models below are sized with it.
n = 2

In [6]:
%%time

set_random_seed(23) # just for get a complex random function
# X, y = random_boolfunc(n_inputs=n)
X, y = _xor(2)
to_dataframe(X, y)

Wall time: 11.7 ms


Unnamed: 0,x2,x1,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [7]:
# Convert the numpy truth table to torch tensors for training.
# Note: this rebinds X and y, so the numpy arrays are no longer reachable under these names.
X, y = torch.Tensor(X), torch.Tensor(y)

In [8]:
class Neuron(nn.Module):
    """
    a single artificial neuron: one linear unit followed by an activation

    n_inputs: number of input features of the linear layer
    activation: callable applied to the output of the linear layer;
        when omitted, a fresh nn.Sigmoid() is created for this instance
    """
    def __init__(self, n_inputs, activation=None):
        super().__init__()
        self.fc = nn.Linear(n_inputs, 1)
        # A module instance used as a default argument is created once, at
        # definition time, and would be shared by every Neuron; build one
        # per instance instead.
        self.activation = nn.Sigmoid() if activation is None else activation

    def forward(self, x):
        """
        returns activation(fc(x)); the last dimension of the linear output has size 1
        """
        return self.activation(self.fc(x))

In [9]:
def train(model, X, y, optimizer, criterion, epochs):
    """
    full-batch training loop: every epoch runs one forward pass on (X, y),
    one backward pass and one optimizer step

    model: callable mapping X to predictions
    X, y: inputs and targets, passed to model and criterion unchanged
    optimizer: object providing zero_grad() and step()
    criterion: callable (output, y) -> loss supporting backward()
    epochs: number of epochs; the loss is printed about ten times per run
    """
    # epochs // 10 is 0 for epochs < 10, which would make the modulo below
    # raise ZeroDivisionError; log every epoch in that case
    log_epoch = max(1, epochs // 10)
    for i in range(1, epochs + 1):
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        if i % log_epoch == 0:
            print(f'epoch {i}: loss {loss:.2f}')

In [10]:
# First attempt: fit a single sigmoid neuron to the whole truth table
# using Adam and binary cross-entropy.
model = Neuron(n_inputs=n)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
criterion = torch.nn.BCELoss()

train(model, X, y, optimizer, criterion, epochs=5000)


epoch 500: loss 0.69
epoch 1000: loss 0.69
epoch 1500: loss 0.69
epoch 2000: loss 0.69
epoch 2500: loss 0.69
epoch 3000: loss 0.69
epoch 3500: loss 0.69
epoch 4000: loss 0.69
epoch 4500: loss 0.69
epoch 5000: loss 0.69


In [11]:
# Predictions of the single neuron for every row of the truth table.
output = model(X)
output

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward0>)

In [12]:
def step(x):
    """
    threshold function: 1 where x >= 0.5 and 0 where x < 0.5

    x: tensor of values to binarise
    returns: tensor of zeros and ones with the dtype of `x - 0.5`
    """
    shifted = x - 0.5
    # torch.heaviside requires `values` to have the same dtype as its input,
    # so derive it from the shifted tensor (same dtype and device) instead of
    # a fixed float32 CPU tensor; shape (1, 1) is kept from the original
    at_threshold = shifted.new_ones((1, 1))
    return torch.heaviside(shifted, at_threshold)

In [13]:
# Binarise the single neuron's predictions with the 0.5 threshold.
step_out = step(output)
step_out

tensor([[1.],
        [1.],
        [1.],
        [1.]], grad_fn=<NotImplemented>)

In [14]:
# Target values, displayed for comparison with the thresholded predictions.
y

tensor([[0.],
        [1.],
        [1.],
        [0.]])

In [15]:
# Boolean mask over the rows: True where the thresholded prediction differs from the target.
mask = (step_out != y)[:,0]
mask

tensor([ True, False, False,  True])

In [16]:
# Keep only the misclassified rows; they become the training set of a second neuron.
X_new = X[mask]
y_new = y[mask]
X_new, y_new

(tensor([[0., 0.],
         [1., 1.]]),
 tensor([[0.],
         [0.]]))

In [17]:
# Second neuron: trained only on the rows the first neuron got wrong.
# `optimizer` and `criterion` are rebound here for the new model.
model2 = Neuron(n_inputs=n)

optimizer = torch.optim.Adam(model2.parameters(), lr = 0.01)
criterion = torch.nn.BCELoss()

train(model2, X_new, y_new, optimizer, criterion, epochs=5000)

epoch 500: loss 0.03
epoch 1000: loss 0.01
epoch 1500: loss 0.01
epoch 2000: loss 0.00
epoch 2500: loss 0.00
epoch 3000: loss 0.00
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [18]:
# Predictions of the second neuron on its own (reduced) training set.
output2 = model2(X_new)
output2

tensor([[8.9694e-04],
        [3.1796e-05]], grad_fn=<SigmoidBackward0>)

In [19]:
# Binarise the second neuron's predictions with the 0.5 threshold.
step_out2 = step(output2)
step_out2

tensor([[0.],
        [0.]], grad_fn=<NotImplemented>)

In [20]:
# Targets of the reduced training set, displayed for comparison.
y_new

tensor([[0.],
        [0.]])

In [21]:
# Rows of the reduced set that the second neuron still misclassifies.
mask2 = (step_out2 != y_new)[:,0]
mask2

tensor([False, False])

In [22]:
from copy import deepcopy

class BigModel(nn.Module):
    """
    network whose hidden layer consists of frozen base models;
    only the output layer `fc` has trainable parameters

    base_models: sequence of modules; each is called on the same input and
        column 0 of its output becomes one hidden feature
    """
    def __init__(self, base_models):
        super().__init__()
        # Work on private copies: freezing the caller's modules in place would
        # silently leave them untrainable for every later use in the notebook.
        self.base_models = nn.ModuleList(deepcopy(list(base_models)))

        for param in self.base_models.parameters():
            param.requires_grad = False

        self.n_hidden = len(self.base_models)
        self.fc = nn.Linear(self.n_hidden, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """
        returns activation(fc(hidden)), where hidden[:, k] is column 0
        of the k-th base model's output for x
        """
        columns = [base_model(x)[:, 0] for base_model in self.base_models]
        if columns:
            # stacking keeps the hidden features on the device/dtype of the
            # base model outputs instead of a default-device buffer
            hidden_out = torch.stack(columns, dim=1)
        else:
            hidden_out = x.new_empty((x.size(0), 0))

        return self.activation(self.fc(hidden_out))
# Instantiate once just to display the module structure.
BigModel([model, model2])

BigModel(
  (base_models): ModuleList(
    (0): Neuron(
      (fc): Linear(in_features=2, out_features=1, bias=True)
      (activation): Sigmoid()
    )
    (1): Neuron(
      (fc): Linear(in_features=2, out_features=1, bias=True)
      (activation): Sigmoid()
    )
  )
  (fc): Linear(in_features=2, out_features=1, bias=True)
  (activation): Sigmoid()
)

In [23]:
# Combine the two neurons trained above; BigModel keeps its base models frozen,
# so only the new output layer is optimised here.
# Uncomment the next two lines to start from freshly initialised base neurons instead.
# model = Neuron(n_inputs=n)
# model2 = Neuron(n_inputs=n)
big_model = BigModel([model, model2])

optimizer = torch.optim.Adam(big_model.parameters(), lr = 0.01)
criterion = torch.nn.BCELoss()

train(big_model, X, y, optimizer, criterion, epochs=5000)

epoch 500: loss 0.69
epoch 1000: loss 0.69
epoch 1500: loss 0.69
epoch 2000: loss 0.69
epoch 2500: loss 0.69
epoch 3000: loss 0.69
epoch 3500: loss 0.69
epoch 4000: loss 0.69
epoch 4500: loss 0.69
epoch 5000: loss 0.69


In [None]:
# Without training the weights of the base neurons, the combined model does not learn at all.

In [26]:
from typing import Callable, Union, Dict

import torch
import torch.nn as nn


class Parallel(nn.ModuleList):
    """Applies every contained module to the same input and merges the outputs."""

    def __init__(self, *modules: nn.Module, merge: Union[str, Callable] = "sum"):
        """Stores the branch modules and resolves the merge operation.

        Args:
            *modules: modules that each receive the same input in `forward`
            merge: handed to `create_merge`; the resulting callable is invoked
                with the list of branch outputs (default: `"sum"`)
        """
        super().__init__(modules)
        merge_fn = create_merge(merge)
        self.merge = merge_fn

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Returns `self.merge` applied to the list of per-module outputs, in module order."""
        branch_outputs = []
        for branch in self:
            branch_outputs.append(branch(x))
        return self.merge(branch_outputs)


def _product(xs):
    """Multiplies all items of `xs` together, left to right."""
    # `reduce` is not a builtin in Python 3 and was never imported in this
    # notebook, so the original "prod" entry raised NameError when used.
    from functools import reduce

    return reduce(lambda x, y: x * y, xs)


# Merge operations selectable by name; each receives the list of branch outputs.
MERGE_METHODS: Dict[str, Callable] = {
    "cat": lambda xs: torch.cat(xs, dim=1),  # concatenate along dimension 1
    "sum": lambda xs: sum(xs),  # type: ignore
    "prod": _product,
}


def create_merge(merge: Union[str, Callable]) -> Callable:
    """Resolves a merge specification to a callable.

    Args:
        merge: a key of `MERGE_METHODS`, or a callable that is returned unchanged

    Returns:
        The callable used to merge a list of results.

    Raises:
        KeyError: if `merge` is a string that is not a key of `MERGE_METHODS`.
    """
    return MERGE_METHODS[merge] if isinstance(merge, str) else merge

In [27]:
from copy import deepcopy

class BigModel2(nn.Module):
    """
    network whose hidden layer is a `Parallel` container of base models,
    followed by a linear output unit and a sigmoid

    base_models: list of modules, each applied to the same input
    """
    def __init__(self, base_models: list):
        super().__init__()
        # merge='cat' joins the per-model outputs along dimension 1
        self.base_models = Parallel(*base_models, merge='cat')
        self.n_hidden = len(base_models)
        self.fc = nn.Linear(self.n_hidden, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """
        returns activation(fc(hidden)), where hidden is the concatenated
        output of all base models for x
        """
        hidden_out = self.base_models(x)
        logits = self.fc(hidden_out)
        return self.activation(logits)
    
# Instantiate once to display the module structure.
bb = BigModel2([model, model2])
bb

BigModel2(
  (base_models): Parallel(
    (0): Neuron(
      (fc): Linear(in_features=2, out_features=1, bias=True)
      (activation): Sigmoid()
    )
    (1): Neuron(
      (fc): Linear(in_features=2, out_features=1, bias=True)
      (activation): Sigmoid()
    )
  )
  (fc): Linear(in_features=2, out_features=1, bias=True)
  (activation): Sigmoid()
)

In [28]:
# Same experiment with BigModel2. BigModel2 does not freeze its base models itself,
# so any of their parameters that still have requires_grad=True are optimised
# together with the output layer.
# Uncomment the next two lines to start from freshly initialised base neurons instead.
# model = Neuron(n_inputs=n)
# model2 = Neuron(n_inputs=n)
big_model = BigModel2([model, model2])

optimizer = torch.optim.Adam(big_model.parameters(), lr = 0.01)
criterion = torch.nn.BCELoss()

train(big_model, X, y, optimizer, criterion, epochs=5000)

epoch 500: loss 0.14
epoch 1000: loss 0.05
epoch 1500: loss 0.03
epoch 2000: loss 0.02
epoch 2500: loss 0.01
epoch 3000: loss 0.01
epoch 3500: loss 0.01
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [87]:
# Reference model: a plain multilayer perceptron with n inputs, two sigmoid
# hidden units and one sigmoid output, built from standard layers.
mlp = nn.Sequential(
    nn.Linear(n,2),
    nn.Sigmoid(),
    nn.Linear(2,1),
    nn.Sigmoid()
)

In [88]:
# Train the reference MLP on the full truth table with the same optimiser settings and loss.
optimizer = torch.optim.Adam(mlp.parameters(), lr = 0.01)
criterion = torch.nn.BCELoss()

train(mlp, X, y, optimizer, criterion, epochs=5000)

epoch 500: loss 0.12
epoch 1000: loss 0.04
epoch 1500: loss 0.02
epoch 2000: loss 0.01
epoch 2500: loss 0.01
epoch 3000: loss 0.01
epoch 3500: loss 0.00
epoch 4000: loss 0.00
epoch 4500: loss 0.00
epoch 5000: loss 0.00


In [89]:
# Predictions of the trained MLP for every row of the truth table.
output3 = mlp(X)
output3

tensor([[0.0012],
        [0.9979],
        [0.9979],
        [0.0012]], grad_fn=<SigmoidBackward0>)

In [90]:
# Binarise the MLP predictions with the 0.5 threshold.
step_out3 = step(output3)
step_out3

tensor([[0.],
        [1.],
        [1.],
        [0.]], grad_fn=<NotImplemented>)

In [91]:
# Target values, displayed for comparison with the MLP's thresholded predictions.
y

tensor([[0.],
        [1.],
        [1.],
        [0.]])

In [92]:
# Rows where the MLP's thresholded prediction disagrees with the target.
# Note: this rebinds `mask`, which earlier held the single neuron's errors.
mask = (step_out3 != y)[:,0]
mask

tensor([False, False, False, False])

In [93]:
# Outputs of the second base neuron on the full truth table.
model2(X)

tensor([[7.9415e-03],
        [9.8686e-01],
        [6.5932e-07],
        [6.1455e-03]], grad_fn=<SigmoidBackward0>)