In [1]:
import math as ma
import torch

def generate_dataset(N = 1000):
    #Fait un tensor N*2 (ensemble de coordonnées (x,y))
    inp = torch.empty(N, 2).uniform_(0, 1) 
    #centre du cercle en 0.5
    a = torch.sub(inp, 0.5)
    #équation de cercle
    clas = a.pow(2).sum(1).sub(1 / (2*ma.pi)).sign().div(-1).add(1).div(2).long()
    return inp, clas


######################################################################

def sigma(x):
    return x.mul(-1).exp().add(1)**(-1)

def dsigma(x):
    return x.mul(-1).exp() * (sigma(x))**2

######################################################################

def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

######################################################################

def forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, x):
    x0 = x
    s1 = w1.mv(x0) + b1
    x1 = sigma(s1)
    s2 = w2.mv(x1) + b2
    x2 = sigma(s2)
    s3 = w3.mv(x2) + b3
    x3 = sigma(s3)
    sout = wout.mv(x3) + bout
    xout = sigma(sout)
    return x0, s1, x1, s2, x2, s3, x3, sout, xout

def backward_pass(w1, b1, w2, b2, w3, b3, wout, bout,
                  t,
                  x, s1, x1, s2, x2, s3, x3, sout, xout,
                  dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout):
    x0 = x
    dl_dxout = dloss(xout, t)
    dl_dsout = dsigma(sout) * dl_dxout
    dl_dx3 = wout.t().mv(dl_dsout)
    dl_ds3 = dsigma(s3) * dl_dx3
    dl_dx2 = w3.t().mv(dl_ds3)
    dl_ds2 = dsigma(s2) * dl_dx2
    dl_dx1 = w2.t().mv(dl_ds2)
    dl_ds1 = dsigma(s1) * dl_dx1
   
    dl_dwout = dl_dsout.view(-1, 1).mm(x3.view(1, -1))
    dl_dbout = dl_dsout
    dl_dw3 = dl_ds3.view(-1, 1).mm(x2.view(1, -1))
    dl_db3 = dl_ds3
    dl_dw2 = dl_ds2.view(-1, 1).mm(x1.view(1, -1))
    dl_db2 = dl_ds2
    dl_dw1 = dl_ds1.view(-1, 1).mm(x0.view(1, -1))
    dl_db1 = dl_ds1
    return dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout
######################################################################

train_input, train_target = generate_dataset(1000)
test_input, test_target = generate_dataset(1000)


mean, std = train_input.mean(), train_input.std()

train_input = train_input.sub_(mean).div_(std)
test_input = test_input.sub_(mean).div_(std)
nb_train_samples = train_input.size(0)
epsilon = 1
nb_hidden = 25
eta = 0.1 

w1 = torch.empty(nb_hidden, train_input.size(1)).normal_(0,epsilon)
b1 = torch.zeros(nb_hidden)
w2 = torch.empty(nb_hidden, nb_hidden).normal_(0,epsilon)
b2 = torch.zeros(nb_hidden)
w3 = torch.empty(nb_hidden, nb_hidden).normal_(0,epsilon)
b3 = torch.zeros(nb_hidden)
wout = torch.empty(1, nb_hidden).normal_(0,epsilon)
bout = torch.zeros(1)

dl_dw1 = torch.empty(w1.size())
dl_db1 = torch.empty(b1.size())
dl_dw2 = torch.empty(w2.size())
dl_db2 = torch.empty(b2.size())
dl_dw3 = torch.empty(w3.size())
dl_db3 = torch.empty(b3.size())
dl_dwout = torch.empty(wout.size())
dl_dbout = torch.empty(bout.size())

for k in range(10):
    acc_loss = 0
    nb_train_errors = 0
    
    dl_dw1.zero_()
    dl_db1.zero_()
    dl_dw2.zero_()
    dl_db2.zero_()
    dl_dw3.zero_()
    dl_db3.zero_()
    dl_dwout.zero_()
    dl_dbout.zero_()
    for n in range(nb_train_samples):
        x0, s1, x1, s2, x2, s3, x3, sout, xout = forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, train_input[n])
        if xout <= 0.5:
            pred = 0
        if xout > 0.5:
            pred = 1
        if n>= 0 and n<10:
            print(xout,train_target[n])
        if train_target[n] != pred: nb_train_errors = nb_train_errors + 1
        acc_loss = acc_loss + loss(xout, train_target[n])

        dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout = backward_pass(w1, b1, w2, b2,w3,b3, wout, bout,
                      train_target[n],
                      x0, s1, x1, s2, x2, s3, x3, sout, xout,
                      dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout)
        
        # Gradient step
        w1 = w1 - eta * dl_dw1
        b1 = b1 - eta * dl_db1
        w2 = w2 - eta * dl_dw2
        b2 = b2 - eta * dl_db2
        w3 = w3 - eta * dl_dw3
        b3 = b3 - eta * dl_db3
        wout = wout - eta * dl_dwout
        bout = bout - eta * dl_dbout
    # Test error

    nb_test_errors = 0

    for n in range(test_input.size(0)):
        _, _, _, _, _, _, _, _, xout = forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, test_input[n])

        if xout <=0.5:
            pred = 0
        if xout > 0.5:
            pred = 1
        if test_target[n] != pred: nb_test_errors = nb_test_errors + 1

    print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(k,
                  acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))


tensor([0.0119]) tensor(0)
tensor([0.0029]) tensor(1)
tensor([0.0014]) tensor(0)
tensor([0.0020]) tensor(0)
tensor([0.0022]) tensor(1)
tensor([0.0040]) tensor(1)
tensor([0.0037]) tensor(1)
tensor([0.0102]) tensor(1)
tensor([0.0012]) tensor(0)
tensor([0.0018]) tensor(1)
0 acc_train_loss 246.72 acc_train_error 36.10% test_error 31.20%
tensor([0.3169]) tensor(0)
tensor([0.6432]) tensor(1)
tensor([0.2927]) tensor(0)
tensor([0.2386]) tensor(0)
tensor([0.2928]) tensor(1)
tensor([0.7442]) tensor(1)
tensor([0.5577]) tensor(1)
tensor([0.4496]) tensor(1)
tensor([0.4208]) tensor(0)
tensor([0.5227]) tensor(1)
1 acc_train_loss 120.95 acc_train_error 15.00% test_error 20.50%
tensor([0.1590]) tensor(0)
tensor([0.9141]) tensor(1)
tensor([0.1928]) tensor(0)
tensor([0.1699]) tensor(0)
tensor([0.3680]) tensor(1)
tensor([0.9403]) tensor(1)
tensor([0.7695]) tensor(1)
tensor([0.2756]) tensor(1)
tensor([0.2622]) tensor(0)
tensor([0.6248]) tensor(1)
2 acc_train_loss 81.13 acc_train_error 9.80% test_error 16.4

In [2]:
import math as ma
import torch
def sigma(x):
    return x.add(-0.5).tanh()

def dsigma(x):
    return 4 * (x.add(-0.5).exp() + x.add(-0.5).mul(-1).exp()).pow(-2)

In [3]:
print(dsigma(torch.tensor([1.5])))

tensor([0.4200])


In [6]:
x = torch.tensor([2., 3., 4., 4.])
y = x**(-1)
print(y)

tensor([0.5000, 0.3333, 0.2500, 0.2500])


In [1]:
import math as ma
import torch

def generate_dataset(N = 1000):
    #Fait un tensor N*2 (ensemble de coordonnées (x,y))
    inp = torch.empty(N, 2).uniform_(0, 1) 
    #centre du cercle en 0.5
    a = torch.sub(inp, 0.5)
    #équation de cercle
    clas = a.pow(2).sum(1).sub(1 / (2*ma.pi)).sign().div(-1).add(1).div(2).long()
    return inp, clas


######################################################################

def sigma(x):
    return x.mul(-1).exp().add(1)**(-1)

def dsigma(x):
    return x.mul(-1).exp() * (sigma(x))**2

######################################################################

def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

######################################################################

def forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, x):
    x0 = x
    s1 = w1.mv(x0) + b1
    x1 = sigma(s1)
    s2 = w2.mv(x1) + b2
    x2 = sigma(s2)
    s3 = w3.mv(x2) + b3
    x3 = sigma(s3)
    sout = wout.mv(x3) + bout
    xout = sigma(sout)
    return x0, s1, x1, s2, x2, s3, x3, sout, xout

def backward_pass(w1, b1, w2, b2, w3, b3, wout, bout,
                  t,
                  x, s1, x1, s2, x2, s3, x3, sout, xout,
                  dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout):
    x0 = x
    dl_dxout = dloss(xout, t)
    dl_dsout = dsigma(sout) * dl_dxout
    dl_dwout = torch.mul(x3,dl_dsout)
    dl_dbout = dl_dsout

    dl_dx3 = wout * dl_dsout
    dl_ds3 = torch.mul(dsigma(s3), dl_dx3)
    dl_dw3 = torch.mm(dl_ds3.t(),x2.unsqueeze(1).t())
    dl_db3 = dl_ds3
    
    dl_dx2 = torch.matmul(w3, dl_ds3.t())
    dl_ds2 = torch.mul(dsigma(s2).unsqueeze(1), dl_dx2)
    dl_dw2 = torch.mm(dl_ds2,x1.unsqueeze(1).t())
    dl_db2 = dl_ds2
    
    dl_dx1 = torch.matmul(w2, dl_ds2)
    dl_ds1 = torch.mul(dsigma(s1).unsqueeze(1), dl_dx1)
    dl_dw1 = torch.mm(dl_ds1,x0.unsqueeze(1).t())
    dl_db1 = dl_ds1
    return dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout
######################################################################

train_input, train_target =  generate_dataset(1000)
test_input, test_target = generate_dataset(1000)

mean, std = train_input.mean(), train_input.std()

train_input = train_input.sub_(mean).div_(std)
test_input = test_input.sub_(mean).div_(std)
nb_train_samples = train_input.size(0)
epsilon = 0.01
nb_hidden = 25
eta = 0.1 / nb_train_samples

w1 = torch.empty(nb_hidden, train_input.size(1)).uniform_(-1/2, 1/2)
b1 = torch.zeros(nb_hidden)
w2 = torch.empty(nb_hidden, nb_hidden).uniform_(-1/25, 1/25)
b2 = torch.zeros(nb_hidden)
w3 = torch.empty(nb_hidden, nb_hidden).uniform_(-1/25, 1/25)
b3 = torch.zeros(nb_hidden)
wout = torch.empty(1, nb_hidden).uniform_(-1/25, 1/25)
bout = torch.zeros(1)

dl_dw1 = torch.empty(w1.size())
dl_db1 = torch.empty(b1.size())
dl_dw2 = torch.empty(w2.size())
dl_db2 = torch.empty(b2.size())
dl_dw3 = torch.empty(w3.size())
dl_db3 = torch.empty(b3.size())
dl_dwout = torch.empty(wout.size())
dl_dbout = torch.empty(bout.size())

for k in range(20):
    acc_loss = 0
    nb_train_errors = 0
    
    dl_dw1.zero_()
    dl_db1.zero_()
    dl_dw2.zero_()
    dl_db2.zero_()
    dl_dw3.zero_()
    dl_db3.zero_()
    dl_dwout.zero_()
    dl_dbout.zero_()
    for n in range(nb_train_samples):
        x0, s1, x1, s2, x2, s3, x3, sout, xout = forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, train_input[n])
        if xout <= 0.5:
            pred = 0
        if xout > 0.5:
            pred = 1
        if train_target[n] != pred: nb_train_errors = nb_train_errors + 1
        acc_loss = acc_loss + loss(xout, train_target[n])

        dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout = backward_pass(w1, b1, w2, b2,w3,b3, wout, bout,
                      train_target[n],
                      x0, s1, x1, s2, x2, s3, x3, sout, xout,
                      dl_dw1, dl_db1, dl_dw2, dl_db2, dl_dw3, dl_db3, dl_dwout, dl_dbout)

        # Gradient step
        w1 = w1 - eta * dl_dw1
        b1 = b1 - eta * dl_db1
        w2 = w2 - eta * dl_dw2
        b2 = b2 - eta * dl_db2
        w3 = w3 - eta * dl_dw3
        b3 = b3 - eta * dl_db3
        wout = wout - eta * dl_dwout
        bout = bout - eta * dl_dbout
    # Test error

    nb_test_errors = 0

    for n in range(test_input.size(0)):
        _, _, _, _, _, _, _, _, xout = forward_pass(w1, b1, w2, b2, w3, b3, wout, bout, test_input[n])

        if xout <=0.5:
            pred = 0
        if xout > 0.5:
            pred = 1
        if test_target[n] != pred: nb_test_errors = nb_test_errors + 1

    print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
          .format(k,
                  acc_loss,
                  (100 * nb_train_errors) / train_input.size(0),
                  (100 * nb_test_errors) / test_input.size(0)))


RuntimeError: vector + matrix @ vector expected, got 1, 2, 2

In [2]:
a = torch.Tensor([1.,5.,3.]).unsqueeze(0)
b = torch.Tensor([2.,5.,6.]).unsqueeze(1)
print(a.size())

torch.Size([1, 3])


In [4]:
print(torch.mm(a.t(),b.t()))

tensor([[ 2.,  5.,  6.],
        [10., 25., 30.],
        [ 6., 15., 18.]])


In [7]:
x2.unsqueeze(1)

torch.Size([25])
