In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

In [3]:
print(torch.cuda.current_device())
print(torch.cuda.is_available())

0
True


In [4]:
start = time.time()
t1 = torch.FloatTensor(10,10)
t2 = torch.FloatTensor(10,10)
t3 = t1.matmul(t2)
end = time.time()
print('CPU ', end-start)
print(t3)

start = time.time()
t1 = torch.cuda.FloatTensor(10,10)
t2 = torch.cuda.FloatTensor(10,10)
t3 = t1.matmul(t2)
end = time.time()
print('GPU ', end-start)
print(t3)

CPU  0.0019943714141845703
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
GPU  0.0029916763305664062
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [5]:
lin = nn.Linear(5, 3)  # maps from R^5 to R^3, parameters A, b
# data is 2x5.  A maps from 5 to 3... can we map "data" under A?
data = torch.randn(2, 5)
print(lin(data))  # yes

tensor([[ 0.6979,  0.8241,  0.7742],
        [-0.6039,  0.3429, -0.1860]], grad_fn=<AddmmBackward>)


In [6]:
#Input array
X = torch.Tensor([[1,0,1,0],[1,0,1,1],[0,1,0,1]])

#Output
y = torch.Tensor([[1],[1],[0]])

#Sigmoid Function
def sigmoid (x):
  return 1/(1 + torch.exp(-x))

#Derivative of Sigmoid Function
def deriv_sigmoid(x):
  return x * (1 - x)

#Hypers
epoch=5000 
lr=0.1 
inp = X.shape[1] 
hid = 3
out = 1

# weight and bias initialization
wh=torch.randn(inp, hid).type(torch.FloatTensor)
bh=torch.randn(1, hid).type(torch.FloatTensor)
wout=torch.randn(hid, out)
bout=torch.randn(1, out)

In [10]:
for i in range(epoch):

  #Forward step
  hid_inp1 = torch.mm(X, wh)
  hid_inp = hid_inp1 + bh
  hid_act = sigmoid(hid_inp)
 
  out_inp1 = torch.mm(hid_act, wout)
  out_inp = out_inp1 + bout
  out = sigmoid(out_inp1)

  #Backward step
  E = y-output
  slope_out = deriv_sigmoid(out)
  slope_hid = deriv_sigmoid(hid_act)
  d_out = E * slope_out
  err_hid = torch.mm(d_out, wout.t())
  d_hid = err_hid * slope_hid
  wout += torch.mm(hid_act.t(), d_out) * lr
  bout += d_out.sum() * lr
  wh += torch.mm(X.t(), d_hid) * lr
  bh += d_output.sum() * lr
 
'actual:', y, 'predicted:', out

('actual:',
 tensor([[1.],
         [1.],
         [0.]]),
 'predicted:',
 tensor([[0.9688],
         [0.9627],
         [0.0925]]))

## Credits & Links

https://towardsdatascience.com/how-to-train-your-neural-net-tensors-and-autograd-941f2c4cc77c
https://www.analyticsvidhya.com/blog/2018/02/pytorch-tutorial/