In [24]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Init tensor

In [3]:
x = torch.ones(3, 2)
print(x)
x = torch.zeros(3, 2)
print(x)
x = torch.rand(3, 2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.7063, 0.6321],
        [0.6462, 0.6457],
        [0.0727, 0.6894]])


In [4]:
x = torch.empty(3, 2)
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [5]:
x = torch.linspace(0, 1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [6]:
x = torch.tensor([[1, 2], 
                 [3, 4], 
                 [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


## Slicing ten

In [7]:
print(x.size())
print(x[:, 1]) 
print(x[0, :]) 

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [8]:
y = x[1, 1]
print(y)
print(y.item())

tensor(4)
4


## Reshaping 


In [9]:
print(x)
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [10]:
y = x.view(6,-1) 
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


## STO

In [11]:
x = torch.ones([3, 2])
y = torch.ones([3, 2])
z = x + y
print(z)
z = x - y
print(z)
z = x * y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [12]:
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [13]:
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


## Numpy  PyTorch

In [14]:
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [15]:
a = np.random.randn(5)
print(a)
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)

[-0.22885007 -0.67411774  0.25298311  1.47519692  0.20738995]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-0.2289, -0.6741,  0.2530,  1.4752,  0.2074], dtype=torch.float64)


In [16]:
np.add(a, 1, out=a)
print(a)
print(a_pt) 

[0.77114993 0.32588226 1.25298311 2.47519692 1.20738995]
tensor([0.7711, 0.3259, 1.2530, 2.4752, 1.2074], dtype=torch.float64)


In [17]:
%%time
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a, b)

CPU times: total: 93.8 ms
Wall time: 80.7 ms


In [18]:
%%time
for i in range(100):
  a = torch.randn([100, 100])
  b = torch.randn([100, 100])
  c = torch.matmul(a, b)

CPU times: total: 46.9 ms
Wall time: 42.5 ms


In [19]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b

CPU times: total: 33.8 s
Wall time: 35.3 s


In [20]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000])
  b = torch.randn([10000, 10000])
  c = a + b

CPU times: total: 14.5 s
Wall time: 10.2 s


## CUDA 

In [26]:
print(torch.cuda.device_count())

0


In [27]:
import torch

device = torch.device("cpu")
print(f"Using device: {device}")
print(f"CUDA available: {torch.cuda.is_available()}")

Using device: cpu
CUDA available: False


In [28]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x000001C6729D39D0>


AssertionError: Torch not compiled with CUDA enabled

In [29]:
cuda0 = torch.device('cuda:0')

In [30]:
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

AssertionError: Torch not compiled with CUDA enabled

In [31]:

import torch

a = torch.ones(3, 2, device='cpu')
b = torch.ones(3, 2, device='cpu')
c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


In [32]:
print(a)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [33]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

CPU times: total: 32.9 s
Wall time: 33.7 s


In [34]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu)

CPU times: total: 11.9 s
Wall time: 8.91 s


In [36]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device='cpu')
  b = torch.randn([10000, 10000], device='cpu')
  b.add_(a)

CPU times: total: 12.2 s
Wall time: 9.56 s


In [37]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

CPU times: total: 32min 8s
Wall time: 3min 33s


In [38]:
%%time
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

CPU times: total: 13min 37s
Wall time: 1min 31s


In [39]:
%%time
for i in range(10):
  a = torch.randn([10000, 10000], device='cpu')
  b = torch.randn([10000, 10000], device='cpu')
  torch.matmul(a, b)

CPU times: total: 13min 26s
Wall time: 1min 30s


## Autodiff

In [40]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [42]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [41]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [43]:
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [44]:
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [45]:
t.backward()

In [47]:
print(x.grad)

None


In [48]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [49]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


## Autodiff ex

In [50]:
x = torch.randn([20, 1], requires_grad=True)
y = 3*x - 2

In [51]:
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [52]:
print(loss)

tensor(240.5295, grad_fn=<SumBackward0>)


In [53]:
loss.backward()

In [54]:
print(w.grad, b.grad)

tensor([-51.3099]) tensor([126.1464])


##  loop

In [55]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(10):
  
  x = torch.randn([20, 1])
  y = 3*x - 2
  
  y_hat = w*x + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

  print(w.item(), b.item())
  

1.0 1.0
1.5782634019851685 -0.21871495246887207
2.1210508346557617 -0.8477856516838074
2.653946876525879 -1.2839107513427734
2.8102939128875732 -1.5840167999267578
2.852811574935913 -1.7429653406143188
2.925963878631592 -1.843009352684021
2.930065155029297 -1.8984636068344116
2.959040880203247 -1.941817045211792
2.9651601314544678 -1.957231879234314
2.980543851852417 -1.9792405366897583


## large problem

In [56]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)



for i in range(epochs):
  
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2
  
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()


  

CPU times: total: 58 s
Wall time: 14.8 s


In [58]:
%%time
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True, device='cpu')
b = torch.ones([1], requires_grad=True, device='cpu')


for i in range(epochs):
  
  x = torch.randn([N], device='cpu')
  y = torch.dot(3*torch.ones([N], device='cpu'), x) - 2
  
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

  

CPU times: total: 58.5 s
Wall time: 15.3 s
