In [43]:
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import seaborn
import torch
import numpy as np

In [44]:
# Build tensors (the multi-dimensional arrays that are fed to a model)
# from existing data: a Python list and a NumPy array.
np.set_printoptions(precision=5)

a = [1, 2, 3]
b = np.array([4, 5, 6], dtype=np.int32)

t_a = torch.tensor(a)      # built from the Python list
t_b = torch.from_numpy(b)  # keeps the int32 dtype of the NumPy array

print(t_b, t_a, sep='\n')

tensor([4, 5, 6], dtype=torch.int32)
tensor([1, 2, 3])


In [45]:
# 2x3 tensor filled with integer ones; the last expression shows its shape.
t_ones = torch.ones(2, 3, dtype=torch.int64)
t_ones.shape

torch.Size([2, 3])

In [46]:
# Show the values of the all-ones tensor `t_ones`.
print(t_ones)

tensor([[1, 1, 1],
        [1, 1, 1]])


In [47]:
# Create a 2x3 tensor of random values drawn uniformly from [0, 1).
rand_tensor = torch.rand((2, 3))
print(rand_tensor)

tensor([[0.6826, 0.3051, 0.4635],
        [0.4550, 0.5725, 0.4980]])


# Manipulating the data type and shape of a tensor

In [48]:
# Change the dtype of a tensor.
# `Tensor.to` hands back the converted tensor instead of changing `t_a`
# in place, so the dtype to inspect is the one of the result, `t_a_new`.
t_a_new = t_a.to(torch.int64)
print(t_a_new.dtype)


torch.int64


In [49]:
# Random 2x3 tensor.
# NOTE(review): the following cell draws `t` again before using it, so this
# assignment looks redundant — confirm before removing.
t = torch.rand(2,3)

In [50]:
# Transpose: swap dimensions 0 and 1 of a random 2x3 tensor (result is 3x2).
t = torch.rand(2, 3)
t_tr = t.transpose(0, 1)
t_tr

tensor([[0.3398, 0.7718],
        [0.5239, 0.0112],
        [0.7981, 0.8100]])

In [51]:
# Reshape a 1-D tensor of 30 elements into a 2-D tensor of shape 5x6.
t = torch.zeros(30)
t_reshape = torch.reshape(t, (5, 6))
t_reshape.shape

torch.Size([5, 6])

In [52]:
# Remove a size-1 dimension. Only dimension 4 is squeezed here, so the
# other singleton dimensions (0 and 2) are kept.
t = torch.zeros(1, 2, 1, 4, 1)
t_sqz = t.squeeze(4)
print(t_sqz.shape)

torch.Size([1, 2, 1, 4])


# Apply mathematical operations

In [53]:
# Two 5x2 operands for the math examples, seeded for reproducibility:
# t1 is uniform on [-1, 1), t2 is drawn from a standard normal distribution.
torch.manual_seed(1)
t1 = torch.rand(5, 2) * 2 - 1
t2 = torch.normal(mean=0, std=1, size=(5, 2))
t2

tensor([[ 0.8590,  0.7056],
        [-0.3406, -1.2720],
        [-1.1948,  0.0250],
        [-0.7627,  1.3969],
        [-0.3245,  0.2879]])

In [54]:
# Element-wise product of t1 and t2 (same shape as the operands).
t3 = t1 * t2
t3

tensor([[ 0.4426, -0.3114],
        [ 0.0660, -0.5970],
        [ 1.1249,  0.0150],
        [ 0.1569,  0.7107],
        [-0.0451, -0.0352]])

In [55]:
# Mean along dimension 0: averages over the rows, one value per column.
t4 = t1.mean(dim=0)
t4

tensor([-0.1373,  0.2028])

In [56]:
# Matrix product of t1 (5x2) with the transpose of t2 (2x5), giving 5x5.
t5 = t1 @ t2.transpose(0, 1)
print(t5)

tensor([[ 0.1312,  0.3860, -0.6267, -1.0096, -0.2943],
        [ 0.1647, -0.5310,  0.2434,  0.8035,  0.1980],
        [-0.3855, -0.4422,  1.1399,  1.5558,  0.4781],
        [ 0.1822, -0.5771,  0.2585,  0.8676,  0.2132],
        [ 0.0330,  0.1084, -0.1692, -0.2771, -0.0804]])


In [57]:
# L2 (Euclidean) norm of every row of t1: reducing over dim=1 yields
# one value per row.
norm_t1 = torch.linalg.norm(t1, dim=1, ord=2)
print(norm_t1)

tensor([0.6785, 0.5078, 1.1162, 0.5488, 0.1853])


## Split, stack, and concatenate tensors

In [58]:
# Divide a 6-element tensor into 2 chunks and show each one as a NumPy array.
torch.manual_seed(1)
t = torch.rand(6)
print(t)
t_split = t.chunk(2)
[chunk.numpy() for chunk in t_split]

tensor([0.7576, 0.2793, 0.4031, 0.7347, 0.0293, 0.7999])


[array([0.75763, 0.27931, 0.40307], dtype=float32),
 array([0.73468, 0.02928, 0.79986], dtype=float32)]

In [59]:
# Split a 5-element tensor into sections of explicit sizes 3 and 2.
torch.manual_seed(2)
t = torch.rand(5)
t_split1 = t.split([3, 2])

[section.numpy() for section in t_split1]

[array([0.6147 , 0.38101, 0.63711], dtype=float32),
 array([0.47446, 0.71359], dtype=float32)]

In [60]:
# Concatenate two 1-D tensors end to end along dimension 0.
A = torch.ones(3)
B = torch.zeros(3)
C = torch.cat((A, B), dim=0)
print(C)


tensor([1., 1., 1., 0., 0., 0.])


In [61]:
# Stack two 1-D tensors along a new dimension 1: A and B become the
# columns of a 3x2 tensor.
A = torch.ones(3)
B = torch.zeros(3)
C = torch.stack((A, B), dim=1)
C

tensor([[1., 0.],
        [1., 0.],
        [1., 0.]])

# Building input pipelines in PyTorch

# Create a PyTorch DataLoader from existing tensors

In [62]:
from torch.utils.data import DataLoader

# Wrap a 1-D float tensor in a DataLoader. With the default settings the
# loader yields the elements in order, one per batch.
t = torch.arange(6, dtype=torch.float32)
data_loader = DataLoader(dataset=t)


In [63]:
# Walk through the loader; every iteration yields one batch.
for batch in data_loader:
  print(batch)

tensor([0.])
tensor([1.])
tensor([2.])
tensor([3.])
tensor([4.])
tensor([5.])


In [64]:
# Re-batch the same tensor with up to 4 elements per batch. Because
# drop_last is False, the smaller final batch is kept.
data_loader = DataLoader(
  dataset=t,
  batch_size=4,
  drop_last=False,
)
for i, j in enumerate(data_loader):
  print(f'batch{i}: {j}')
  

batch0: tensor([0., 1., 2., 3.])
batch1: tensor([4., 5.])


## Combining two tensors into a dataset

In [65]:
# Toy data, seeded for reproducibility:
# features t_x (4 samples x 3 values) and integer labels t_y (0..3).
torch.manual_seed(1)
t_x = torch.rand(4, 3, dtype=torch.float32)
t_y = torch.arange(4)

In [66]:
# Display the feature tensor (4 rows of 3 values).
t_x

tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999],
        [0.3971, 0.7544, 0.5695],
        [0.4388, 0.6387, 0.5247]])

In [114]:
# Define a custom map-style dataset that pairs features with labels.
from torch.utils.data import Dataset


class JointDataset(Dataset):
  """Map-style dataset that pairs each feature entry with its label.

  Subclassing ``torch.utils.data.Dataset`` makes the intent explicit and
  lets the class be used wherever PyTorch expects a ``Dataset``.

  Args:
    x: Indexable, sized collection of features (e.g. a tensor whose
      first dimension enumerates the samples).
    y: Indexable, sized collection of labels, one per entry of ``x``.

  Raises:
    ValueError: If ``x`` and ``y`` do not hold the same number of samples.
  """

  def __init__(self, x, y):
    # Fail early: with mismatched lengths, indexing would otherwise
    # either raise late or silently ignore the surplus entries.
    if len(x) != len(y):
      raise ValueError(
        f'x and y must have the same length, got {len(x)} and {len(y)}'
      )
    self.x = x
    self.y = y

  def __len__(self):
    """Return the number of samples."""
    return len(self.x)

  def __getitem__(self, idx):
    """Return the ``(feature, label)`` pair stored at position ``idx``."""
    return self.x[idx], self.y[idx]

In [115]:
# Pair the feature tensor t_x with the label tensor t_y in one dataset.
joint_dataset = JointDataset(t_x, t_y)
# A single (x, y) sample can be fetched by index, e.g. joint_dataset[3].

In [116]:
# Iterate over the dataset sample by sample; each item is an (x, y) pair.
for features, label in joint_dataset:
  print('x: ', features, 'y: ', label)

x:  tensor([0.7576, 0.2793, 0.4031]) y:  tensor(0)
x:  tensor([0.7347, 0.0293, 0.7999]) y:  tensor(1)
x:  tensor([0.3971, 0.7544, 0.5695]) y:  tensor(2)
x:  tensor([0.4388, 0.6387, 0.5247]) y:  tensor(3)


In [141]:
# Shuffling loader over the joint dataset, seeded for reproducibility.
# Each batch holds a single (x, y) sample because batch_size is 1.
torch.manual_seed(1)
data_loader = DataLoader(joint_dataset, batch_size=1, shuffle=True)

In [120]:
# Display the unshuffled feature tensor for reference.
t_x

tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999],
        [0.3971, 0.7544, 0.5695],
        [0.4388, 0.6387, 0.5247]])

In [138]:
# Print the features and labels of every batch, numbering batches from 1.
# NOTE(review): this cell was last executed out of order relative to the
# cell that builds `data_loader`, so its saved output may reflect an
# earlier loader configuration — re-run the notebook top to bottom.
for i, batch in enumerate(data_loader, start=1):
  print(f'batch{i}: ', 'x: ', batch[0], 'y: ', batch[1])

batch1:  x:  tensor([[0.7576, 0.2793, 0.4031],
        [0.3971, 0.7544, 0.5695]]) y:  tensor([0, 2])
batch2:  x:  tensor([[0.7347, 0.0293, 0.7999],
        [0.4388, 0.6387, 0.5247]]) y:  tensor([1, 3])


In [142]:
# Iterate over the whole dataset for two epochs. Because the loader was
# built with shuffle=True, the sample order is redrawn for each epoch.
for epoch in range(2):
  print(f'epoch{epoch+1}')
  for i, batch in enumerate(data_loader, start=1):
    print(f'batch{i}, {batch[0]}, {batch[1]}')

epoch1
batch1, tensor([[0.3971, 0.7544, 0.5695]]), tensor([2])
batch2, tensor([[0.7576, 0.2793, 0.4031]]), tensor([0])
batch3, tensor([[0.7347, 0.0293, 0.7999]]), tensor([1])
batch4, tensor([[0.4388, 0.6387, 0.5247]]), tensor([3])
epoch2
batch1, tensor([[0.7576, 0.2793, 0.4031]]), tensor([0])
batch2, tensor([[0.3971, 0.7544, 0.5695]]), tensor([2])
batch3, tensor([[0.7347, 0.0293, 0.7999]]), tensor([1])
batch4, tensor([[0.4388, 0.6387, 0.5247]]), tensor([3])
