# Fundamentals of Pytorch

In [1]:
import torch

In [3]:
X = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
X

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [3]:
X.shape

torch.Size([2, 3])

In [4]:
X.dtype

torch.float32

In [5]:
X[0, 2]

tensor(3.)

In [6]:
print(X.mean())
print(X.sum())
print(X.std())
print(X.abs())
print(X.argmax())
print(X.exp())
print(X.sqrt())

tensor(3.5000)
tensor(21.)
tensor(1.8708)
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor(5)
tensor([[  2.7183,   7.3891,  20.0855],
        [ 54.5981, 148.4132, 403.4288]])
tensor([[1.0000, 1.4142, 1.7321],
        [2.0000, 2.2361, 2.4495]])


In [7]:
10 * (X + 3)

tensor([[40., 50., 60.],
        [70., 80., 90.]])

In [8]:
X.T @ X

tensor([[17., 22., 27.],
        [22., 29., 36.],
        [27., 36., 45.]])

### Convert it to numpy array

In [4]:
import numpy as np
X.numpy()

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

In [10]:
Y = np.array([[77, 88, 99], [10, 11, 12]])
Y

array([[77, 88, 99],
       [10, 11, 12]])

In [11]:
torch.tensor(Y) # default float presision is float64

tensor([[77, 88, 99],
        [10, 11, 12]])

In [12]:
torch.FloatTensor(Y) # specify float32

tensor([[77., 88., 99.],
        [10., 11., 12.]])

### Inplace Operations

In [13]:
X.relu_()

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [14]:
X.sqrt_()

tensor([[1.0000, 1.4142, 1.7321],
        [2.0000, 2.2361, 2.4495]])

In [15]:
X.zero_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

### Hardware Acceleration

In [16]:
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print(f'Using device: {device}')

Using device: cpu


Convert tensor-cpu to tensor-gpu

In [17]:
M = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
M = M.to(device)
M.device

device(type='cpu')

In [18]:
# Directly create tensor on GPU
M = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32, device=device)
M.device

device(type='cpu')

In [19]:
R = M @ M.T
R # the result tensor is also on the GPU

tensor([[14., 32.],
        [32., 77.]])

In [20]:
K = torch.randn((1000, 1000))
%timeit K @ K.T


22.7 ms ± 2.62 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# run this on gpu or google colab with gpu enabled
K = torch.randn((1000, 1000), device='cuda')
%timeit K @ K.T


### AutoGrad

In [22]:
z = torch.tensor(5.0, requires_grad=True)
f = z**3 + z**2 + z
f

tensor(155., grad_fn=<AddBackward0>)

In [23]:
f.backward()
z.grad

tensor(86.)

In [24]:
# for sin function + exponent
y = torch.tensor(2.0, requires_grad=True)
f = 2 * torch.sin(y) + 10 * torch.exp(y) 
f

tensor(75.7092, grad_fn=<AddBackward0>)

In [25]:
f.backward()
z.grad

tensor(86.)

In [35]:
x = torch.tensor(5.0, requires_grad=True)
f = x ** 2
f


tensor(25., grad_fn=<PowBackward0>)

In [36]:
f.backward()
x.grad

tensor(10.)

In [37]:
learning_rate = 0.1
with torch.no_grad():
    x = x - learning_rate * x.grad
x

tensor(4.)

In [38]:
x = torch.tensor(5.0, requires_grad=True)
f = x ** 2
f.backward()
x.grad
x_detached = x.detach()
x_detached = x_detached - learning_rate * x.grad
x_detached

tensor(4.)

In [46]:
learning_rate = 0.1
x = torch.tensor(5.0, requires_grad=True)

for i in range(10):
    f = x ** 2
    f.backward()
    with torch.no_grad():
        x -= learning_rate * x.grad
    x.grad.zero_()
    print(f'Iteration {i+1}: x = {x.item()}, f(x) = {f.item()}')

x

Iteration 1: x = 4.0, f(x) = 25.0
Iteration 2: x = 3.200000047683716, f(x) = 16.0
Iteration 3: x = 2.559999942779541, f(x) = 10.24000072479248
Iteration 4: x = 2.047999858856201, f(x) = 6.553599834442139
Iteration 5: x = 1.6383998394012451, f(x) = 4.194303512573242
Iteration 6: x = 1.3107198476791382, f(x) = 2.684354066848755
Iteration 7: x = 1.0485758781433105, f(x) = 1.7179864645004272
Iteration 8: x = 0.8388606905937195, f(x) = 1.0995113849639893
Iteration 9: x = 0.6710885763168335, f(x) = 0.7036872506141663
Iteration 10: x = 0.5368708372116089, f(x) = 0.45035988092422485


tensor(0.5369, requires_grad=True)

### Linear Regression Using Tensor and AutoGrad - Low Level API

In [6]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test =  train_test_split(housing.data, housing.target, test_size=0.2, random_state=42)

X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)

In [7]:
# Convert to torch tensors
X_train = torch.FloatTensor(X_train)
X_valid = torch.FloatTensor(X_valid)
X_test = torch.FloatTensor(X_test)

# Means and Standard Deviation
means = X_train.mean(dim=0, keepdims=True)
stds = X_train.std(dim=0, keepdims=True)

# Normalization of data
X_train = (X_train - means) / stds
X_valid = (X_valid - means) / stds
X_test = (X_test - means) / stds

In [8]:
y_train = torch.FloatTensor(y_train.reshape(-1, 1))
y_test= torch.FloatTensor(y_test.reshape(-1, 1))
y_valid = torch.FloatTensor(y_valid.reshape(-1, 1))

In [9]:
torch.manual_seed(42)
n_features = X_train.shape[1]
# initiazlize the weights and bias
w = torch.randn((n_features,1), requires_grad=True)
b = torch.tensor(0., requires_grad=True)


In [10]:
# Train the model
learning_rate = 0.4
n_epochs = 20

for epoch in range(n_epochs):
    y_pred = X_train @ w + b
    loss = ((y_pred - y_train) ** 2).mean()
    loss.backward()
    with torch.no_grad():
        b -= learning_rate * b.grad
        w -= learning_rate * w.grad

        b.grad.zero_()
        w.grad.zero_()
    print(f'Epoch {epoch + 1} / {n_epochs}, Loss: {loss.item()} ')

Epoch 1 / 20, Loss: 16.006189346313477 
Epoch 2 / 20, Loss: 4.656647205352783 
Epoch 3 / 20, Loss: 2.104856491088867 
Epoch 4 / 20, Loss: 1.2392680644989014 
Epoch 5 / 20, Loss: 0.9124192595481873 
Epoch 6 / 20, Loss: 0.7779623866081238 
Epoch 7 / 20, Loss: 0.7152512073516846 
Epoch 8 / 20, Loss: 0.6805720329284668 
Epoch 9 / 20, Loss: 0.6576956510543823 
Epoch 10 / 20, Loss: 0.6404302716255188 
Epoch 11 / 20, Loss: 0.6263097524642944 
Epoch 12 / 20, Loss: 0.6142743229866028 
Epoch 13 / 20, Loss: 0.6038089990615845 
Epoch 14 / 20, Loss: 0.5946190357208252 
Epoch 15 / 20, Loss: 0.586506187915802 
Epoch 16 / 20, Loss: 0.5793206095695496 
Epoch 17 / 20, Loss: 0.5729407668113708 
Epoch 18 / 20, Loss: 0.5672648549079895 
Epoch 19 / 20, Loss: 0.5622056126594543 
Epoch 20 / 20, Loss: 0.5576880574226379 


In [11]:
# make predications
X_new = X_test[:3]
with torch.no_grad():    
    y_pred = X_new @ w + b 

y_pred

tensor([[0.9118],
        [1.6231],
        [2.6630]])

Implement Linear Regression through High Level API 

In [12]:
import torch.nn as nn

torch.manual_seed(42)
model = nn.Linear(in_features=n_features, out_features=1)

In [51]:
# Model Bias
model.bias

Parameter containing:
tensor([0.2725, 0.0523], requires_grad=True)

In [53]:
# Model Weight
model.weight

Parameter containing:
tensor([[ 0.2703,  0.2935, -0.0828,  0.3248, -0.0775,  0.0713, -0.1721,  0.2076]],
       requires_grad=True)

In [13]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.2703,  0.2935, -0.0828,  0.3248, -0.0775,  0.0713, -0.1721,  0.2076]],
       requires_grad=True)
Parameter containing:
tensor([0.3117], requires_grad=True)


In [14]:
# name_parameters(): pair of name and value
for name_param in model.named_parameters():
    print(name_param[0] , ' => ', name_param[1])

weight  =>  Parameter containing:
tensor([[ 0.2703,  0.2935, -0.0828,  0.3248, -0.0775,  0.0713, -0.1721,  0.2076]],
       requires_grad=True)
bias  =>  Parameter containing:
tensor([0.3117], requires_grad=True)


In [15]:
# Call this model as normal function
model(X_train[:3])

tensor([[ 0.4296],
        [ 1.1455],
        [-0.2709]], grad_fn=<AddmmBackward0>)

In [16]:
# optimizers
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse = nn.MSELoss()

In [17]:
# train model function
def train_model(model, optimizer, criterion, X_train, y_train,n_epochs):
    for epoch in range(n_epochs):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {loss.item()}")

In [18]:
# call the train model function
train_model(model, optimizer,mse, X_train, y_train, n_epochs)

Epoch 1/20, Loss: 4.272577285766602
Epoch 2/20, Loss: 0.7673616409301758
Epoch 3/20, Loss: 0.6151816844940186
Epoch 4/20, Loss: 0.5950986742973328
Epoch 5/20, Loss: 0.5839646458625793
Epoch 6/20, Loss: 0.5752211213111877
Epoch 7/20, Loss: 0.567899227142334
Epoch 8/20, Loss: 0.5616247057914734
Epoch 9/20, Loss: 0.5561909079551697
Epoch 10/20, Loss: 0.5514596104621887
Epoch 11/20, Loss: 0.5473267436027527
Epoch 12/20, Loss: 0.5437079668045044
Epoch 13/20, Loss: 0.5405330061912537
Epoch 14/20, Loss: 0.5377423763275146
Epoch 15/20, Loss: 0.535285234451294
Epoch 16/20, Loss: 0.5331177115440369
Epoch 17/20, Loss: 0.5312021970748901
Epoch 18/20, Loss: 0.529506504535675
Epoch 19/20, Loss: 0.5280026197433472
Epoch 20/20, Loss: 0.5266667008399963


NOTE: nn.Linear module initializes the parameters slightly
 differently: it uses a uniform random distribution from 
−√2
 4 to 
+√2
 4
 for both the weights and the bias term (we will
 discuss initialization method

In [19]:
# make predications
X_new = X_test[:3]
with torch.no_grad():    
    y_pred = model(X_new)

y_pred

tensor([[0.8226],
        [1.6903],
        [2.6812]])

### Implementing a Regression MLP

In [None]:
import torch.nn as nn

torch.manual_seed(42)

model = nn.Sequential(
    nn.Linear(n_features, 50),
    nn.ReLU(),
    nn.Linear(50, 40),
    nn.ReLU(),
    nn.Linear(40, 1)
)

In [20]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse = nn.MSELoss()
train_model(model, optimizer, mse, X_train, y_train, n_epochs)


Epoch 1/20, Loss: 0.5254774689674377
Epoch 2/20, Loss: 0.5252067446708679
Epoch 3/20, Loss: 0.5249435305595398
Epoch 4/20, Loss: 0.5246874094009399
Epoch 5/20, Loss: 0.5244382619857788
Epoch 6/20, Loss: 0.5241959691047668
Epoch 7/20, Loss: 0.5239601135253906
Epoch 8/20, Loss: 0.5237306952476501
Epoch 9/20, Loss: 0.5235074162483215
Epoch 10/20, Loss: 0.5232901573181152
Epoch 11/20, Loss: 0.5230786204338074
Epoch 12/20, Loss: 0.5228727459907532
Epoch 13/20, Loss: 0.5226723551750183
Epoch 14/20, Loss: 0.5224772095680237
Epoch 15/20, Loss: 0.5222871899604797
Epoch 16/20, Loss: 0.5221022367477417
Epoch 17/20, Loss: 0.5219220519065857
Epoch 18/20, Loss: 0.5217465758323669
Epoch 19/20, Loss: 0.5215755105018616
Epoch 20/20, Loss: 0.5214090943336487


In [22]:
with torch.no_grad():
    y_pred = model(X_new)

y_pred

tensor([[0.7987],
        [1.7068],
        [2.7039]])