In [1]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())

2.9.1+rocmsdk20260116
True


In [2]:
X = torch.tensor([[1.0, 4.0, 7.0], [2.0, 3.0, 6.0]])
X

tensor([[1., 4., 7.],
        [2., 3., 6.]])

In [3]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Device count:", torch.cuda.device_count())

if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))


Torch version: 2.9.1+rocmsdk20260116
CUDA available: True
CUDA version: None
Device count: 1
GPU name: AMD Radeon RX 7900 XTX


In [4]:
X.shape

torch.Size([2, 3])

In [5]:
X.dtype 

torch.float32

In [6]:
X[0, 1]  # Accessing the element at first row and second column

tensor(4.)

In [7]:
X[:, 2]  # Accessing all rows in the third column

tensor([7., 6.])

In [8]:
10 * (X + 1)

tensor([[20., 50., 80.],
        [30., 40., 70.]])

In [9]:
X.exp()

tensor([[   2.7183,   54.5982, 1096.6332],
        [   7.3891,   20.0855,  403.4288]])

In [10]:
X.mean()

tensor(3.8333)

In [11]:
X.max(dim=0)

torch.return_types.max(
values=tensor([2., 4., 7.]),
indices=tensor([1, 0, 0]))

In [12]:
X @ X.T

tensor([[66., 56.],
        [56., 49.]])

In [13]:
import numpy as np

In [14]:
X.numpy()

array([[1., 4., 7.],
       [2., 3., 6.]], dtype=float32)

In [15]:
torch.tensor(np.array([[1.0, 4.0, 7.0], [2.0, 3.0, 6.0]]))

tensor([[1., 4., 7.],
        [2., 3., 6.]], dtype=torch.float64)

In [16]:
torch.FloatTensor(np.array([[1.0, 4.0, 7.0], [2.0, 3.0, 6.0]]))

tensor([[1., 4., 7.],
        [2., 3., 6.]])

In [17]:
X[:, 1] = -99

In [18]:
X

tensor([[  1., -99.,   7.],
        [  2., -99.,   6.]])

In [19]:
X.relu()

tensor([[1., 0., 7.],
        [2., 0., 6.]])

In [20]:
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [21]:
M = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

In [22]:
M = M.to(device)

In [23]:
M.device

device(type='cuda', index=0)

In [24]:
M = torch.tensor([[1, 2, 3], [4, 5, 6]], device=device)

In [25]:
R = M.float() @  M.T.float()
R.device

device(type='cuda', index=0)

In [26]:
R

tensor([[14., 32.],
        [32., 77.]], device='cuda:0')

In [27]:
M = torch.rand((1000, 1000))
%timeit M @ M.T

67.7 ms ± 3.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [28]:
M = torch.rand((1000, 1000), device=device)
%timeit M @ M.T

11.4 μs ± 69.8 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [29]:
M = torch.randn((1000, 1000), device='cuda')

def matmul_sync():
    result = M @ M.T
    torch.cuda.synchronize()
    return result

%timeit matmul_sync()


starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

# Warmup
for _ in range(10):
    _ = M @ M.T
torch.cuda.synchronize()

# Time it
starter.record()
for _ in range(100):
    _ = M @ M.T
ender.record()
torch.cuda.synchronize()

print(f"{starter.elapsed_time(ender) / 100:.3f} ms per matmul")

374 μs ± 96.7 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
0.167 ms per matmul


In [30]:
x = torch.tensor(5.0, requires_grad=True)
f = x ** 2
f


tensor(25., grad_fn=<PowBackward0>)

In [31]:
f.backward()


In [32]:
x.grad

tensor(10.)

In [33]:
a = 0.1
with torch.no_grad():
    x -= a * x.grad

In [34]:
x.grad.zero_()

tensor(0.)

In [35]:
learning_rate = 0.1
x = torch.tensor(5.0, requires_grad=True)
for iteration in range(10):
    f = x ** 2
    f.backward()
    with torch.no_grad():
        x-= learning_rate * x.grad
    x.grad.zero_()

In [None]:
t = torch.tensor(2.0, requires_grad=True)
z = t.exp()
z = z + 1
z.backward()

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of ExpBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [36]:
from sklearn.datasets import fetch_california_housing   
import numpy as np
import torch

housing = fetch_california_housing(as_frame=True)   

In [37]:
from sklearn.model_selection import train_test_split
X_train, X_temp, y_train, y_temp = train_test_split(
    housing.data, housing.target, test_size=0.36, random_state=42
)

X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.56, random_state=42
)


In [38]:
X_train = torch.FloatTensor(X_train.values)
X_valid = torch.FloatTensor(X_valid.values)
X_test = torch.FloatTensor(X_test.values)
means = X_train.mean(dim=0, keepdim =True)
stds = X_train.std(dim=0, keepdim=True)
X_train = (X_train - means) / stds
X_test = (X_test - means) / stds
X_valid = (X_valid - means) / stds

  X_train = torch.FloatTensor(X_train.values)


In [39]:
y_train = torch.FloatTensor(y_train.values).view(-1, 1)
y_valid = torch.FloatTensor(y_valid.values).view(-1, 1)
y_test = torch.FloatTensor(y_test.values).view(-1, 1)

In [40]:
torch.manual_seed(42)
n_features = X_train.shape[1]
w = torch.randn((n_features, 1), requires_grad=True)
b = torch.tensor(0., requires_grad=True)

In [41]:
learning_rate = 0.1
n_epoches = 1000
for epoch in range(n_epoches):
    y_pred = X_train @ w + b 
    loss = ((y_pred - y_train) ** 2).mean()
    loss.backward()
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        w.grad.zero_()
        b.grad.zero_()
    print(f'Epoch{epoch + 1} / {n_epoches}, Loss: {loss.item():.4f}')

Epoch1 / 1000, Loss: 16.0319
Epoch2 / 1000, Loss: 7.8154
Epoch3 / 1000, Loss: 4.3351
Epoch4 / 1000, Loss: 2.7415
Epoch5 / 1000, Loss: 1.9445
Epoch6 / 1000, Loss: 1.5101
Epoch7 / 1000, Loss: 1.2553
Epoch8 / 1000, Loss: 1.0970
Epoch9 / 1000, Loss: 0.9944
Epoch10 / 1000, Loss: 0.9254
Epoch11 / 1000, Loss: 0.8775
Epoch12 / 1000, Loss: 0.8433
Epoch13 / 1000, Loss: 0.8181
Epoch14 / 1000, Loss: 0.7988
Epoch15 / 1000, Loss: 0.7835
Epoch16 / 1000, Loss: 0.7710
Epoch17 / 1000, Loss: 0.7605
Epoch18 / 1000, Loss: 0.7513
Epoch19 / 1000, Loss: 0.7431
Epoch20 / 1000, Loss: 0.7356
Epoch21 / 1000, Loss: 0.7288
Epoch22 / 1000, Loss: 0.7223
Epoch23 / 1000, Loss: 0.7162
Epoch24 / 1000, Loss: 0.7104
Epoch25 / 1000, Loss: 0.7049
Epoch26 / 1000, Loss: 0.6996
Epoch27 / 1000, Loss: 0.6944
Epoch28 / 1000, Loss: 0.6895
Epoch29 / 1000, Loss: 0.6847
Epoch30 / 1000, Loss: 0.6801
Epoch31 / 1000, Loss: 0.6756
Epoch32 / 1000, Loss: 0.6713
Epoch33 / 1000, Loss: 0.6671
Epoch34 / 1000, Loss: 0.6630
Epoch35 / 1000, Loss: 

In [42]:
X_new = X_test[:3]
with torch.no_grad():
    y_pred = X_new @ w + b
y_pred

tensor([[1.9950],
        [1.0269],
        [4.0733]])

In [43]:
import torch.nn as nn 

torch.manual_seed(42)
model = nn.Linear( in_features= n_features, out_features=1)

In [44]:
model.bias

Parameter containing:
tensor([0.3117], requires_grad=True)

In [45]:
model.weight, model.bias

(Parameter containing:
 tensor([[ 0.2703,  0.2935, -0.0828,  0.3248, -0.0775,  0.0713, -0.1721,  0.2076]],
        requires_grad=True),
 Parameter containing:
 tensor([0.3117], requires_grad=True))

In [46]:
model(X_train[:2])

tensor([[0.1839],
        [1.1113]], grad_fn=<AddmmBackward0>)

In [47]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse = nn.MSELoss()

In [None]:
def train_bgd(model, optimizer, criterion, X_train, y_train, n_epochs):
    for epoch in range(n_epochs):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(f'Epoch {epoch + 1} / {n_epochs}, Loss: {loss.item():.4f}')

In [49]:
train_bgd(model, optimizer, mse, X_train, y_train, n_epoches)

Epoch 1 / 1000, Loss: 4.2651
Epoch 2 / 1000, Loss: 2.9254
Epoch 3 / 1000, Loss: 2.0820
Epoch 4 / 1000, Loss: 1.5478
Epoch 5 / 1000, Loss: 1.2081
Epoch 6 / 1000, Loss: 0.9913
Epoch 7 / 1000, Loss: 0.8523
Epoch 8 / 1000, Loss: 0.7629
Epoch 9 / 1000, Loss: 0.7049
Epoch 10 / 1000, Loss: 0.6671
Epoch 11 / 1000, Loss: 0.6420
Epoch 12 / 1000, Loss: 0.6252
Epoch 13 / 1000, Loss: 0.6137
Epoch 14 / 1000, Loss: 0.6055
Epoch 15 / 1000, Loss: 0.5995
Epoch 16 / 1000, Loss: 0.5950
Epoch 17 / 1000, Loss: 0.5914
Epoch 18 / 1000, Loss: 0.5884
Epoch 19 / 1000, Loss: 0.5858
Epoch 20 / 1000, Loss: 0.5835
Epoch 21 / 1000, Loss: 0.5814
Epoch 22 / 1000, Loss: 0.5794
Epoch 23 / 1000, Loss: 0.5776
Epoch 24 / 1000, Loss: 0.5758
Epoch 25 / 1000, Loss: 0.5741
Epoch 26 / 1000, Loss: 0.5725
Epoch 27 / 1000, Loss: 0.5710
Epoch 28 / 1000, Loss: 0.5695
Epoch 29 / 1000, Loss: 0.5680
Epoch 30 / 1000, Loss: 0.5666
Epoch 31 / 1000, Loss: 0.5652
Epoch 32 / 1000, Loss: 0.5639
Epoch 33 / 1000, Loss: 0.5626
Epoch 34 / 1000, Lo

In [50]:
X_new =X_test[:3]
with torch.no_grad():
    y_pred = model(X_new)

y_pred

tensor([[1.9950],
        [1.0269],
        [4.0733]])

In [51]:
import torch.nn as nn

torch.manual_seed(42)       
model = nn.Sequential(
    nn.Linear(n_features, 50),
    nn.ReLU(),
    nn.Linear(50, 40),
    nn.ReLU(),
    nn.Linear(40, 1)
)

In [55]:
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
mse = nn.MSELoss()
train_bgd(model, optimizer, mse, X_train, y_train, n_epoches)

Epoch 1 / 1000, Loss: 0.3504
Epoch 2 / 1000, Loss: 0.3503
Epoch 3 / 1000, Loss: 0.3501
Epoch 4 / 1000, Loss: 0.3500
Epoch 5 / 1000, Loss: 0.3498
Epoch 6 / 1000, Loss: 0.3499
Epoch 7 / 1000, Loss: 0.3517
Epoch 8 / 1000, Loss: 0.3702
Epoch 9 / 1000, Loss: 0.5405
Epoch 10 / 1000, Loss: 1.8131
Epoch 11 / 1000, Loss: 4.4146
Epoch 12 / 1000, Loss: 1.7431
Epoch 13 / 1000, Loss: 0.9755
Epoch 14 / 1000, Loss: 0.7097
Epoch 15 / 1000, Loss: 0.5903
Epoch 16 / 1000, Loss: 0.5345
Epoch 17 / 1000, Loss: 0.5044
Epoch 18 / 1000, Loss: 0.4862
Epoch 19 / 1000, Loss: 0.4735
Epoch 20 / 1000, Loss: 0.4636
Epoch 21 / 1000, Loss: 0.4554
Epoch 22 / 1000, Loss: 0.4481
Epoch 23 / 1000, Loss: 0.4416
Epoch 24 / 1000, Loss: 0.4357
Epoch 25 / 1000, Loss: 0.4303
Epoch 26 / 1000, Loss: 0.4255
Epoch 27 / 1000, Loss: 0.4211
Epoch 28 / 1000, Loss: 0.4172
Epoch 29 / 1000, Loss: 0.4136
Epoch 30 / 1000, Loss: 0.4104
Epoch 31 / 1000, Loss: 0.4074
Epoch 32 / 1000, Loss: 0.4047
Epoch 33 / 1000, Loss: 0.4022
Epoch 34 / 1000, Lo

In [61]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size =512, shuffle=True)

In [62]:
torch.manual_seed(42)
model = nn.Sequential(
    nn.Linear(n_features, 50),
    nn.ReLU(),
    nn.Linear(50, 40),
    nn.ReLU(),
    nn.Linear(40, 1)
    )
model = model.to(device)

In [63]:
def train(optimizer, criterion, model, train_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        total_loss = 0.
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss+= loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        mean_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch + 1} / {n_epochs}, Loss: {mean_loss:.4f}')

In [64]:
train(optimizer, mse, model, train_loader, n_epoches)

Epoch 1 / 1000, Loss: 4.9775
Epoch 2 / 1000, Loss: 4.9791
Epoch 3 / 1000, Loss: 4.9772
Epoch 4 / 1000, Loss: 4.9774
Epoch 5 / 1000, Loss: 4.9785
Epoch 6 / 1000, Loss: 4.9765
Epoch 7 / 1000, Loss: 4.9826
Epoch 8 / 1000, Loss: 4.9823
Epoch 9 / 1000, Loss: 4.9768
Epoch 10 / 1000, Loss: 4.9768
Epoch 11 / 1000, Loss: 4.9791
Epoch 12 / 1000, Loss: 4.9818
Epoch 13 / 1000, Loss: 4.9777
Epoch 14 / 1000, Loss: 4.9774
Epoch 15 / 1000, Loss: 4.9777
Epoch 16 / 1000, Loss: 4.9796
Epoch 17 / 1000, Loss: 4.9798
Epoch 18 / 1000, Loss: 4.9806
Epoch 19 / 1000, Loss: 4.9790
Epoch 20 / 1000, Loss: 4.9781
Epoch 21 / 1000, Loss: 4.9790
Epoch 22 / 1000, Loss: 4.9771
Epoch 23 / 1000, Loss: 4.9769
Epoch 24 / 1000, Loss: 4.9792
Epoch 25 / 1000, Loss: 4.9798
Epoch 26 / 1000, Loss: 4.9800
Epoch 27 / 1000, Loss: 4.9788
Epoch 28 / 1000, Loss: 4.9792
Epoch 29 / 1000, Loss: 4.9765
Epoch 30 / 1000, Loss: 4.9769
Epoch 31 / 1000, Loss: 4.9769
Epoch 32 / 1000, Loss: 4.9783
Epoch 33 / 1000, Loss: 4.9777
Epoch 34 / 1000, Lo

In [60]:
print(device)
print(next(model.parameters()).device)


cuda
cuda:0
