In [1]:
import torch; print(torch.cuda.is_available(), torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU only")

True NVIDIA GeForce GTX 1050 Ti


In [2]:
# Reproducibility
torch.manual_seed(42)

# ===== 1) Generate synthetic data y = w*x + b + noise =====
N = 200
true_w = 2.5
true_b = -0.8

In [3]:
X = torch.randn(N, 1)                           # shape: [N, 1]
noise = 0.3 * torch.randn(N, 1)
y = true_w * X + true_b + noise                 # target

In [6]:
print(y)

tensor([[ 4.4083e+00],
        [ 3.0646e+00],
        [ 1.7920e+00],
        [-6.1705e+00],
        [ 1.0046e+00],
        [-3.2866e+00],
        [-7.0877e-01],
        [-4.6002e+00],
        [-2.6740e+00],
        [ 3.0730e+00],
        [-2.1055e+00],
        [-4.5442e+00],
        [-2.4676e+00],
        [-2.1740e+00],
        [-2.5889e+00],
        [ 8.8890e-01],
        [ 3.1675e+00],
        [-1.2182e+00],
        [-2.4535e+00],
        [ 3.9792e-01],
        [-2.9901e+00],
        [ 1.9863e+00],
        [ 1.2556e+00],
        [ 3.3628e+00],
        [ 1.9252e+00],
        [ 3.1163e+00],
        [ 1.0265e+00],
        [ 2.9461e+00],
        [-1.1891e+00],
        [-5.7410e-01],
        [-1.3265e+00],
        [ 1.2832e+00],
        [-4.2098e+00],
        [-2.6627e+00],
        [-1.3562e+00],
        [ 3.4702e+00],
        [ 1.9001e-01],
        [-1.6890e+00],
        [ 1.4032e-01],
        [-2.7421e+00],
        [-4.9682e+00],
        [ 2.1343e+00],
        [-3.2727e+00],
        [-2

In [7]:
# Train/val split
idx = torch.randperm(N)
print(idx)
train_size = int(0.8 * N)
train_idx, val_idx = idx[:train_size], idx[train_size:]
X_train, y_train = X[train_idx], y[train_idx]
X_val,   y_val   = X[val_idx],   y[val_idx]

tensor([ 76, 165, 195, 156, 143, 179, 136, 129,  88, 186, 128,  41, 107,  64,
         58,  57,  86, 110, 125, 192,  25,  27, 142,  10,  61, 147,  70, 117,
        101,  47, 145, 146,  75, 113,   7,  79, 104,  45,  17, 157, 154, 162,
        196,  16,  13, 109,  66,  81,   2,  53,  60,  46, 148,  93, 140,  97,
        112, 122,  69,  14,  19,  20, 198,  84,  90, 106,  22, 167,  51, 150,
        164,  73, 158, 135,   5,   6,  91, 139,   3, 111,  68,  24, 193,  54,
        151,  77, 119,  39,  71, 174, 127,   9, 121, 181,  42,  28, 161,  34,
         56, 100, 108, 149, 166, 170, 183, 178, 103, 194,  62,  94,  38,  36,
        184,  40, 120, 172,  92, 189,  99,  49, 132,  98,  15, 180,   8, 199,
         85, 190, 155, 160, 102,  48, 171,   4,  30, 130,  72,  43, 153, 105,
        123,  55, 188,  23,  26,  29,  95, 185, 169,  12,  67, 133,  87,  32,
        137,   0,  35,  21, 168, 173,  83,  44, 175, 116, 138,  78, 159,  80,
        141, 187,  33, 144, 126, 114, 182, 191,  74,   1,  96,  

In [8]:
# Device (CPU is fine for this)
device = "cuda" if torch.cuda.is_available() else "cpu"
X_train, y_train, X_val, y_val = X_train.to(device), y_train.to(device), X_val.to(device), y_val.to(device)

In [9]:
# ===== 2) Parameters (we learn these) =====
w = torch.randn(1, requires_grad=True, device=device)  # scalar weight
b = torch.zeros(1, requires_grad=True, device=device)  # scalar bias

In [10]:

# ===== 3) Training loop (manual SGD with autograd) =====
lr = 0.1
epochs = 400

for epoch in range(1, epochs + 1):
    # Forward
    y_pred = X_train * w + b
    loss = ((y_pred - y_train) ** 2).mean()

    # Backward
    loss.backward()

    # Gradient step (no_grad because we're updating leaf tensors)
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad

    # Zero gradients for next step
    w.grad.zero_()
    b.grad.zero_()

    # Occasionally report validation loss
    if epoch % 50 == 0 or epoch == 1:
        with torch.no_grad():
            val_loss = ((X_val * w + b - y_val) ** 2).mean()
        print(f"Epoch {epoch:3d} | train_loss={loss.item():.4f} | val_loss={val_loss.item():.4f} | w={w.item():.3f} | b={b.item():.3f}")



Epoch   1 | train_loss=5.3992 | val_loss=4.6194 | w=0.618 | b=-0.113
Epoch  50 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 100 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 150 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 200 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 250 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 300 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 350 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758
Epoch 400 | train_loss=0.0780 | val_loss=0.0619 | w=2.505 | b=-0.758


In [11]:
print("\nTrue params:     w=%.3f  b=%.3f" % (true_w, true_b))
print("Learned params:  w=%.3f  b=%.3f" % (w.item(), b.item()))



True params:     w=2.500  b=-0.800
Learned params:  w=2.505  b=-0.758
