In [4]:
%pip install torch torchvision torchaudio 



In [None]:
import torch 


# set the weight 
w = torch.tensor(4.0)

# without gradient calculations 
print("1. Without gradient calculation")
print(w)
print()

# with gradient calculations 
# learnable parameter 
w = torch.tensor(4.0, requires_grad=True)
print("2. w with gradient calculations")
print(w)
print()

# with scaler functions 
# loss = w^2 + 2
loss = w**2 + 2
print(f"loss = {loss:.2f}")

# compute gradient 
loss.backward()
print(f"3. dloss/dw = {w.grad:.2f}")
print()

# without scalers 
w = torch.tensor([4.0, 5.0], requires_grad=True)
print("4. w as a vector:")
print(w)
print()

# with multidimensional case 
# f(w1, w2) =  w1**2 + w2 + 1
w = torch.tensor([3.0, 2.0], requires_grad=True)
loss = w[0]**2 + w[1] + 1
loss.backward()
print("5.")
print(f"loss = {loss:.2f}")
print(f"dloss/dw1 = {w.grad[0].item():.2f}")
print(f"dloss/dw2 = {w.grad[1].item():.2f}")
print()



1. Without gradient calculation
tensor(4.)

2. w with gradient calculations
tensor(4., requires_grad=True)

loss = 18.00
3. dloss/dw = 8.00

4. w as a vector:
tensor([4., 5.], requires_grad=True)

5.
loss = 12.00
dloss/dw1 = 6.00
dloss/dw2 = 1.00



In [15]:
# starting point 
w = torch.tensor(4.0, requires_grad=True)
lr = 0.3
steps = 15


for step in range(steps):
    loss = w**2
    loss.backward()
    print(f"Step{step:2d} | w = {w.item():6.2f} | loss = {loss.item():6.2} | gradient = {w.grad.item():6.2f}")

    # Gradient descent update
    with torch.no_grad():
        w -= lr * w.grad 
    
    # clear the grad for next iteration 
    w.grad.zero_()



Step 0 | w =   4.00 | loss = 1.6e+01 | gradient =   8.00
Step 1 | w =   1.60 | loss =    2.6 | gradient =   3.20
Step 2 | w =   0.64 | loss =   0.41 | gradient =   1.28
Step 3 | w =   0.26 | loss =  0.066 | gradient =   0.51
Step 4 | w =   0.10 | loss =   0.01 | gradient =   0.20
Step 5 | w =   0.04 | loss = 0.0017 | gradient =   0.08
Step 6 | w =   0.02 | loss = 0.00027 | gradient =   0.03
Step 7 | w =   0.01 | loss = 4.3e-05 | gradient =   0.01
Step 8 | w =   0.00 | loss = 6.9e-06 | gradient =   0.01
Step 9 | w =   0.00 | loss = 1.1e-06 | gradient =   0.00
Step10 | w =   0.00 | loss = 1.8e-07 | gradient =   0.00
Step11 | w =   0.00 | loss = 2.8e-08 | gradient =   0.00
Step12 | w =   0.00 | loss = 4.5e-09 | gradient =   0.00
Step13 | w =   0.00 | loss = 7.2e-10 | gradient =   0.00
Step14 | w =   0.00 | loss = 1.2e-10 | gradient =   0.00


In [None]:
# use tensors 
x = [1,2,3,4,5,6]
w = 7
c = 6
print(50*"-")
x_tensor = torch.tensor(x,dtype=torch.float32)
w_tensor = torch.tensor(w,dtype=torch.float32)
print(f"x_tensor = {x_tensor}")
print(f"w_tensor = {w_tensor}")
print(50*"-")

print(f"w*x_tensor = {w_tensor*x_tensor}")
# scaler w_tensor*x_tensor / multiply 7 with all numbers in x_tensor 
# look at note in week02

y = w_tensor*x_tensor + c
print(f"y = {y}")

--------------------------------------------------
x_tensor = tensor([1., 2., 3., 4., 5., 6.])
w_tensor = 7.0
--------------------------------------------------
w*x_tensor = tensor([ 7., 14., 21., 28., 35., 42.])
y = tensor([13., 20., 27., 34., 41., 48.])


In [None]:
import numpy as np

def ideal_linear_function(x):
    return 7 * x + 6

# Generate x values with fewer points
x_values = np.linspace(0, 10, 20)

# Compute the corresponding y values
y_values = ideal_linear_function(x_values)

noise = np.random.normal(0, 3, size=y_values.shape)
y_values_noisy = y_values + noise

x_tensor = torch.tensor(x_values,dtype=torch.float32)
y_tensor = torch.tensor(y_values_noisy,dtype=torch.float32)

print("-"*50)
print("x data type: ", x_tensor.dtype)
print("x shape", x_tensor.shape)
print("y data type: ", y_tensor.dtype)
print("y shape", y_tensor.shape)
print("-"*50)
print()


--------------------------------------------------
x data type:  torch.float32
x shape torch.Size([20])
y data type:  torch.float32
y shape torch.Size([20])
--------------------------------------------------



In [20]:
import torch 



w = torch.tensor(-100., requires_grad=True)
b = torch.tensor(6., requires_grad=True)

lr = 0.01
steps = 500

w_his = []
b_his = []
loss_his = []

for step in range(steps):
    y_pre = w*x_tensor + b
    loss = torch.mean((y_pre-y_tensor)**2)
    w_his.append(w.item())
    b_his.append(b.item())
    loss_his.append(loss.item())

    loss.backward()

    # Manual update
    with torch.no_grad():
        w -= lr * w.grad
        b -= lr * b.grad
        w.grad.zero_()
        b.grad.zero_()

print("Training completed.")
print(f"Final weight: {w.item():.2f}")
print(f"Final bias:   {b.item():.2f}")
print(f"Final loss:   {loss_his[-1]:.2f}")



Training completed.
Final weight: 6.79
Final bias:   7.44
Final loss:   11.51
