In [109]:
!pip install torch



In [110]:
import torch
from torch import nn

In [111]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


In [112]:
class OneNeuron(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))
    
    def forward(self, x):
        return self.weight * x + self.bias


In [113]:
# keras one neuron code
# model = tf.keras.Sequential([
#     tf.keras.layers.Dense(units=1, input_shape=[1])
# ])


In [114]:
# class NeuralNetwork(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.flatten = nn.Flatten()
#         self.linear_relu_stack = nn.Sequential(
#             nn.Linear(28*28, 512),
#             nn.ReLU(),
#             nn.Linear(512, 512),
#             nn.ReLU(),
#             nn.Linear(512, 10),
#         )

#     def forward(self, x):
#         x = self.flatten(x)
#         logits = self.linear_relu_stack(x)
#         return logits


In [115]:
# keras code for class NeuralNetwork
# model = tf.keras.Sequential([ 
#     tf.keras.layers.Flatten(input_shape=(28, 28)),
#     tf.keras.layers.Dense(512, activation='relu'),
#     tf.keras.layers.Dense(512, activation='relu'),
#     tf.keras.layers.Dense(10),
# ])


In [116]:
model = OneNeuron().to(device)
print(model)

OneNeuron()


In [117]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: OneNeuron()


Layer: weight | Size: torch.Size([1]) | Values : tensor([0.2623], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: bias | Size: torch.Size([1]) | Values : tensor([-0.9676], device='mps:0', grad_fn=<SliceBackward0>) 



In [118]:
# torch tensor
X = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
Y = torch.tensor([3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0])

X = X.to(device)
Y = Y.to(device)


In [119]:
# base prediction of one neuron
y_pred = model(X)
print(f"Prediction: {y_pred}")
print(f"Actual: {Y}")

Prediction: tensor([-0.7053, -0.4430, -0.1807,  0.0816,  0.3439,  0.6062,  0.8685,  1.1308,
         1.3931,  1.6554], device='mps:0', grad_fn=<AddBackward0>)
Actual: tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')


In [120]:
# Define hyperparameters
learning_rate = 1e-2
epochs = 1

# Loss Function
loss_fn = nn.MSELoss()

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


In [121]:
# in kers it is very simple
# import tensorflow as tf
# model = tf.keras.Sequential([
#     tf.keras.layers.Dense(units=1, input_shape=[1])
# ])

# model.compile(optimizer='sgd', loss='mse', learning_rate=learning_rate, metrics=['accuracy'])
# model.fit(X, Y, epochs=5)
    

In [122]:
# train loop for one neuron
def trainloop (model, loss_fn, optimizer):
    size = len(X)
    for epoch in range(epochs):
        for i in range(size):
            x,y = X[i], Y[i]
            print(f"X: {x}, y: {y}")
            x,y=x.to(device),y.to(device)
            prediction = model(x)
            print(f"prediction: {prediction}")
            loss = loss_fn(prediction, y)
            print(f"loss: {loss}")
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"loss: {loss:>7f} [{i:>5d}/{size:>5d}]")


In [123]:
# evaluate loop for one neuron regression  
def evaluate (model, loss_fn):
    size = len(X)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i in range(size):
            x,y = X[i], Y[i]
            x,y=x.to(device),y.to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    test_loss /= size
    print(f'Test Error: \n Avg loss: {test_loss:>8f} \n')


In [127]:
for i in range(100):
    print(f"Epoch {i+1}\n-------------------------------")
    trainloop(model, loss_fn, optimizer)
    evaluate(model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
X: 1.0, y: 3.0
prediction: tensor([2.7752], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.050539590418338776
loss: 0.050540 [    0/   10]
X: 2.0, y: 5.0
prediction: tensor([4.8121], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.035317547619342804
loss: 0.035318 [    1/   10]
X: 3.0, y: 7.0
prediction: tensor([6.8663], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.017884206026792526
loss: 0.017884 [    2/   10]
X: 4.0, y: 9.0
prediction: tensor([8.9364], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.004039329010993242
loss: 0.004039 [    3/   10]
X: 5.0, y: 11.0
prediction: tensor([11.0066], device='mps:0', grad_fn=<AddBackward0>)
loss: 4.312550663598813e-05
loss: 0.000043 [    4/   10]
X: 6.0, y: 13.0
prediction: tensor([13.0510], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.002601936226710677
loss: 0.002602 [    5/   10]
X: 7.0, y: 15.0
prediction: tensor([15.0550], device='mps:0', grad_fn=<AddBackward0>)
loss: 0.0030248237308114767
l

In [128]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: OneNeuron()


Layer: weight | Size: torch.Size([1]) | Values : tensor([2.0007], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: bias | Size: torch.Size([1]) | Values : tensor([0.9928], device='mps:0', grad_fn=<SliceBackward0>) 



In [129]:
# base prediction of one neuron
y_pred = model(X)
print(f"Prediction: {y_pred}")
print(f"Actual: {Y}")

Prediction: tensor([ 2.9935,  4.9942,  6.9948,  8.9955, 10.9962, 12.9969, 14.9975, 16.9982,
        18.9989, 20.9996], device='mps:0', grad_fn=<AddBackward0>)
Actual: tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')


In [138]:
model2 = OneNeuron().to(device)
print(X)
print(model2(X))
print(Y)
print(dict(model2.named_parameters()))
# update weight and bias directly
model2.weight = nn.Parameter(torch.tensor([2.0]))
model2.bias = nn.Parameter(torch.tensor([1.0]))
print(dict(model2.named_parameters()))
model2.to(device)
x,y = X[0], Y[0]
x,y=x.to(device),y.to(device)
prediction = model2(x)
loss = loss_fn(prediction, y)
print(f"loss: {loss}")
print(model2(X))




tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([-0.1131,  0.5828,  1.2788,  1.9747,  2.6706,  3.3665,  4.0625,  4.7584,
         5.4543,  6.1502], device='mps:0', grad_fn=<AddBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'weight': Parameter containing:
tensor([0.6959], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([-0.8090], device='mps:0', requires_grad=True)}
{'weight': Parameter containing:
tensor([2.], requires_grad=True), 'bias': Parameter containing:
tensor([1.], requires_grad=True)}
loss: 0.0
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0',
       grad_fn=<AddBackward0>)


In [139]:
model2 = OneNeuron().to(device)
print(X)
print(model2(X))
print(Y)
print(dict(model2.named_parameters()))
x,y = X[0], Y[0]
x,y=x.to(device),y.to(device)
prediction = model2(x)
loss = loss_fn(prediction, y)
optimizer.zero_grad() # make gradient zero
loss.backward() # backpropagate
optimizer.step() # update parameters
print(dict(model2.named_parameters()))



tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([0.3859, 0.8723, 1.3587, 1.8451, 2.3315, 2.8179, 3.3043, 3.7907, 4.2771,
        4.7635], device='mps:0', grad_fn=<AddBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'weight': Parameter containing:
tensor([0.4864], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([-0.1005], device='mps:0', requires_grad=True)}
{'weight': Parameter containing:
tensor([0.4864], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([-0.1005], device='mps:0', requires_grad=True)}


In [144]:


model3 = OneNeuron().to(device)
print(X)
print(model3(X))
print(Y)
print(dict(model3.named_parameters()))

loss_fn3 = nn.MSELoss()
optimizer3 = torch.optim.SGD(model3.parameters(), lr=learning_rate)

for i in range(len(X)):
    x,y = X[i], Y[i]
    x,y=x.to(device),y.to(device)
    prediction = model3(x)
    loss = loss_fn(prediction, y)
    optimizer.zero_grad() # make gradient zero
    loss.backward() # backpropagate
    # get gradient values
    print(f"Gradient: {model3.weight.grad}")
    print(f"Gradient: {model3.bias.grad}")  
    optimizer.step() # update parameters
print(dict(model3.named_parameters()))



tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([ -0.8848,  -2.2376,  -3.5905,  -4.9433,  -6.2962,  -7.6490,  -9.0019,
        -10.3547, -11.7076, -13.0605], device='mps:0', grad_fn=<AddBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'weight': Parameter containing:
tensor([-1.3529], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([0.4681], device='mps:0', requires_grad=True)}
{'weight': Parameter containing:
tensor([-1.3529], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([0.4681], device='mps:0', requires_grad=True)}


  return F.mse_loss(input, target, reduction=self.reduction)


In [149]:

model3 = OneNeuron().to(device)
print(X)
print(model3(X))
print(Y)
print(dict(model3.named_parameters()))

loss_fn3 = nn.MSELoss()
optimizer3 = torch.optim.SGD(model3.parameters(), lr=learning_rate)

def eval2(model, loss_fn, X, Y):
    size = len(X)
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i in range(size):
            x,y = X[i], Y[i]
            x,y=x.to(device),y.to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    test_loss /= size
    print(f'Test Error: \n Avg loss: {test_loss:>8f} \n')

for epoch in range(1000):
    for i in range(len(X)):
        x,y = X[i], Y[i]
        x,y=x.to(device),y.to(device)
        prediction = model3(x)
        loss = loss_fn(prediction, y)
        optimizer3.zero_grad() # make gradient zero
        loss.backward() # backpropagate  
        optimizer3.step() # update parameters
    # print(dict(model3.named_parameters()))
    eval2(model3, loss_fn3, X, Y)



tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([ -2.5652,  -3.6210,  -4.6769,  -5.7328,  -6.7887,  -7.8446,  -8.9005,
         -9.9564, -11.0123, -12.0682], device='mps:0', grad_fn=<AddBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'weight': Parameter containing:
tensor([-1.0559], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([-1.5093], device='mps:0', requires_grad=True)}
Test Error: 
 Avg loss: 0.759571 

Test Error: 
 Avg loss: 0.707666 

Test Error: 
 Avg loss: 0.659308 

Test Error: 
 Avg loss: 0.614253 

Test Error: 
 Avg loss: 0.572278 

Test Error: 
 Avg loss: 0.533173 

Test Error: 
 Avg loss: 0.496739 

Test Error: 
 Avg loss: 0.462795 

Test Error: 
 Avg loss: 0.431169 

Test Error: 
 Avg loss: 0.401705 

Test Error: 
 Avg loss: 0.374258 

Test Error: 
 Avg loss: 0.348680 

Test Error: 
 Avg loss: 0.324855 

Test Error: 
 Avg loss: 0.302654 

Test Error: 
 Avg loss: 0.2819

In [150]:
print(dict(model3.named_parameters()))

{'weight': Parameter containing:
tensor([2.0000], device='mps:0', requires_grad=True), 'bias': Parameter containing:
tensor([1.0000], device='mps:0', requires_grad=True)}


## Using Sequential

In [162]:
class OneNeuronSequential(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)
    
    def forward(self, x):
        return self.linear(x)

model4 = OneNeuronSequential().to(device)
print(X)
print(model4(X.reshape(-1, 1)))
print(Y)
print(dict(model4.named_parameters()))

loss_fn4 = nn.MSELoss()
optimizer4 = torch.optim.SGD(model4.parameters(), lr=learning_rate)

def eval2(model, loss_fn, X, Y):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        pred = model(X.reshape(-1, 1))
        test_loss += loss_fn(pred, Y).item()
    print(f'Test Error: \n Avg loss: {test_loss:>8f} \n')

for epoch in range(1000):
    prediction = model4(X.reshape(-1, 1))
    loss = loss_fn4(prediction, Y.reshape(-1, 1))
    optimizer4.zero_grad() # make gradient zero
    loss.backward() # backpropagate  
    optimizer4.step() # update parameters
    # print(dict(model4.named_parameters()))
    eval2(model4, loss_fn4, X, Y.reshape(-1, 1))


tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([[-0.3963],
        [-1.3061],
        [-2.2159],
        [-3.1257],
        [-4.0355],
        [-4.9453],
        [-5.8552],
        [-6.7650],
        [-7.6748],
        [-8.5846]], device='mps:0', grad_fn=<LinearBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'linear.weight': Parameter containing:
tensor([[-0.9098]], device='mps:0', requires_grad=True), 'linear.bias': Parameter containing:
tensor([0.5135], device='mps:0', requires_grad=True)}
Test Error: 
 Avg loss: 15.682602 

Test Error: 
 Avg loss: 0.720445 

Test Error: 
 Avg loss: 0.033953 

Test Error: 
 Avg loss: 0.002448 

Test Error: 
 Avg loss: 0.000995 

Test Error: 
 Avg loss: 0.000921 

Test Error: 
 Avg loss: 0.000910 

Test Error: 
 Avg loss: 0.000902 

Test Error: 
 Avg loss: 0.000895 

Test Error: 
 Avg loss: 0.000887 

Test Error: 
 Avg loss: 0.000880 

Test Error: 
 Avg loss: 0.000873 

Test Er

In [163]:
print(X)
print(model4(X.reshape(-1, 1)))
print(Y)
print(dict(model4.named_parameters()))

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], device='mps:0')
tensor([[ 2.9991],
        [ 4.9993],
        [ 6.9994],
        [ 8.9996],
        [10.9997],
        [12.9999],
        [15.0000],
        [17.0001],
        [19.0003],
        [21.0004]], device='mps:0', grad_fn=<LinearBackward0>)
tensor([ 3.,  5.,  7.,  9., 11., 13., 15., 17., 19., 21.], device='mps:0')
{'linear.weight': Parameter containing:
tensor([[2.0001]], device='mps:0', requires_grad=True), 'linear.bias': Parameter containing:
tensor([0.9990], device='mps:0', requires_grad=True)}
