In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot


In [2]:
# Ground-truth parameters of the line y = true_b + true_w * x, and sample count.
true_b, true_w = 1, 2
N = 100

# Seed NumPy's global RNG so the draws below are repeatable.
np.random.seed(42)

# Features: N uniform samples, stored as an (N, 1) column.
x = np.random.rand(N, 1)
print(x.shape)

# Gaussian noise scaled by -0.1, then the noisy targets.
epsilon = np.random.randn(N, 1) * -0.1
y = true_b + true_w * x + epsilon

(100, 1)


In [3]:
# Shuffle the row indices in place using NumPy's global RNG.
idx = np.arange(N)
np.random.shuffle(idx)

# The first 80% of the shuffled indices are for training, the rest for validation.
n_train = int(0.8 * N)
train_idx, val_idx = idx[:n_train], idx[n_train:]

x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

In [4]:
# Batch gradient descent for the linear model yhat = b + w * x, in plain NumPy.
#
# Setup 0: initialize the parameters b and w randomly.
# The global RNG is re-seeded here, so the starting point is the same on every run.
np.random.seed(42)
b = np.random.randn(1)
w = np.random.randn(1)
# Setup 1: set the learning rate (commonly written as the Greek letter eta).
lr = 0.03
# Setup 2: define the number of epochs (full passes over the training set).
n_epochs = 1000
# Setup 3: report the starting parameters, then run the training loop.
print('b, w after initial')
print(b, w)
for _ in range(n_epochs):
    # Step 1: compute the model's predictions (forward pass).
    yhat = b + w * x_train
    # Step 2: compute the error. This is batch gradient descent: every
    # training point is used for each parameter update.
    error = (yhat - y_train)
    # Step 3: compute the loss as the mean squared error (MSE).
    # The value is not used by the update below; it is computed for reference only.
    loss = (error ** 2).mean()
    # Step 4: compute the gradients of the MSE with respect to b and w:
    #   d(loss)/db = 2 * mean(error)
    #   d(loss)/dw = 2 * mean(x * error)
    b_grad = 2 * error.mean()
    w_grad = 2 * (x_train * error).mean()
    # Step 5: update the parameters by stepping against the gradient.
    b = b - lr * b_grad
    w = w - lr * w_grad
print('b, w after update')
print(b, w)


b, w after initial
[0.49671415] [-0.1382643]
b, w after update
[0.9847612] [2.01479102]


In [5]:
# Sanity check: fit scikit-learn's LinearRegression on the same training data
# and print its intercept and slope for comparison with b and w above.
linear = LinearRegression().fit(x_train, y_train)
print(linear.intercept_, linear.coef_[0])

[0.97645925] [2.03103553]


In [6]:
# Tensors of increasing rank: 0-d, 1-d, 2-d and 3-d.
# Note: `scaler` holds a scalar (0-d) tensor; the name is kept because later cells use it.
scaler = torch.tensor(9.0)
vector = torch.tensor([2, 5, 6])
matrix = torch.ones((2, 3), dtype=torch.float32)
tensor = torch.randn((2, 3, 4), dtype=torch.float32)  # unseeded: values differ per run

# Show each tensor on its own line(s).
print(scaler, vector, matrix, tensor, sep="\n")

tensor(9.)
tensor([2, 5, 6])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[[-1.1046,  1.0355,  0.5287, -0.2173],
         [ 2.1459,  0.2141,  0.2186,  0.6901],
         [ 1.2733,  0.3376,  1.7435,  0.6795]],

        [[-1.3852,  1.3246,  2.0283,  0.1722],
         [-1.6862,  0.2164,  0.1491, -1.4975],
         [-0.0162,  1.3466, -1.0186, -0.5995]]])


In [7]:
# The size() method and the shape attribute both report the tensor's dimensions.
print(f"{tensor.size()} {tensor.shape}")

torch.Size([2, 3, 4]) torch.Size([2, 3, 4])


In [8]:
# A 0-d tensor has no dimensions, so both spellings show an empty size.
print(f"{scaler.size()} {scaler.shape}")

torch.Size([]) torch.Size([])


In [9]:
# view() reshapes without copying: `same_x` is the same data as `matrix`
# seen as 1x6, so writing through it also changes `matrix`.
same_x = matrix.view((1, 6))
same_x[0, 2] = 300

# Print both to show the write landed in each.
print(same_x, matrix, sep="\n")

tensor([[  1.,   1., 300.,   1.,   1.,   1.]])
tensor([[  1.,   1., 300.],
        [  1.,   1.,   1.]])


In [10]:
# Copy the data first, then reshape the copy to 1x6. Because of clone(),
# `diff_matrix` owns its own data and edits to it leave `matrix` untouched.
diff_matrix = matrix.clone().detach().view(1, 6)
diff_matrix[0, 1] = 400

# `matrix` is unchanged; only `diff_matrix` shows the 400.
print(matrix, diff_matrix, sep="\n")

tensor([[  1.,   1., 300.],
        [  1.,   1.,   1.]])
tensor([[  1., 400., 300.,   1.,   1.,   1.]])


In [11]:
# Wrap the NumPy training features as a tensor; the NumPy dtype carries over.
x_train_tensor = torch.as_tensor(x_train)

# Display the tensor dtype next to the source array's dtype.
(x_train_tensor.dtype, x_train.dtype)

(torch.float64, dtype('float64'))

In [14]:
# Cast to single precision (torch.float32).
float_tensor = x_train_tensor.to(torch.float32)
float_tensor.dtype

torch.float32