In [1]:
# Third-party packages used by this notebook.
import numpy as np
import torch
import matplotlib.pyplot as plt

# Print the library versions so the recorded results can be tied to them.
print(np.__version__)
print(torch.__version__)

1.15.4
1.1.0


In [2]:
# Design matrix: 15 samples with two features each.
X = np.array([[0.11, 0.09], [0.01, 0.02], [0.98, 0.91],
              [0.12, 0.21], [0.98, 0.99], [0.85, 0.87],
              [0.03, 0.14], [0.55, 0.45], [0.49, 0.51],
              [0.99, 0.01], [0.02, 0.89], [0.31, 0.47],
              [0.55, 0.29], [0.87, 0.76], [0.63, 0.24]])
# Append a column of ones so that the third weight plays the role of the
# intercept. The column length is read from X rather than hard-coded, so the
# sample list above can be edited without touching this line.
X = np.column_stack((X, np.ones(X.shape[0])))
# One target value per row of X.
y = np.array([-0.8, -0.97, 0.89, -0.67, 0.97, 0.72,
              -0.83, 0.00, 0.00, 0.00, -0.09, -0.22,
              -0.16, 0.63, 0.37])

# Closed-form least-squares solution w = pinv(X) y.
# np.linalg.pinv builds the Moore-Penrose pseudo-inverse from an SVD of X,
# which avoids explicitly inverting X^T X (forming X^T X squares the
# condition number of the problem).
solution_pseudo = np.matmul(np.linalg.pinv(X), y)
print("Solution via pseudo inverse: {}".format(solution_pseudo))


# Turn the targets into a column so they line up with the (N, 1) output of
# torch.mm in the model, then hand both arrays to torch.
# torch.from_numpy keeps the float64 dtype and shares memory with the arrays.
y = y.reshape((-1, 1))
X = torch.from_numpy(X)
y = torch.from_numpy(y)

# Let us define the torch module
class LinearModel(torch.nn.Module):
    """Linear map ``X -> X w`` with a single trainable weight column ``w``.

    There is no separate bias parameter; ``forward`` only multiplies its
    input by ``w``.
    """

    def __init__(self, num_features):
        """Create the ``(num_features, 1)`` weight column.

        The weights are drawn with ``torch.rand`` and then converted to
        double precision before being registered as a parameter.
        """
        super(LinearModel, self).__init__()
        initial_weights = torch.rand(num_features, 1).double()
        self.w = torch.nn.Parameter(initial_weights)

    def forward(self, X):
        """Return the matrix product of ``X`` and the weight column."""
        return torch.mm(X, self.w)

# One weight per column of the design matrix (the two features plus the
# column of ones), read from X instead of being hard-coded.
num_unknowns = X.shape[1]
model = LinearModel(num_features=num_unknowns)
# Sum (not mean) of squared errors over all samples.
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Train iteratively. Every step feeds the whole of X to the model, so this is
# plain full-batch gradient descent.
# NOTE: the model weights start from torch.rand values, so the losses printed
# for the first few hundred steps vary from run to run.
num_steps = 1000
log_every = 100
for step in range(num_steps):
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    if step % log_every == 0:
        # .item() pulls the scalar out of the 0-dim loss tensor for printing.
        print("Loss at step {}: {}".format(step, loss.item()))

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    # Compute d(loss)/d(w) for this step.
    loss.backward()
    # Apply the gradient-descent update to w.
    optimizer.step()

# detach() gives a tensor that is cut off from the autograd graph, which is
# the supported way to export parameter values (instead of going through
# the .data attribute).
solution_gd = np.squeeze(model.w.detach().numpy())
print("The solution via gradient descent is {}".format(solution_gd))

# Same tolerances as np.allclose's defaults, but a failure reports the
# mismatching values and the check is not stripped when Python runs with -O.
np.testing.assert_allclose(
    solution_gd, solution_pseudo, rtol=1e-5, atol=1e-8,
    err_msg="Gradient descent did not converge to the pseudo-inverse solution")


Solution via pseudo inverse: [ 1.07661761  0.89761672 -0.95816936]
Loss at step 0: 19.5310081288
Loss at step 100: 0.221647365442
Loss at step 200: 0.217261482822
Loss at step 300: 0.21723657621
Loss at step 400: 0.217236135456
Loss at step 500: 0.21723612699
Loss at step 600: 0.217236126827
Loss at step 700: 0.217236126824
Loss at step 800: 0.217236126824
Loss at step 900: 0.217236126824
The solution via gradient descent is [ 1.07661761  0.89761672 -0.95816936]
