In [1]:
# Use gradient descent to fit a line to a set of randomly generated points using PyTorch and GPU

from __future__ import print_function
import random
from datetime import datetime
import numpy as np
import torch

In [2]:
# Co-efficients of a straight line
a_org = 5
b_org = 3

# Start of the line
x_start = 0

# End of the line
x_end = 10

# How far away the points can be from the line on the y axis
pm = 10

# How many points
n = 1000

# The range of randomly generated x axis values
x_min = 0
x_max = 10

# Error limit
err_limit = 0.1

# Learning Rate
lr = 0.001

In [3]:
x = np.random.randint(x_min, x_max, n)
py = x * a_org + b_org
y = py - np.random.randint(-pm, pm, n)
# x, py, y

In [4]:
def gen_scatter(a, b, n, pm, x_min, x_max):
    x_array = torch.FloatTensor(n).random_(x_min, x_max)
    py = a * x_array + b
    y_array = py - torch.FloatTensor(n).random_(-pm, pm)
    return x_array, y_array

def err(a, b, x_array, y_array):
    y = a * x_array + b
    return ((y - y_array) ** 2).mean()

In [5]:
def avg_slope_err(x_array, y_array, a, b):
    y = a * x_array + b
    avg_a_err = (2 * (y - y_array) * x_array).mean()
    avg_b_err = (2 * (y - y_array)).mean()
    return [avg_a_err, avg_b_err] 
#a, b = avg_slope_err(x_array, y_array, a_org, b_org)
#a, b

In [6]:
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# dev = torch.device("cpu")
dev

device(type='cuda')

In [7]:
x_array, y_array = gen_scatter(a_org, b_org, n, pm, x_min, x_max)
x_array = x_array.to(dev)
y_array = y_array.to(dev)
e = err(a_org, b_org, x_array, y_array)
e

tensor(33.5940, device='cuda:0')

In [8]:
# declaring the variables for gradient decent
gd_a = torch.tensor(random.randint(-x_max, x_max), dtype = torch.float, requires_grad =  True, device=dev)
gd_b = torch.tensor(random.random(), dtype = torch.float, requires_grad =  True, device=dev)

In [9]:
i = 0
max_i = 100000

print ("The Time at the start of gd is {}. gd_a = {}. gd_b = {}".format(datetime.now(), gd_a, gd_b))

# This is the part of the program that does gradient decent
while (i < max_i):
    i = i + 1
    
    loss = err(gd_a, gd_b, x_array, y_array)
    loss.backward()
    
    with torch.no_grad():
        gd_a -= gd_a.grad * lr
        gd_b -= gd_b.grad * lr        
        gd_a.grad.zero_()
        gd_b.grad.zero_()

    # ase = avg_slope_err(x_array,y_array, gd_a, gd_b)
    # This utilizes the avg_slpoe_err function))
    # gd_a = gd_a - ase[0] * lr
    # gd_b = gd_b - ase[1] * lr
    
    if i % 10000 == 0:
        print ("The Time at iteration # {} of gd is {} gd_a {} gd_b {} err {}.".format(i, datetime.now(), gd_a, gd_b, err(gd_a, gd_b, x_array, y_array)))

print ("The Time at the end of gd is {}. gd_a = {}. gd_b = {}".format(datetime.now(), gd_a, gd_b))

print ("The original a is {} and the original b is {}. The error for these values are {}".format(a_org, b_org, err(a_org, b_org, x_array, y_array)))

The Time at the start of gd is 2019-05-25 09:11:56.412867. gd_a = 4.0. gd_b = 0.9433304071426392
The Time at iteration # 10000 of gd is 2019-05-25 09:11:59.248062 gd_a 4.943131446838379 gd_b 3.658792734146118 err 33.40387725830078.
The Time at iteration # 20000 of gd is 2019-05-25 09:12:02.091701 gd_a 4.94173002243042 gd_b 3.667710304260254 err 33.40385055541992.
The Time at iteration # 30000 of gd is 2019-05-25 09:12:05.097747 gd_a 4.94173002243042 gd_b 3.667710304260254 err 33.40385055541992.
The Time at iteration # 40000 of gd is 2019-05-25 09:12:07.879908 gd_a 4.94173002243042 gd_b 3.667710304260254 err 33.40385055541992.
The Time at iteration # 50000 of gd is 2019-05-25 09:12:10.710759 gd_a 4.94173002243042 gd_b 3.667710304260254 err 33.40385055541992.
The Time at iteration # 60000 of gd is 2019-05-25 09:12:13.537064 gd_a 4.94173002243042 gd_b 3.667710304260254 err 33.40385055541992.
The Time at iteration # 70000 of gd is 2019-05-25 09:12:16.390752 gd_a 4.94173002243042 gd_b 3.667