# Gradient Descent for Linear Regression

In [1]:
%matplotlib notebook

from matplotlib.pyplot import legend, plot, scatter, show, title, xlabel, ylabel
from numpy import array, mean, sqrt, square

## reading dataset

In [2]:
# Raw observations: one [x, y] pair per row, 25 rows in total.
data_set = array([
    [32, 31], [53, 68], [60, 62], [47, 71], [59, 87],
    [55, 78], [52, 79], [39, 59], [48, 75], [52, 71],
    [45, 55], [54, 82], [44, 62], [58, 75], [56, 81],
    [48, 60], [44, 82], [60, 97], [45, 48], [38, 56],
    [66, 83], [65, 11], [47, 57], [41, 51], [51, 75],
])

# Column 0 is the independent variable (x), column 1 the dependent one (y).
# Transposing turns the two columns into two rows that unpack directly.
x_list, y_list = data_set.T

# Show only the first five values of each column as a quick sanity check.
print('x_list :', x_list[:5])
print('y_list :', y_list[:5])

x_list : [32 53 60 47 59]
y_list : [31 68 62 71 87]


## initialising parameters

In [3]:
# Starting coefficients of the line y = slope * x + intercept.
intercept, slope = 0, 0

# Step size applied to every coefficient update.
learning_rate = 1e-4
# Number of gradient-descent iterations.
iteration_cycles = 1000

# Number of samples in the dataset.
count = len(x_list)

In [4]:
# predicting function (line equation)

def line_equation(x):
    """Return the line's prediction ``slope * x + intercept`` for ``x``.

    ``slope`` and ``intercept`` are looked up in the global scope each time
    the function is called, so the result always reflects their current
    values (e.g. after the coefficients have been updated by training).

    ``x`` may be a single number or a numpy array; with an array the
    expression is evaluated element-wise.
    """
    return slope * x + intercept

## training (finding the line coefficients for the given dataset)

In [5]:
# NOTE: `intercept` and `slope` are updated in place, so re-running this cell
# continues from the current coefficients rather than starting again from
# their initial values.

# the factor 2 / count is the same on every iteration, so compute it once
gradient_scale = 2 / count

# run the configured number of iterations (instead of a hard-coded 1000)
for _ in range(iteration_cycles):
    # computing new line based on previous coefficients
    # (the line equation is applied to the whole array at once)
    new_y = line_equation(x_list)

    # taking deviation of each point from the line
    y_difference = y_list - new_y

    # change to be made based on deviation; these are the descent directions
    # (negative gradients of the mean squared error), hence the `+=` below
    scaled_difference = gradient_scale * y_difference
    intercept_gradient = sum(scaled_difference)
    slope_gradient = sum(scaled_difference * x_list)

    # changing coefficients based on learning rate
    intercept += learning_rate * intercept_gradient
    slope += learning_rate * slope_gradient

## predicting line of best fit & its deviation w.r.t. the data points

In [6]:
# Evaluate the fitted line at every x of the dataset.
predicted_y = [line_equation(x) for x in x_list]

# Root-mean-square error between the observed y values and the predictions.
rmse = sqrt(square(y_list - predicted_y).mean())

# Report the fitted coefficients and the resulting error.
print("intercept =", intercept)
print("slope =", slope)
print("rmse =", rmse)

intercept = 0.17933157753031664
slope = 1.2964981522117367
rmse = 17.73492644296715


## plotting dataset, fitted line & predicted value

In [7]:
# a few sample x values and the fitted line's predictions for them
test_x = [44, 32, 63]
test_y = [line_equation(x) for x in test_x]

# dataset points, predicted points and the fitted line on one figure
scatter(x_list, y_list, label='original')
scatter(test_x, test_y, label='prediction')
plot(x_list, predicted_y, label='line of best fit')

# title and axis labels so the figure can be read on its own
title('Linear regression fitted with gradient descent')
xlabel('x')
ylabel('y')
legend()
show()

<IPython.core.display.Javascript object>