## Linear Regression with `numpy`
- [Why use NumPy instead of plain Python lists?](https://stackoverflow.com/questions/993984/why-numpy-instead-of-python-lists)

In [3]:
# Loading the data
import numpy as np

# Seed the global NumPy RNG so the noise below (and any later np.random.* call
# in this notebook) is reproducible under Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

# 100 evenly spaced inputs in [1, 10], shaped as a column vector (100, 1)
data_x = np.linspace(1.0, 10.0, 100)[:, np.newaxis]
# Targets: sin(x) + 0.1 * x^2 plus Gaussian noise with standard deviation 0.5
data_y = np.sin(data_x) + 0.1*np.power(data_x, 2) + 0.5*np.random.randn(100, 1)
# Scale the inputs by their maximum so they lie in (0, 1]; this is done after
# the targets are computed, so data_y is based on the unscaled inputs.
data_x = data_x / np.max(data_x)

In [4]:
print (data_x.shape)
print (data_y.shape)

# Adding bias to x: prepend a column of ones so the first weight acts as the
# intercept. The guard keeps this cell idempotent -- without it, every re-run
# would stack one more column of ones onto data_x.
if data_x.shape[1] == 1:
    data_x = np.hstack((np.ones_like(data_x), data_x))

(100, 1)
(100, 1)


In [5]:
# Draw a random ordering of the row indices
order = np.random.permutation(len(data_x))
portion = 20

# The first `portion` shuffled rows become the test set; the remainder is used for training
test_idx, train_idx = order[:portion], order[portion:]
test_x, test_y = data_x[test_idx], data_y[test_idx]
train_x, train_y = data_x[train_idx], data_y[train_idx]

In [15]:
def get_gradient(w, x, y):
    """Return the gradient w.r.t. `w` and the per-sample squared errors of a linear model.

    Predictions are `x.dot(w)`; residuals are `y - prediction` (both flattened
    to 1-D). The gradient is `-(1/len(x)) * residuals.dot(x)`, i.e. the
    gradient of half the mean squared error.

    Returns:
        (gradient, squared_errors): the gradient vector and the element-wise
        squared residuals, one entry per row of `x`.
    """
    n_samples = len(x)
    predicted = x.dot(w).flatten()
    # Residual = expected value - predicted value
    residual = y.flatten() - predicted
    scale = -(1.0 / n_samples)
    grad = scale * residual.dot(x)
    squared_residual = residual ** 2
    return grad, squared_residual

In [19]:
# Initialising a random vector of weights (one per column of train_x: bias and slope)
w = np.random.randn(2)

# Learning rate
alpha = 0.5

# Threshold to terminate learning
tolerance = 1e-5

# Safety cap on the number of updates. If the updates diverge (inf/NaN), the
# convergence test below is never satisfied, so an uncapped loop would run forever.
max_iterations = 100000

# Perform Gradient Descent
iterations = 1
while iterations <= max_iterations:
    gradient, error = get_gradient(w, train_x, train_y)
    new_w = w - alpha * gradient

    # Stopping Condition: the total absolute change in the weights is below tolerance
    if np.sum(np.abs(new_w - w)) < tolerance:
        print ("Converged.")
        break

    # Print the summed squared training error every 100 iterations
    if iterations % 100 == 0:
        print ("Iteration: "+str(iterations)+" - Error: "+ str(np.sum(error)))

    iterations += 1
    w = new_w
else:
    # Reached only when the loop ends without hitting `break`
    print ("Stopped after "+str(max_iterations)+" iterations without converging.")

Iteration: 100 - Error: 141.95809226800142
Iteration: 200 - Error: 139.91956148098575
Iteration: 300 - Error: 139.90711964911608
Iteration: 400 - Error: 139.9070437124752
Converged.
