## Linear Regression

In [88]:
import pandas as pd
import numpy as np

In [314]:
# Load the dataset. The functions below treat the last column as the regression
# target and every preceding column as a feature.
df=pd.read_csv('./linear_regression.csv')

In [315]:
def linear(x, theta):
    """Evaluate the linear model ``x . theta[1:] + theta[0]``.

    Parameters
    ----------
    x : object with a ``.dot`` method
        Feature matrix; the notebook passes a pandas DataFrame, one row per
        sample.
    theta : sequence
        Model parameters. ``theta[0]`` is the intercept and ``theta[1:]`` are
        the per-feature weights.

    Returns
    -------
    The value of ``x.dot(theta[1:]) + theta[0]`` (one prediction per row when
    ``x`` is a DataFrame).
    """
    weights = theta[1:]
    weighted_sum = x.dot(weights)
    # The intercept is broadcast onto every prediction.
    return weighted_sum + theta[0]

In [318]:
# Sanity check: evaluate the model on the first three columns of the first three
# rows, with the intercept and every weight set to 1.
linear(df[df.columns[:3]][:3],[1,1,1,1])

0    2.0
1    4.0
2   -4.0
dtype: float64

In [319]:
def loss_func(batch, theta):
    """Mean squared error of the linear model on ``batch``.

    Parameters
    ----------
    batch : pandas.DataFrame
        Samples to score. The last column is used as the target and all
        preceding columns as features.
    theta : sequence
        Model parameters as expected by ``linear`` (intercept first, then one
        weight per feature column).

    Returns
    -------
    float
        ``mean((linear(x, theta) - y) ** 2)`` over the rows of ``batch``.
    """
    x = batch[batch.columns[:-1]]
    y = batch[batch.columns[-1]]
    residual = linear(x, theta) - y
    # Vectorised square instead of a per-element Python lambda.
    return (residual ** 2).mean()

In [320]:
# Starting parameters: theta[0] is the intercept, theta[1:] are the feature
# weights (see `linear`).
theta=[1,1,1,1]
# Loss of these starting parameters over the full dataset.
loss_func(df,theta)

40.31034000000003

In [333]:
def gradient(batch, theta):
    """Return the descent direction for the squared-error loss on ``batch``.

    Despite its name, this does not return the raw gradient of ``loss_func``.
    It returns ``-1/2 * mean(residual * feature)`` for each parameter, i.e.
    the *negative* gradient scaled by 1/4. Callers therefore take a step with
    ``theta + alpha * gradient(batch, theta)``.

    Parameters
    ----------
    batch : pandas.DataFrame
        Samples; the last column is the target, all preceding columns are
        features.
    theta : sequence
        Current parameters (intercept first, then one weight per feature).

    Returns
    -------
    pandas.Series
        First entry (label ``0``) is the intercept component, followed by one
        entry per feature column, labelled with the column name.
    """
    x = batch[batch.columns[:-1]]
    y = batch[batch.columns[-1]]
    residual = linear(x, theta) - y
    bsize = batch.shape[0]
    # NumPy sum instead of the builtin `sum` iterating over the Series.
    intercept_part = -1/2 * residual.to_numpy().sum() / bsize
    weight_part = -1/2 * x.T.dot(residual) / bsize
    # `Series.append` was removed in pandas 2.0; `pd.concat` yields the same
    # labels ([0, <feature columns...>]) on every pandas version.
    return pd.concat([pd.Series(intercept_part), weight_part])


In [334]:
# Check the update direction on the first two rows with the starting parameters.
gradient(df[:2],theta)

0    -1.0
x1   -0.5
x2    1.5
x3   -2.5
dtype: float64

In [312]:
# NOTE(review): leftover scratch cell repeating the `gradient` check. Its
# execution count (312) is lower than that of the `gradient` definition (333),
# so the recorded output presumably comes from an earlier version of the
# function -- consider deleting this cell.
gradient(df[:2],theta)

x0   -1.0
x1   -0.5
x2    1.5
x3   -2.5
dtype: float64

In [336]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0,x1,x2,x3,y
0,0.0,-2.0,3.0,-1.0
1,2.0,0.0,1.0,3.0
2,-2.0,0.0,-3.0,-3.1
3,-1.0,2.0,1.0,-0.0
4,3.0,3.0,-4.0,-9.0


### Perform Gradient Descent

In [337]:
# Batch gradient descent on the first 400 rows, starting from `theta`.
w = theta
alpha = 0.2              # step size
tolerance = 0.165        # stop once the training loss falls below this value
max_iterations = 10000   # safety cap so a non-converging run cannot loop forever

iterations = 1
batch = df[:400]
while iterations <= max_iterations:
    # `gradient` returns a descent direction (it carries a negative sign), so
    # the update below *adds* it to the parameters.
    gradients, error = gradient(batch, w), loss_func(batch, w)

    # Stopping Condition (reuses `error` instead of recomputing the loss)
    if error < tolerance:
        print("Converged.")
        break

    # Print error every 20 iterations
    if iterations % 20 == 0:
        print("Iteration: {} - Error: {}".format(iterations, error))
        print(w)

    iterations += 1
    w = w + alpha * gradients
else:
    # Only reached when the loop ends without hitting `break`.
    print("Stopped after {} iterations without reaching tolerance {} (last error: {}).".format(
        max_iterations, tolerance, error))

Iteration: 20 - Error: 0.4407967072476012
0    -1.423233
x1   -0.955098
x2    0.036425
x3    1.048440
dtype: float64
Iteration: 40 - Error: 0.16821891768946393
0    -1.895366
x1   -0.988989
x2    0.004626
x3    1.011001
dtype: float64
Converged.


### Verify result with Scikit-learn

In [335]:
from sklearn import linear_model

# Same split as the hand-written functions: last column is the target,
# everything before it is a feature.
feature_cols = df.columns[:-1]
target_col = df.columns[-1]
x = df[feature_cols]
y = df[target_col]

# Fit ordinary least squares and report the score on the training data,
# followed by the rounded coefficients and the intercept for comparison with
# the parameters found by gradient descent.
cls = linear_model.LinearRegression()
cls.fit(x, y)
print(cls.score(x, y))
rounded_coefs = [round(coef) for coef in cls.coef_]
print(rounded_coefs, cls.intercept_)

0.9913928803342118
[-1.0, -0.0, 1.0] -1.9906408323954137
