## Load Data

In [39]:
import csv
import pandas as pd
import numpy as np
# The univariate dataset is loaded by default. To use the multivariate dataset instead,
# uncomment the first loadtxt line below and comment out the second one.
# data=np.loadtxt('multivariateData.dat',delimiter=',')
data=np.loadtxt('univariateData.dat',delimiter=',')

## Matrix Multiplication (as in a0+a1x)

In [40]:
def matrixMulti(theta, X, n):
    """Evaluate the linear hypothesis h = X . theta for every row of X.

    Parameters
    ----------
    theta : array-like
        The n + 1 model parameters. Any layout that reshapes to (n + 1,)
        is accepted, e.g. (n + 1,) or (1, n + 1).
    X : array-like of shape (m, n + 1)
        Design matrix, one sample per row.
    n : int
        Number of parameters minus one.

    Returns
    -------
    numpy.ndarray of shape (m,)
        One hypothesis value per row of X, as float64.
    """
    theta = np.asarray(theta, dtype=float).reshape(n + 1)
    X = np.asarray(X, dtype=float)
    # A single matrix-vector product replaces the per-row loop and avoids
    # calling float() on a 1-element array, which NumPy has deprecated.
    return X @ theta

## Computing Cost


In [41]:
def computeCost(X,y,h):
    """Return the half mean squared error between predictions and targets.

    Computes (1 / m) * 0.5 * sum((h - y) ** 2), where m = X.shape[0].

    Parameters
    ----------
    X : numpy.ndarray
        Only its first dimension (the number of samples m) is used.
    y : array-like
        Target values.
    h : array-like
        Predicted values, same layout as y.

    Returns
    -------
    numpy.float64
        The cost value.
    """
    m = X.shape[0]
    residual = np.asarray(h, dtype=float) - np.asarray(y, dtype=float)
    # np.sum reduces in compiled code; the builtin sum would iterate the
    # array element by element in Python.
    return (1 / m) * 0.5 * np.sum(np.square(residual))

## Computing Gradient Descent

In [42]:
def gradientDescent(theta, alpha, num_iters, h, X, y, n):
    """Run batch gradient descent for a linear model and record the cost.

    Parameters
    ----------
    theta : array-like
        Initial n + 1 parameters. The input is copied, so the caller's
        array is never modified.
    alpha : float
        Learning rate.
    num_iters : int
        Number of gradient-descent iterations.
    h : array-like of shape (m,)
        Hypothesis values used for the first update (presumably X . theta
        for the initial theta -- the function does not verify this).
    X : numpy.ndarray of shape (m, n + 1)
        Design matrix. The intercept update uses the plain sum of the
        residuals, i.e. it treats column 0 as a column of ones.
    y : array-like of shape (m,)
        Target values.
    n : int
        Number of parameters minus one.

    Returns
    -------
    theta : numpy.ndarray of shape (1, n + 1)
        Parameters after the last iteration.
    cost : numpy.ndarray of shape (num_iters,)
        Half mean squared error measured after each iteration.
    """
    m = X.shape[0]
    step = alpha / m
    # Private float copy: protects the caller's array from in-place updates
    # and prevents truncation if an integer-typed theta is passed in.
    theta = np.array(theta, dtype=float).reshape(n + 1)
    h = np.asarray(h, dtype=float)
    cost = np.ones(num_iters)
    for i in range(num_iters):
        # Every parameter is updated from the same residual (simultaneous update).
        residual = h - y
        theta[0] -= step * np.sum(residual)
        theta[1:] -= step * (X[:, 1:n + 1].T @ residual)
        # Refresh the hypothesis with the new parameters, then score it.
        h = X @ theta
        cost[i] = (1 / m) * 0.5 * np.sum(np.square(h - y))
    return theta.reshape(1, n + 1), cost

## Fit Function 


In [43]:
def fit(X, y, alpha, num_iters):
    """Fit a linear model to (X, y) with batch gradient descent.

    A column of ones is prepended to X for the intercept term, the
    parameters start at zero, and optimisation is delegated to
    gradientDescent. The cost history it produces is discarded.

    Parameters
    ----------
    X : numpy.ndarray of shape (m, n)
        Feature matrix without the intercept column.
    y : array-like
        Target values.
    alpha : float
        Learning rate forwarded to gradientDescent.
    num_iters : int
        Iteration count forwarded to gradientDescent.

    Returns
    -------
    The parameter array returned by gradientDescent.
    """
    n = X.shape[1]
    # Design matrix: intercept column first, then the original features.
    bias_column = np.ones((X.shape[0], 1))
    design = np.concatenate((bias_column, X), axis=1)
    # Zero start point and the hypothesis values that go with it.
    start_theta = np.zeros(n + 1)
    start_h = matrixMulti(start_theta, design, n)
    fitted_theta, _ = gradientDescent(
        start_theta, alpha, num_iters, start_h, design, y, n
    )
    return fitted_theta

## Split Data to train and Test

In [44]:
from sklearn.model_selection import train_test_split
# Features are every column except the last; the target is the last column.
X=data[:,:-1]
y=data[:,-1]
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


## Scaling data function


In [45]:
def scaling(dataToScale):
    """Standardize every column to zero mean and unit standard deviation.

    The result is written back into ``dataToScale`` itself and the same
    array object is returned, so the caller's array is modified in place.

    Columns with zero standard deviation (constant columns) are divided by
    1 instead of 0, so they become all zeros rather than NaN.

    Parameters
    ----------
    dataToScale : numpy.ndarray of shape (m, n)
        Data to standardize column by column; overwritten with the result.

    Returns
    -------
    numpy.ndarray
        The same array object, now holding the standardized values.
    """
    mean = np.mean(dataToScale, axis=0)
    std = np.std(dataToScale, axis=0)
    # Guard against division by zero for constant columns.
    std = np.where(std == 0, 1.0, std)
    # Slice assignment keeps the in-place contract of this function.
    dataToScale[...] = (dataToScale - mean) / std
    return dataToScale
# Standardize the training features; scaling() also overwrites X_train's contents in place.
X_train=scaling(X_train)

## Calling the linear regression function with learning_rate = 0.0001 and num_iters = 100000

In [46]:
# Train with learning rate 0.0001 for 100000 gradient-descent iterations.
theta = fit(X_train, y_train,0.0001, 100000)
# Bare expression so the notebook displays the fitted parameters.
# `theta` is later read as a global by predict().
theta

array([[5.94309165, 4.80297162]])

## Preparing Vectors for multiplication

In [47]:
def vectorization(data):
    """Standardize ``data`` and prepend the intercept column of ones.

    The data is passed through scaling(), which writes the standardized
    values back into ``data`` itself, so the caller's array is modified.

    NOTE(review): scaling() derives the mean and standard deviation from
    ``data`` itself, so a prediction set is standardized with its own
    statistics rather than those of the training set -- confirm this is
    intended.

    Returns
    -------
    numpy.ndarray of shape (m, n + 1)
        A new array: a column of ones followed by the standardized columns.
    """
    scaled = scaling(data)
    bias_column = np.ones((scaled.shape[0], 1))
    return np.concatenate((bias_column, scaled), axis=1)


## Prediction Function

In [48]:
 
def predict(predictiondata):
    """Predict targets for ``predictiondata`` with the notebook-level ``theta``.

    The input goes through vectorization() (standardization plus an
    intercept column) and is then multiplied by the global parameter
    array ``theta`` via matrixMulti().

    Parameters
    ----------
    predictiondata : numpy.ndarray of shape (m, n)
        Raw feature matrix without the intercept column.

    Returns
    -------
    The array of predictions produced by matrixMulti, one per input row.
    """
    design = vectorization(predictiondata)
    # The design matrix carries one extra column for the intercept.
    feature_count = design.shape[1] - 1
    return matrixMulti(theta, design, feature_count)
# Predict on the held-out features. predict() routes X_test through scaling(),
# which writes the standardized values back into X_test itself.
y_predict=predict(X_test)
# Show the true targets, then the predictions, for a side-by-side look.
print(y_test)
y_predict

[17.929    0.55657  1.9869   3.6518   0.20421  7.5435  17.054    5.3854
  3.2522  17.592    5.9966   0.29678  1.844   13.501    6.7318   6.8233
  4.263    3.1551   2.8214   3.3411   1.0179   6.6799   1.0173   3.5129
  5.4974   1.8495   3.8166   0.47953 15.505    4.8852 ]


array([21.94410875,  3.01826045,  2.89723213,  4.23772511,  2.75772165,
        8.21925875, 19.17774715, 10.53167473,  3.38051073,  3.85675419,
        4.42827019,  4.3787857 ,  3.48866116, 11.86715964,  5.90123851,
        3.55829716,  3.1174679 ,  3.40829359,  2.60390241,  5.18723104,
        3.17887636,  8.65722626,  3.12855719,  2.98546952,  4.14686444,
        3.59645387,  2.5981789 ,  3.40257008, 13.46020236,  5.17804958])

## Calculating Accuracy (R² score)

In [49]:
def EvaluatePerformance(y_,y):
    """Return the coefficient of determination (R^2) of the predictions.

    R^2 = 1 - SSR / SST, where SSR is the sum of squared residuals and SST
    is the sum of squared deviations of ``y`` from its mean.

    Parameters
    ----------
    y_ : array-like
        Predicted values.
    y : array-like
        True values.

    Returns
    -------
    float
        The R^2 score. When ``y`` is constant (SST == 0) the ratio is
        undefined; following scikit-learn's default convention, 1.0 is
        returned for perfect predictions and 0.0 otherwise.
    """
    y_ = np.asarray(y_, dtype=float)
    y = np.asarray(y, dtype=float)
    sst = np.sum((y - y.mean()) ** 2)
    ssr = np.sum((y_ - y) ** 2)
    if sst == 0:
        # Constant targets: avoid dividing by zero.
        return 1.0 if ssr == 0 else 0.0
    return 1 - (ssr / sst)

# R^2 of this notebook's gradient-descent predictions against the held-out targets.
EvaluatePerformance(y_predict,y_test)

0.608541750529632

## Comparing this implementation with scikit-learn's LinearRegression model:

In [50]:
import numpy as np
from sklearn.linear_model import LinearRegression
# Reference fit with scikit-learn on the same (already standardized) training features.
clf = LinearRegression()
reg=clf.fit(X_train, y_train)

# NOTE(review): X_test was standardized in place by the earlier predict(X_test) call,
# so this cell depends on that cell having run first -- confirm before re-ordering cells.
pred=reg.predict(X_test)
print('Implementation of Code',y_predict)
print('Library Model Results',pred)
# R^2 of the scikit-learn predictions, for comparison with the score computed above.
EvaluatePerformance(pred,y_test)

Implementation of Code [21.94410875  3.01826045  2.89723213  4.23772511  2.75772165  8.21925875
 19.17774715 10.53167473  3.38051073  3.85675419  4.42827019  4.3787857
  3.48866116 11.86715964  5.90123851  3.55829716  3.1174679   3.40829359
  2.60390241  5.18723104  3.17887636  8.65722626  3.12855719  2.98546952
  4.14686444  3.59645387  2.5981789   3.40257008 13.46020236  5.17804958]
Library Model Results [21.94510456  3.01839742  2.8973636   4.23791742  2.75784679  8.21963173
 19.17861742 10.53215265  3.38066414  3.85692921  4.42847114  4.37898441
  3.48881947 11.86769816  5.9015063   3.55845863  3.11760937  3.40844825
  2.60402057  5.18746643  3.17902061  8.65761912  3.12869917  2.985605
  4.14705262  3.59661708  2.5982968   3.40272449 13.46081317  5.17828455]


0.6085249061106279