# Simple and Multiple Linear Regression from Scratch Using Python

## Simple Linear Regression:

In [1]:
# First, let's import the necessary libraries

import numpy as np
import pandas as pd

In [2]:
# Load the theft dataset (a CSV with columns X and Y) straight from its raw
# GitHub URL -- this cell needs network access -- and preview the first rows.

dataset = "https://raw.githubusercontent.com/saras108/medium/master/Linear%20Regression/theft.csv"
df = pd.read_csv(dataset)
df.head()

Unnamed: 0,X,Y
0,6.2,29
1,9.5,44
2,10.5,36
3,7.7,37
4,8.6,53


In [3]:
# Count missing values per column before modelling.
df.isnull().sum()

X    0
Y    0
dtype: int64

In [4]:
# Shuffle-and-split without sklearn: the first 80% of the shuffled rows become
# the training set, the remainder the test set.
# A fixed random_state makes the shuffle -- and therefore every loss reported
# further down -- reproducible across "Restart Kernel -> Run All".

df_suff = df.sample(frac=1, random_state=42)
train_size = int(0.8 * len(df))

train_set = df_suff[:train_size]
test_set = df_suff[train_size:]

# Features are turned into 2-D arrays of shape (n_samples, 1), the layout the
# models' fit/predict expect; targets are left as 1-D pandas Series.
X_train = train_set['X'].values.reshape(-1, 1)
X_test = test_set['X'].values.reshape(-1, 1)
y_train = train_set['Y']
y_test = test_set['Y']

In [5]:
# Halved mean squared error: sum((y - yhat)^2) / (2 * n)
def mse(y, yhat):
    """Return the halved mean squared error between targets and predictions.

    The value is ``sum((y - yhat) ** 2) / (2 * n)`` with ``n = y.shape[0]``,
    i.e. half of the textbook MSE. The factor 1/2 matches the gradient
    ``(1/n) * X.T.dot(yhat - y)`` used by the gradient-descent models in
    this notebook.

    Parameters
    ----------
    y : array-like
        True target values.
    yhat : array-like
        Predicted values, holding the same number of elements as ``y``.

    Returns
    -------
    float
        The halved mean squared error.

    Notes
    -----
    Both inputs are converted to plain NumPy arrays so that pandas objects
    are compared by position rather than by index label. When ``yhat`` has
    the same number of elements as ``y`` but a different shape (for example
    ``(n, 1)`` versus ``(n,)``) it is reshaped to ``y``'s shape; otherwise
    NumPy would broadcast the difference to an ``(n, n)`` matrix and return
    a meaningless number without raising.
    """
    y = np.asarray(y)
    yhat = np.asarray(yhat)
    if yhat.shape != y.shape and yhat.size == y.size:
        yhat = yhat.reshape(y.shape)

    n = y.shape[0]
    sq_err = np.square(np.subtract(y, yhat))
    return (1 / (2 * n)) * np.sum(sq_err)

In [6]:
class SimpleLinearRegression:
    """Linear regression fitted with batch gradient descent.

    The model is ``yhat = X.dot(w) + b``. With the default
    ``fit_intercept=False`` the intercept ``b`` stays at 0.0, so the fitted
    line is forced through the origin unless the caller adds a constant
    column to ``X``.

    Parameters
    ----------
    alpha : float, default 0.001
        Learning rate (step size of each gradient update).
    iteration : int, default 1000
        Number of gradient-descent steps performed by ``fit``.
    fit_intercept : bool, default False
        When True, an intercept ``b`` is learned alongside the weights.
        The default keeps the original through-the-origin behaviour.

    Attributes
    ----------
    w : numpy.ndarray or None
        Feature weights of shape ``(n_features,)``; ``None`` until ``fit``.
    dw : numpy.ndarray or None
        Gradient with respect to ``w`` from the most recent step.
    b : float
        Intercept; 0.0 unless ``fit_intercept=True``.
    db : float
        Gradient with respect to ``b`` from the most recent step.
    """

    def __init__(self, alpha=0.001, iteration=1000, fit_intercept=False):
        self.w = None
        self.dw = None
        self.b = 0.0
        self.db = 0.0
        self.alpha = alpha
        self.iteration = iteration
        self.fit_intercept = fit_intercept

    def fit(self, X, y):
        """Fit the weights by minimising ``sum((yhat - y)^2) / (2n)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like holding n_samples values
            Flattened to 1-D before use. A column vector of shape
            ``(n_samples, 1)`` would otherwise broadcast ``yhat - y`` to an
            ``(n_samples, n_samples)`` matrix and corrupt the gradient.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)
        sample, feature = X.shape
        if y.shape[0] != sample:
            raise ValueError(
                f"X has {sample} samples but y has {y.shape[0]} values"
            )

        self.w = np.zeros(feature)  # start from all-zero weights
        self.b = 0.0
        for _ in range(self.iteration):
            error = X.dot(self.w) + self.b - y
            # Gradient of the halved MSE with respect to w
            self.dw = (1 / sample) * X.T.dot(error)
            self.w -= self.alpha * self.dw
            if self.fit_intercept:
                # Gradient with respect to b is the mean residual; `error`
                # was computed before w changed, so both updates are simultaneous
                self.db = error.mean()
                self.b -= self.alpha * self.db

    def predict(self, X):
        """Return ``X.dot(w) + b`` for a design matrix ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.w is None:
            raise RuntimeError("fit() must be called before predict()")
        return X.dot(self.w) + self.b

In [7]:
# Train the gradient-descent model with its default hyper-parameters on the
# training split, predict on the held-out split, and score the predictions
# with mse() (sum of squared errors divided by 2n).
model = SimpleLinearRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
loss = mse(y_test, predictions)

In [8]:
from sklearn.linear_model import LinearRegression as SKLinearRegression

# Reference fit with scikit-learn on the same split, scored with the same mse().
# NOTE(review): sklearn's LinearRegression fits an intercept by default, whereas
# SimpleLinearRegression() above is trained on X alone (no bias term and no
# ones column), so the two losses are not a like-for-like comparison.
sklearn_model = SKLinearRegression()
sklearn_model.fit(X_train, y_train)
sklearn_predictions = sklearn_model.predict(X_test)
sklearn_loss = mse(y_test, sklearn_predictions)

In [9]:
# Test-set loss of both models, each computed by mse() (squared error / 2n).
print(f"Sklearn: {sklearn_loss}")
print(f"Ours: {loss}")

Sklearn: 84.45581409278712
Ours: 132.5711970722647


***

## Multiple Linear Regression:

In [10]:
import pandas as pd
import numpy as np

In [11]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where the
# import raises ImportError. Fall back to rebuilding the same table from the
# original StatLib file, following the replacement snippet in sklearn's
# deprecation notice (needs network access).
try:
    from sklearn.datasets import load_boston

    datas = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    # Raw file layout: 22 descriptive header lines, then each record wrapped
    # over two physical lines (11 values, then 2 more features + the target).
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    datas = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

# One column per feature, plus the regression target as the last column
df = pd.DataFrame(data=datas.data)

df.columns = datas.feature_names
df['target'] = datas.target

df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [12]:
# Count missing values per column before modelling.
df.isnull().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
target     0
dtype: int64

In [13]:
from sklearn.model_selection import train_test_split
# Separate the features from the target and hold out 30% of the rows for testing.
# random_state pins the shuffle so the losses reported below are reproducible
# across "Restart Kernel -> Run All".
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [14]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics are
# used during fitting. Both splits become NumPy arrays from here on.
scalar = MinMaxScaler()

X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

In [15]:
# Prepend a column of ones to each design matrix so that the first learned
# weight plays the role of the intercept.
# NOTE: this cell is not idempotent -- every re-run prepends one more ones
# column to X_train / X_test, so re-run the cells above before re-running it.
X_train = np.hstack((np.ones((len(y_train),1)), X_train))
X_test = np.hstack((np.ones((len(y_test),1)), X_test))

In [16]:
def mse(y, yhat):
    """Return the halved mean squared error between targets and predictions.

    The value is ``sum((y - yhat) ** 2) / (2 * n)`` with ``n = y.shape[0]``,
    i.e. half of the textbook MSE. The factor 1/2 matches the gradient
    ``(1/n) * X.T.dot(errors)`` used by the gradient-descent model below.

    NOTE: this repeats the ``mse`` defined in the simple-regression section
    so that this section can run on its own; keep the two in sync.

    Parameters
    ----------
    y : array-like
        True target values.
    yhat : array-like
        Predicted values, holding the same number of elements as ``y``.

    Returns
    -------
    float
        The halved mean squared error.

    Notes
    -----
    Both inputs are converted to plain NumPy arrays so that pandas objects
    are compared by position rather than by index label. When ``yhat`` has
    the same number of elements as ``y`` but a different shape (for example
    ``(n, 1)`` versus ``(n,)``) it is reshaped to ``y``'s shape; otherwise
    NumPy would broadcast the difference to an ``(n, n)`` matrix and return
    a meaningless number without raising.
    """
    y = np.asarray(y)
    yhat = np.asarray(yhat)
    if yhat.shape != y.shape and yhat.size == y.size:
        yhat = yhat.reshape(y.shape)

    n = y.shape[0]
    sq_err = np.square(np.subtract(y, yhat))
    return (1 / (2 * n)) * np.sum(sq_err)

In [17]:
class LinearRegression:
    """Multiple linear regression fitted with batch gradient descent.

    The model is ``yhat = X.dot(W)``. No intercept is added internally, so
    the caller must include a constant (all-ones) column in ``X`` when a
    bias term is wanted.

    Parameters
    ----------
    alpha : float, default 0.01
        Learning rate (step size of each gradient update).
    n_iter : int, default 1000
        Number of gradient-descent steps performed by ``fit``.

    Attributes
    ----------
    W : numpy.ndarray or None
        Weights of shape ``(n_features,)``; ``None`` until ``fit`` is called.
    dW : numpy.ndarray or None
        Gradient with respect to ``W`` from the most recent step.
    """

    def __init__(self, alpha=0.01, n_iter=1000):
        self.W = None
        self.dW = None
        self.alpha = alpha
        self.n_iter = n_iter

    def fit(self, X, y):
        """Fit the weights by minimising ``sum((yhat - y)^2) / (2n)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like holding n_samples values
            Flattened to 1-D before use. A column vector of shape
            ``(n_samples, 1)`` would otherwise broadcast ``predictions - y``
            to an ``(n_samples, n_samples)`` matrix and corrupt the gradient.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)
        sam, feat = X.shape
        if y.shape[0] != sam:
            raise ValueError(f"X has {sam} samples but y has {y.shape[0]} values")

        self.W = np.zeros(feat)  # start from all-zero weights
        for _ in range(self.n_iter):
            errors = X.dot(self.W) - y
            # Gradient of the halved MSE with respect to W
            self.dW = (1 / sam) * X.T.dot(errors)
            self.W = self.W - self.alpha * self.dW

    def predict(self, X):
        """Return ``X.dot(W)`` for a design matrix ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.W is None:
            raise RuntimeError("fit() must be called before predict()")
        return X.dot(self.W)

In [18]:
# Train the gradient-descent model with its default hyper-parameters on the
# scaled training matrix (ones column included), predict on the test matrix,
# and score the predictions with mse() (squared error / 2n).
model = LinearRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
loss = mse(y_test, predictions)

In [19]:
from sklearn.linear_model import LinearRegression as MultiLinearRegression
# Reference fit with scikit-learn on the same matrices (which already carry
# the prepended ones column), scored with the same mse() for comparison.
sklearn_model = MultiLinearRegression()
sklearn_model.fit(X_train, y_train)
sklearn_predictions = sklearn_model.predict(X_test)
sklearn_loss = mse(y_test, sklearn_predictions)

In [20]:
# Test-set loss of both models, each computed by mse() (squared error / 2n).
print(f"Sklearn: {sklearn_loss}")
print(f"Ours: {loss}")

Sklearn: 14.86645397696436
Ours: 22.846433599503296
