# Linear Regression

In [1]:
import numpy as np
import time

class LinearRegression():
    def __init__(self, alpha=0.01, n_iterations=1000):
        self.alpha = alpha
        self.n_iterations = n_iterations
        self.weights = None  #Theta / slope
        self.bais = None     #Intercept
        
    def fit(self, X, y):
        
        start = time.time()
        
        n_samples, n_features = X.shape       # X        --> rows = n_samples , columns = n_features 
        self.weights = np.zeros(n_features)   # weights  --> rows = n_features, columns = 1
        self.bais = 0
        
        for i in range(self.n_iterations):
            # prediction
            y_hat = np.dot(X, self.weights) + self.bais  # y_hat --> rows = n_samples, columns = 1

            # Gradient Descent for Optimization
                            
                                             # y_hat - y --> rows = n_samples, columns = 1
                                             # X         --> rows = n_samples , columns = n_features
                     # (y_hat - y).T , X ==>  1, n_features
                     # X.T , (y_hat - y) ==>  n_features, 1 <--- we want this as our answer
            
            #derivative part
            dw = (1/n_samples) * np.dot(X.T , (y_hat - y))
            db = (1/n_samples) * np.sum(y_hat - y)
            
            #updating weights and bias
            
            self.weights -= self.alpha * dw
            self.bais -= self.alpha * db
        
        end = time.time()
        
        print('TIME TAKEN By fit function in LinearRegression = ', end - start,'sec' )
            
    def predict(self, X):
        return np.dot(X, self.weights) + self.bais 
        

In [2]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

X, y = datasets.make_regression(n_samples=10000, n_features=800, noise=25, random_state=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

In [3]:
reg = LinearRegression()
reg.fit(X_train, y_train)
predicted = reg.predict(X_test)

TIME TAKEN By fit function in LinearRegression =  8.943090915679932 sec


=============================================================================================

# Normal Equation

In [3]:
import numpy as np
import time

class LinearReg():
    def __init__(self):
        self.theta = None
        
    def fit(self, X, y):
        
        start = time.time()
        
        n_samples, n_features = X.shape
        x0 = np.ones((n_samples, 1), dtype='int')
        
        X = np.concatenate((x0, X), axis=1)
      
        
        self.theta = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
        
        
        end = time.time()
        
        print('TIME TAKEN By fit function in LinearRegression(Normal Equation) = ', end - start,'sec' )
            
    def predict(self, X_new):
        n_sam, n_feat = X_new.shape
        x = np.ones((n_sam, 1), dtype='int')
        
        X_new = np.concatenate((x, X_new), axis=1)
        y_pred = X_new.dot(self.theta)
        return y_pred

In [1]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

X, y = datasets.make_regression(n_samples=100000, n_features=1500, noise=25, random_state=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

In [4]:
reg = LinearReg()
reg.fit(X_train, y_train)

predicted = reg.predict(X_test)

TIME TAKEN By fit function in LinearRegression(Normal Equation) =  6.9813337326049805 sec


==========================================================================================================

## What Are the Basic Assumption?(favourite)

<u>There are four assumptions associated with a linear regression model:</u><br>

1. Linearity: The relationship between X and the mean of Y is linear.<br>
2. Homoscedasticity: The variance of residual is the same for any value of X.<br>
3. Independence: Observations are independent of each other.<br>
4. Normality: For any fixed value of X, Y is normally distributed.<br>

<u>Advantages</u><br>
1. Linear regression performs exceptionally well for linearly separable data<br>
2. Easy to implement and train the model<br>
3. It can handle overfitting using dimensionlity reduction techniques and cross validation and regularization

<u>Disadvantages</u><br>
1. Sometimes Lot of Feature Engineering Is required<br>
2. If the independent features are correlated it may affect performance<br>
3. It is often quite prone to noise and overfitting<br>
4. Learning Rate need to be set<br>
5. No. of Iterations need to be set

<u>Whether Feature Scaling is required?</u><br>
Yes<br>
No(For Normal Equation)

<u>Impact of Missing Values?</u><br>
It is sensitive to missing values

<u>Impact of outliers?</u><br>
linear regression needs the relationship between the independent and dependent variables to be linear. It is also important to check for outliers since linear regression is sensitive to outlier effects.

Regularization
1. Ridge
2. Lasso
3. Elastic Net

In [23]:
class Regularization():
    def __init__(self, alpha=0.01, l1=100, l2=100, n_iterations=1000, ridge=False, lasso=False, elastic_net=False):
        self.alpha = alpha
        self.n_iterations = n_iterations
        self.weights = None  #Theta / slope
        self.bais = None     #Intercept
        self.l1 = l1
        self.l2 = l2
        self.ridge = ridge
        self.lasso = lasso
        self.elastic_net = elastic_net
        
    def fit(self, X, y):
        
        start = time.time()
        
        n_samples, n_features = X.shape       
        self.weights = np.zeros(n_features)   
        self.bais = 0
        
        for i in range(self.n_iterations):
            # prediction
            y_hat = np.dot(X, self.weights) + self.bais  
            
            #derivative part
            dw = (1/n_samples) * np.dot(X.T , (y_hat - y))
            
            if self.ridge or self.elastic_net:
                dw += (self.l1 * self.weights)
                
            if self.lasso or self.elastic_net:
                for w in range(len(self.weights)):
                    if self.weights[w] >= 0:
                        dw += self.l2
                    else:
                        dw -= self.l2
        
            db = (1/n_samples) * np.sum(y_hat - y)
            
            #updating weights and bias
            
            self.weights -= self.alpha * dw
            self.bais -= self.alpha * db
        
        end = time.time()
        
        print('TIME TAKEN By fit function in regularization = ', end - start,'sec' )
            
    def predict(self, X):
        return np.dot(X, self.weights) + self.bais 

In [13]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

X, y = datasets.make_regression(n_samples=1000, n_features=100, noise=25, random_state=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

In [17]:
reg = Regularization(ridge=True)
reg.fit(X_train, y_train)

predicted = reg.predict(X_test)

TIME TAKEN By fit function in regularization =  0.29421091079711914 sec


In [24]:
reg = Regularization(lasso=True)
reg.fit(X_train, y_train)

predicted = reg.predict(X_test)

TIME TAKEN By fit function in regularization =  1.1359577178955078 sec


In [25]:
reg = Regularization(elastic_net=True)
reg.fit(X_train, y_train)

predicted = reg.predict(X_test)

TIME TAKEN By fit function in regularization =  0.9319190979003906 sec


<img src='regularization.png'>
<img src='derivative of absolute func.png'>
<img src='derivative of ridge reg.png'>