# LINEAR REGRESSION 

## CHAPTER #1 - LINEAR REGRESSION MODEL 

 ### CLASS CREATION  
 This class will contain all of the model logic.   
 The class contains the Linear regression model itself. 
 Each object is a "best fit line", i.e. one fitted line per set of training data.   

 The attributes contain the:  
 - weight (coefficient of the independent variable, x)  
 - bias (y-intercept) of the line 

 The methods contain all the "verbs" of the model (functions that compute, update, or evaluate the weight and bias), namely:  
 - how the model learns the weight and bias. 
 - how the model fits the line to the data.  
 - how the line is evaluated, e.g. R^2 and MSE. 
 - how close our predictions are, i.e. residuals.


In [None]:
class LinearRegression(object):
    """Simple (one-variable) linear regression fitted by gradient descent.

    Each instance represents one best-fit line ``y = weight * x + bias``.

    Attributes:
        weight: Slope of the line (coefficient of the independent variable).
        bias: y-intercept of the line.
        x: Predictor values, stored by ``vectorise``.
        y: Observed values, stored by ``vectorise``.
        square_error: Sum of squared residuals; set by ``mse`` and
            ``rsquared``.
        avg_y: Mean of ``y``; set by ``rsquared``.
        sum_squares: Total sum of squares of ``y`` about its mean; set by
            ``rsquared``.
    """

    def __init__(self, weight=0, bias=0):
        """Create a line with the given starting weight and bias."""
        self.weight = weight
        self.bias = bias
        self.x = []  # predictor (independent) values
        self.y = []  # observed (dependent) values

    def vectorise(self, x, y):
        """Store the data points to be modelled.

        Args:
            x: Sequence of independent-variable values.
            y: Sequence of dependent-variable values, same length as ``x``.

        Raises:
            ValueError: If ``x`` and ``y`` have different lengths.
        """
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length "
                f"(got {len(x)} and {len(y)})"
            )
        self.x = x
        self.y = y

    def predict_y(self):
        """Return the predicted y for every stored x using the current line."""
        return [self.weight * x_i + self.bias for x_i in self.x]

    # ------------------------------------------------------------------
    # NUMERICAL OPTIMISATION
    # ------------------------------------------------------------------

    def partial_w(self):
        """Return the descent direction for the weight.

        This is the NEGATIVE of the partial derivative of the mean squared
        error with respect to the weight, i.e. ``(-2/n) * sum(x * (pred - y))``,
        so ``optimise`` can add it (scaled by the learning rate) to step
        downhill.

        Raises:
            ZeroDivisionError: If no data has been stored.
        """
        n = len(self.y)
        total = sum(
            x_i * (pred_i - y_i)
            for x_i, pred_i, y_i in zip(self.x, self.predict_y(), self.y)
        )
        return (-2 / n) * total

    def partial_b(self):
        """Return the descent direction for the bias.

        This is the NEGATIVE of the partial derivative of the mean squared
        error with respect to the bias, i.e. ``(-2/n) * sum(pred - y)``.

        Raises:
            ZeroDivisionError: If no data has been stored.
        """
        n = len(self.y)
        total = sum(
            pred_i - y_i for pred_i, y_i in zip(self.predict_y(), self.y)
        )
        return (-2 / n) * total

    def optimise(self, learn_rate=0.005, epochs=10000, print_every=10):
        """Fit weight and bias to the stored data by gradient descent.

        Args:
            learn_rate: Step size taken "downhill" on each epoch.
            epochs: Number of update steps to perform.
            print_every: Print the current weight and bias every this many
                epochs; pass 0 (or None) to disable printing.
        """
        for epoch in range(epochs):
            # partial_w/partial_b already return the negated gradient, so the
            # update is an addition. The bias step deliberately sees the
            # freshly updated weight (sequential, not simultaneous, update).
            self.weight = self.weight + learn_rate * self.partial_w()
            self.bias = self.bias + learn_rate * self.partial_b()
            if print_every and epoch % print_every == 0:
                print(self.weight, self.bias)

    # ------------------------------------------------------------------
    # RESIDUALS
    # ------------------------------------------------------------------

    def residuals(self):
        """Return ``actual - predicted`` for every stored data point."""
        return [
            y_i - (self.weight * x_i + self.bias)
            for x_i, y_i in zip(self.x, self.y)
        ]

    def _squared_errors(self):
        """Return the squared residual of every stored data point."""
        return [residual ** 2 for residual in self.residuals()]

    # ------------------------------------------------------------------
    # EVALUATION METRICS - how well the line predicts its training data
    # ------------------------------------------------------------------

    def mse(self):
        """Return the mean squared error of the current line.

        Also stores the sum of squared errors in ``self.square_error``.
        Returns 0 when no data has been stored.
        """
        n = len(self.y)
        errors = self._squared_errors()
        self.square_error = sum(errors)
        return sum((1 / n) * error for error in errors)

    def rsquared(self):
        """Return R^2, the share of the variation in y explained by the line.

        The sum of squared errors is recomputed from the current weight and
        bias, so this works without calling ``mse`` first and never uses a
        stale value. Sets ``self.avg_y``, ``self.sum_squares`` and
        ``self.square_error`` as side effects.

        Raises:
            ZeroDivisionError: If the total sum of squares is zero (no data,
                or every y value is identical).
        """
        n = len(self.y)

        # Average of the actual y values.
        self.avg_y = sum((1 / n) * y_i for y_i in self.y)

        # Total sum of squares: squared deviation of each y from the mean.
        self.sum_squares = sum((y_i - self.avg_y) ** 2 for y_i in self.y)

        # Residual sum of squares for the line as it is right now.
        self.square_error = sum(self._squared_errors())

        return 1 - (self.square_error / self.sum_squares)



EXAMPLE WITH SIMPLE LISTS

In [None]:
# Toy dataset that lies exactly on the line y = x + 5.
x = list(range(1, 6))
y = [value + 5 for value in x]



In [None]:
%pip install matplotlib
from matplotlib import pyplot as plt

In [None]:
# Plot the raw example data, y against x.
plt.plot(x,y)

In [None]:
# Create a model with the default starting weight and bias (both 0).
model = LinearRegression()

In [None]:
# Store the example x and y values on the model.
model.vectorise(x,y)

In [None]:
# Run gradient descent; prints the current weight and bias every 10 epochs.
model.optimise()

In [None]:
# Predicted y values for the stored x, using the fitted weight and bias.
model.predict_y()

In [None]:
# Actual y minus predicted y for each data point.
model.residuals()

In [None]:
# Mean squared error of the fitted line on the training data.
model.mse()

In [None]:
# R^2 of the fitted line on the training data.
model.rsquared()