# LINEAR REGRESSION 

## CHAPTER #1 - LINEAR REGRESSION MODEL 

 ### CLASS CREATION  
 This class will contain all of the model logic.   
 The class contains the Linear regression model itself. 
 Each object is a "best fit line"  i.e a line for each set of training data.   

 The attributes contain the:  
 - weight (coefficient of the independent variable, x)  
 - bias (y-intercept) of the line 

 The methods contain all the "verbs" of the model (functions that change the weight and bias), namely:  
 - how the model learns the weight and bias. 
 - how the model fits the line to the data.  
 - how the line is evaluated e.g R^2 and RMSE. 
 - how close our predictions are i.e residuals.


In [None]:
class LinearRegressionOld(object):
    """Single-feature linear regression (y = weight * x + bias) fitted by gradient descent.

    Attributes:
        weight: coefficient of the independent variable x.
        bias: y-intercept of the line.
        x, y: training data stored by ``vectorise``.
        learning_rate: step size used by ``optimise``.
        epochs: number of gradient-descent steps taken by ``optimise``.
        square_error, avg_y, sum_squares: intermediate values stored by
            ``mse`` / ``rsquared`` after they have been called.
    """

    def __init__(self, learning_rate, epochs, weight=0, bias=0):
        """Store the hyper-parameters and the starting weight and bias (0 by default)."""
        self.weight = weight
        self.bias = bias
        self.x = []                         # independent variable, filled by vectorise()
        self.y = []                         # dependent variable, filled by vectorise()
        self.learning_rate = learning_rate
        self.epochs = int(epochs)           # range() needs an int, so coerce here

    def vectorise(self, x, y):
        """Store the data points to be modelled (x and y are kept as given)."""
        self.x = x
        self.y = y

    def predict_y(self):
        """Return the list of predicted y values for every stored (x, y) pair."""
        return [self.weight * x_i + self.bias for x_i, _ in zip(self.x, self.y)]

    # NUMERICAL OPTIMISATION
    def partial_w(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to weight.

        d(MSE)/d(weight) = (2/n) * sum(x_i * (predicted_i - y_i)); this method
        returns that value multiplied by -1, i.e. the downhill direction,
        which is why ``optimise`` ADDS it to the weight.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(
            x_i * (p_i - y_i) for x_i, p_i, y_i in zip(self.x, predictions, self.y)
        )
        return (-2 / n) * gradient

    def partial_b(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to bias.

        d(MSE)/d(bias) = (2/n) * sum(predicted_i - y_i); the sign is flipped
        for the same reason as in ``partial_w``.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(p_i - y_i for p_i, y_i in zip(predictions, self.y))
        return (-2 / n) * gradient

    def optimise(self):
        """Run ``epochs`` gradient-descent steps, printing weight and bias every 10 epochs."""
        learn_rate = self.learning_rate

        for epoch in range(self.epochs):
            # The weight is updated first, so the bias step already sees the new weight.
            self.weight = self.weight + learn_rate * self.partial_w()
            self.bias = self.bias + learn_rate * self.partial_b()
            if epoch % 10 == 0:
                print(self.weight, self.bias)

    def residuals(self):
        """Return the list of residuals: actual y minus predicted y."""
        return [
            y_i - (self.weight * x_i + self.bias) for x_i, y_i in zip(self.x, self.y)
        ]

    def _sum_squared_error(self):
        """Return the sum of squared residuals for the current weight and bias."""
        return sum(residual ** 2 for residual in self.residuals())

    # EVALUATION METRICS - how well the model predicts the data it was trained on
    def mse(self):
        """Return the mean squared error (0 when no data is stored).

        Also stores the sum of squared residuals in ``self.square_error``.
        """
        n = len(self.y)
        self.square_error = self._sum_squared_error()
        return self.square_error / n if n else 0

    def rsquared(self):
        """Return R^2, the share of the variation in y explained by the model.

        The squared error is computed here, so this method no longer needs
        ``mse()`` to have been called first. Stores ``avg_y``, ``sum_squares``
        and ``square_error`` on the instance.

        Raises:
            ZeroDivisionError: if no data is stored or every y value is identical.
        """
        n = len(self.y)
        self.avg_y = sum(self.y) / n
        # Total sum of squares: squared deviation of each actual y from the mean of y.
        self.sum_squares = sum((y_i - self.avg_y) ** 2 for y_i in self.y)
        self.square_error = self._sum_squared_error()
        return 1 - (self.square_error / self.sum_squares)



EXAMPLE WITH SIMPLE LISTS
The class now takes the learning rate and the number of epochs as arguments, so both can be adjusted per model.

In [None]:
# Toy data lying exactly on the line y = x + 5, so a good fit has weight 1 and bias 5.
x = (1,2,3,4,5)
y = (6,7,8,9,10)



In [None]:
%pip install matplotlib
%pip install pandas
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
# Plot the raw data as a line through the points.
plt.plot(x,y)

In [None]:
# Create a model with learning rate 0.05 and 10000 epochs (weight and bias start at 0).
model = LinearRegressionOld(0.05, 10000 )

In [None]:
# Store the training data inside the model.
model.vectorise(x,y)

In [None]:
# Fit by gradient descent; prints the weight and bias every 10 epochs.
model.optimise()

In [None]:
# Predicted y for each stored x, using the fitted weight and bias.
model.predict_y()

In [None]:
# Actual y minus predicted y for each data point.
model.residuals()

In [None]:
# Mean squared error on the training data.
model.mse()

In [None]:
# R^2 on the training data.
model.rsquared()

## IMPLEMENTATION WITH DIFFERENT TYPES
Upon further inspection I realised that a big problem that would reveal itself is that when gradient descent runs, different types of  x and y values can make the model crash.  

For example, data passed to vectorise() as a dataframe cannot work, because the dimensions are incorrect and there are headers and indexes. Therefore I need a method to normalise all of the input for the dependent and independent variables, i.e. turn them all into iterable, indexable and numerical compound types like lists.

Thankfully, that is what my vectorise() method is for. I just need to modify it so that:  
1) It takes in the values we want to model whether they be pandas series, lists, tuples etc
2) It checks what types our inputs are coming in as. 
   -  If they are iterable and only numeric e.g. lists, tuples, numpy arrays - great 
   -  If not iterable or not only numeric - not great: the input needs to be wrapped into an iterable like a list and made numeric
3) Ensure the data in iterables like lists is numeric and not strings 
4) Store these iterables  as self.x and self.y 

## HOW DO WE CHECK TYPES OF INPUTS?

We require iterables of the type that can:
1) Can be indexed
2) Are numeric   
- This leaves us with valid data types of **lists, tuples, numpy arrays and pandas series**

In [None]:
import numpy as np 
import pandas as pd

In [None]:
def vectoriseold(x):
    """Report whether ``x`` is one of the container types the model can work with.

    Returns "Ready to go!" for a tuple, list, numpy array, pandas Series or
    pandas DataFrame, and "Just a moment" for anything else.
    """
    accepted_types = (tuple, list, np.ndarray, pd.Series, pd.DataFrame)
    return "Ready to go!" if isinstance(x, accepted_types) else "Just a moment"
    


In [None]:
y = (10,20,30) # trial with a tuple - an accepted type
vectoriseold(y)

In [None]:
x = [30,40,50] # trial with a list - an accepted type
vectoriseold(x)

In [None]:
df = pd.DataFrame({'Weight':[136.3,140.5,155.2,180.0], # trial with a two-column dataframe
                  'Age':[20,23,27,31]}) 
# pd.DataFrame is in the accepted types of vectoriseold, so this also reports "Ready to go!"
vectoriseold(df)

## CONVERTING ITERABLES INTO LISTS 
Now that we can check whether our input is an iterable of the kind we want, let us convert the inputs into a common type.
I will use Python's built-in list type.

In [None]:
def vectorisenewer(x):
    """Convert ``x`` into a plain Python list and return it.

    Tuples, lists, numpy arrays and pandas Series are converted element by
    element. pd.DataFrame is deliberately not accepted here: the regression
    handles n x 1 features, so a dataframe has the wrong dimensions. Any
    other value (including a DataFrame) is wrapped in a one-element list.

    Fix: the accepted-type branch used to return the string "Ready to go!"
    instead of the converted list.
    """
    if isinstance(x, (tuple, list, np.ndarray, pd.Series)):
        print("Ready to go!")
        return list(x)

    print("Just a moment")
    wrapped = [x]
    print("Ready to go!")
    return wrapped
        
    
    
    

In [None]:
# Double brackets select a one-column DataFrame (not a Series).
df1 = df[['Weight']]
print(df1)
# A DataFrame is not one of the accepted types of vectorisenewer, so it falls through to the
# fallback branch and is wrapped whole - header and index included - in a one-element list.
vectorisenewer(df1)
                    # the pandas metadata still needs to be stripped before modelling


## DEALING WITH PANDAS SERIES AND DATAFRAMES
So far our iterables are always just numeric values that can easily be converted into lists.
Pandas dataframes are different. They are labelled data structures and come with metadata like headers and indexes, so we must strip that metadata and store only the numeric values before running our model.

Key usage rule: when selecting features, users must use single brackets `[]` (e.g. `df['Weight']`) so that the data is a pandas Series and not a DataFrame.

In [None]:
def vectorise(x):
    """Convert supported inputs to a list, printing a status or error message.

    - pd.DataFrame: prints an error asking for a Series and returns ``x`` unchanged.
    - tuple / list / np.ndarray: returns ``list(x)``.
    - pd.Series: returns the list of its underlying values (index dropped).
    - anything else: prints an error and returns ``x`` unchanged.
    """
    if isinstance(x, pd.DataFrame):
        print("Error: Select features to create pandas series")
        return x

    if isinstance(x, pd.Series):
        converted = list(x.values)
    elif isinstance(x, (tuple, list, np.ndarray)):
        converted = list(x)
    else:
        print("Error: Please insert iteratable, numeric type like `tuple` or `pd.Series`")
        return x

    print("Ready to go!")
    return converted
        

In [None]:
df2 = df['Weight']      # single brackets select the column as a Series
df3 = df[['Weight']]    # double brackets select a one-column DataFrame
print(df2)
print(df3)
print(type(df2))        # expected to be a pandas Series
print(type(df3))        # expected to still be a DataFrame, which is not what the model wants


In [None]:

# NOTE: a notebook only echoes the last expression of a cell, so only df3.values is displayed.
df2.values # values of the Series as a 1-D array: the shape we want
df3.values # values of the DataFrame as a 2-D array: not suitable, since we want two n x 1 features

In [None]:

vectorise(df2) # valid input: a Series is converted to a list of its values


In [None]:
vectorise(df3) # DataFrame input: prints the "select features" error message

In [None]:
x = 64
vectorise(x) # unsupported type (int): prints the generic error message

In [None]:
class LinearRegression_newer(object):
    """Single-feature linear regression fitted by gradient descent, with input conversion.

    Compared with the first version, x and y are loaded separately through
    ``vectorise_x`` / ``vectorise_y``, which convert tuples, lists, numpy
    arrays and pandas Series into plain lists and reject DataFrames.

    Attributes:
        weight: coefficient of the independent variable x.
        bias: y-intercept of the line.
        x, y: training data as lists.
        learning_rate: step size used by ``optimise``.
        epochs: number of gradient-descent steps taken by ``optimise``.
        square_error, avg_y, sum_squares: intermediate values stored by
            ``mse`` / ``rsquared`` after they have been called.
    """

    def __init__(self, learning_rate, epochs, weight=0, bias=0):
        """Store the hyper-parameters and the starting weight and bias (0 by default)."""
        self.weight = weight
        self.bias = bias
        self.x = []                         # independent variable, filled by vectorise_x()
        self.y = []                         # dependent variable, filled by vectorise_y()
        self.learning_rate = learning_rate
        self.epochs = int(epochs)           # range() needs an int, so coerce here

    @staticmethod
    def _to_list(data):
        """Return ``data`` as a list, or None (after printing an error) if unsupported."""
        if isinstance(data, pd.DataFrame):
            print("Error: Select features to create pandas series")
            return None
        if isinstance(data, pd.Series):
            values = list(data.values)      # .values drops the Series index
        elif isinstance(data, (tuple, list, np.ndarray)):
            values = list(data)
        else:
            print("Error: Please insert iteratable, numeric type like `tuple` or `pd.Series`")
            return None
        print("Ready to go!")
        return values

    def vectorise_x(self, x):
        """Convert and store the independent variable; return the stored list.

        On unsupported input an error is printed and the previously stored
        ``self.x`` is kept and returned.
        """
        values = self._to_list(x)
        if values is not None:
            self.x = values
        return self.x

    def vectorise_y(self, y):
        """Convert and store the dependent variable; return the stored list.

        On unsupported input an error is printed and the previously stored
        ``self.y`` is kept and returned.
        """
        values = self._to_list(y)
        if values is not None:
            self.y = values
        return self.y

    def predict_y(self):
        """Return the list of predicted y values for every stored (x, y) pair."""
        return [self.weight * x_i + self.bias for x_i, _ in zip(self.x, self.y)]

    # NUMERICAL OPTIMISATION
    def partial_w(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to weight.

        d(MSE)/d(weight) = (2/n) * sum(x_i * (predicted_i - y_i)); this method
        returns that value multiplied by -1, i.e. the downhill direction,
        which is why ``optimise`` ADDS it to the weight.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(
            x_i * (p_i - y_i) for x_i, p_i, y_i in zip(self.x, predictions, self.y)
        )
        return (-2 / n) * gradient

    def partial_b(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to bias.

        d(MSE)/d(bias) = (2/n) * sum(predicted_i - y_i); the sign is flipped
        for the same reason as in ``partial_w``.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(p_i - y_i for p_i, y_i in zip(predictions, self.y))
        return (-2 / n) * gradient

    def optimise(self):
        """Run ``epochs`` gradient-descent steps, printing weight and bias every 10 epochs."""
        learn_rate = self.learning_rate

        for epoch in range(self.epochs):
            # The weight is updated first, so the bias step already sees the new weight.
            self.weight = self.weight + learn_rate * self.partial_w()
            self.bias = self.bias + learn_rate * self.partial_b()
            if epoch % 10 == 0:
                print(self.weight, self.bias)

    def residuals(self):
        """Return the list of residuals: actual y minus predicted y."""
        return [
            y_i - (self.weight * x_i + self.bias) for x_i, y_i in zip(self.x, self.y)
        ]

    def _sum_squared_error(self):
        """Return the sum of squared residuals for the current weight and bias."""
        return sum(residual ** 2 for residual in self.residuals())

    # EVALUATION METRICS - how well the model predicts the data it was trained on
    def mse(self):
        """Return the mean squared error (0 when no data is stored).

        Also stores the sum of squared residuals in ``self.square_error``.
        """
        n = len(self.y)
        self.square_error = self._sum_squared_error()
        return self.square_error / n if n else 0

    def rsquared(self):
        """Return R^2, the share of the variation in y explained by the model.

        The squared error is computed here, so this method no longer needs
        ``mse()`` to have been called first. Stores ``avg_y``, ``sum_squares``
        and ``square_error`` on the instance.

        Raises:
            ZeroDivisionError: if no data is stored or every y value is identical.
        """
        n = len(self.y)
        self.avg_y = sum(self.y) / n
        # Total sum of squares: squared deviation of each actual y from the mean of y.
        self.sum_squares = sum((y_i - self.avg_y) ** 2 for y_i in self.y)
        self.square_error = self._sum_squared_error()
        return 1 - (self.square_error / self.sum_squares)



### PRACTICE WITH DATAFRAMES

In [None]:
# Small two-column dataset: Weight is used as the feature, Age as the target.
df = pd.DataFrame({'Weight':[136,140,155,180], 
                  'Age':[20,23,27,31]})

In [None]:
df.head()

In [None]:
# Single brackets, so x_2 is a Series.
x_2=df['Weight']
print(x_2)

In [None]:
# Single brackets, so y_2 is a Series.
y_2=df['Age']
print(y_2)

In [None]:
plt.scatter(x_2, y_2)

In [None]:
# Learning rate 0.0000005 with 10000 epochs.
model_2 =LinearRegression_newer(0.0000005, 10000)

In [None]:
# Convert the Series to a list and store it as the independent variable.
model_2.vectorise_x(x_2)

In [None]:
# Convert the Series to a list and store it as the dependent variable.
model_2.vectorise_y(y_2)

In [None]:
# Fit by gradient descent; prints the weight and bias every 10 epochs.
model_2.optimise()

In [None]:
# Predicted y for each stored x.
model_2.predict_y()

In [None]:
# Actual y minus predicted y for each data point.
model_2.residuals()

In [None]:
# Mean squared error on the training data.
model_2.mse()

In [None]:
# R^2 on the training data.
model_2.rsquared()

### QUALITY OF LIFE IMPROVEMENTS 
I want to make the whole class a little cleaner and have it display output in a better way. 
This includes: 
1) Make R^2 and MSE independent of each other. 
2) Make the output neater: print the raw values instead of "np.float64(...)" wrappers.
3) Display the inner workings of the model, such as the predicted y values. 
4) Make the class a little more efficient overall.

In [None]:
class LinearRegression(object):
    """Single-feature linear regression (y = weight * x + bias) fitted by gradient descent.

    Attributes:
        weight: coefficient of the independent variable x.
        bias: y-intercept of the line.
        x, y: training data as lists (see ``vectorise_x`` / ``vectorise_y``).
        y_predict: predictions from the most recent ``predict_y`` call.
        learning_rate: step size used by ``optimise``.
        epochs: number of gradient-descent steps taken by ``optimise``.
        avg_y, sum_squares, square_error: intermediate values stored by ``rsquared``.
    """

    def __init__(self, learning_rate, epochs, weight=0, bias=0):
        """Store the hyper-parameters and the starting weight and bias (0 by default)."""
        self.weight = weight
        self.bias = bias
        self.x = []                         # independent variable, filled by vectorise_x()
        self.y = []                         # dependent variable, filled by vectorise_y()
        self.y_predict = []                 # refreshed by predict_y()
        self.learning_rate = learning_rate
        self.epochs = int(epochs)           # range() needs an int, so coerce here

    @staticmethod
    def _to_list(data):
        """Return ``data`` as a list, or None (after printing an error) if unsupported."""
        if isinstance(data, pd.DataFrame):
            print("Error: Select features to create pandas series")
            return None
        if isinstance(data, pd.Series):
            values = list(data.values)      # .values drops the Series index
        elif isinstance(data, (tuple, list, np.ndarray)):
            values = list(data)
        else:
            print("Error: Please insert iteratable, numeric type like `tuple` or `pd.Series`")
            return None
        print("Ready to go!")
        return values

    def vectorise_x(self, x):
        """Convert and store the independent variable.

        Accepts a tuple, list, numpy array or pandas Series. On any other
        input an error is printed and the stored ``self.x`` is left unchanged.
        """
        values = self._to_list(x)
        if values is not None:
            self.x = values

    def vectorise_y(self, y):
        """Convert and store the dependent variable (same rules as ``vectorise_x``)."""
        values = self._to_list(y)
        if values is not None:
            self.y = values

    def predict_y(self):
        """Compute, store in ``self.y_predict`` and return the predicted y values."""
        self.y_predict = [
            self.weight * x_i + self.bias for x_i, _ in zip(self.x, self.y)
        ]
        return self.y_predict

    # NUMERICAL OPTIMISATION
    def partial_w(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to weight.

        d(MSE)/d(weight) = (2/n) * sum(x_i * (predicted_i - y_i)); this method
        returns that value multiplied by -1, i.e. the downhill direction,
        which is why ``optimise`` ADDS it to the weight.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(
            x_i * (p_i - y_i) for x_i, p_i, y_i in zip(self.x, predictions, self.y)
        )
        return (-2 / n) * gradient

    def partial_b(self):
        """Return the NEGATIVE partial derivative of the MSE with respect to bias.

        d(MSE)/d(bias) = (2/n) * sum(predicted_i - y_i); the sign is flipped
        for the same reason as in ``partial_w``.
        """
        n = len(self.y)
        predictions = self.predict_y()
        gradient = sum(p_i - y_i for p_i, y_i in zip(predictions, self.y))
        return (-2 / n) * gradient

    def optimise(self, log_every=10):
        """Run ``epochs`` gradient-descent steps.

        Args:
            log_every: print the weight and bias every ``log_every`` epochs
                (default 10, the original behaviour). Pass 0 or None to
                silence the output.
        """
        learn_rate = self.learning_rate

        for epoch in range(self.epochs):
            # The weight is updated first, so the bias step already sees the new weight.
            self.weight = self.weight + learn_rate * self.partial_w()
            self.bias = self.bias + learn_rate * self.partial_b()
            if log_every and epoch % log_every == 0:
                print(self.weight, self.bias)

        # The last predictions cached during the loop were computed before the
        # final bias update; refresh them so y_predict matches the fitted line.
        self.predict_y()

    # DISPLAYING MODEL INTERNALS
    def _residual_values(self):
        """Return the list of residuals: actual y minus predicted y."""
        return [
            y_i - (self.weight * x_i + self.bias) for x_i, y_i in zip(self.x, self.y)
        ]

    def residuals(self):
        """Print each residual (actual y minus predicted y) as a plain float."""
        for residual in self._residual_values():
            print(float(residual))

    def display_x(self):
        """Print every stored independent value as a plain float."""
        for x_i in self.x:
            print(float(x_i))

    def display_y(self):
        """Print every stored dependent value as a plain float."""
        for y_i in self.y:
            print(float(y_i))

    def display_predict(self):
        """Print the model's prediction for every data point as a plain float.

        Predictions are recomputed from the current weight and bias, so this
        works before ``optimise`` has run and never shows stale values.
        """
        for prediction in self.predict_y():
            print(float(prediction))

    def display_weight(self):
        """Return the current weight."""
        return self.weight

    def display_bias(self):
        """Return the current bias."""
        return self.bias

    # EVALUATION METRICS - how well the model predicts the data it was trained on
    def mse(self):
        """Return the mean squared error as a float.

        Raises:
            ZeroDivisionError: if no data is stored.
        """
        n = len(self.y)
        square_error = sum(residual ** 2 for residual in self._residual_values())
        return float(square_error / n)

    def rsquared(self):
        """Return R^2 as a float, independent of ``mse``.

        Stores ``avg_y``, ``sum_squares`` and ``square_error`` on the instance.

        Raises:
            ZeroDivisionError: if no data is stored or every y value is identical.
        """
        n = len(self.y)
        self.avg_y = sum(self.y) / n
        # Total sum of squares: squared deviation of each actual y from the mean of y.
        self.sum_squares = sum((y_i - self.avg_y) ** 2 for y_i in self.y)
        # Residual sum of squares: squared deviation of each actual y from its prediction.
        self.square_error = sum(residual ** 2 for residual in self._residual_values())
        return float(1 - (self.square_error / self.sum_squares))



In [None]:
# Same data and hyper-parameters as model_2, now with the tidied-up class.
model_3 =LinearRegression(0.0000005, 10000)

In [None]:
# Convert the Weight Series to a list and store it as the independent variable.
model_3.vectorise_x(x_2)

In [None]:
# Convert the Age Series to a list and store it as the dependent variable.
model_3.vectorise_y(y_2)

In [None]:
# Fit by gradient descent; prints the weight and bias every 10 epochs.
model_3.optimise()

In [None]:
# Prints each residual (actual y minus predicted y) as a plain float.
model_3.residuals()

In [None]:
# Prints the stored independent values.
model_3.display_x()

In [None]:
# Prints the stored dependent values.
model_3.display_y()

In [None]:
# Prints the model's predictions.
model_3.display_predict()

In [None]:
# Mean squared error on the training data.
model_3.mse()

In [None]:
# R^2 on the training data.
model_3.rsquared()

## COMPARISON TO SCIKIT-LEARN AND TENSORFLOW 
I want to compare my model to the most popular implementations available in other packages.
- This is to check whether my implementation holds up against these solutions.
- I also wish to learn from these packages any useful techniques for modelling bivariate data.


In [None]:
%pip install scikit-learn 
%pip install tensorflow 
import tensorflow as tf
import sklearn as sk
from sklearn.linear_model import LinearRegression as lr

In [None]:
# set random seed for reproducibility
np.random.seed(42)

In [None]:
# generate 100 random integer ages from 18 up to 59 (the upper bound of randint is exclusive)
age = np.random.randint(18, 60, size=100)

# generate height based loosely on age — roughly linear but with randomness
# each person gets a slightly different "slope" multiplier to make it non-perfectly linear
random_slope = np.random.normal(2.5, 0.4, size=100)  # per-person slope: mean 2.5, standard deviation 0.4
noise = np.random.normal(0, 5, size=100)             # additive noise: mean 0, standard deviation 5

# NOTE: the noise term has constant variance, but the random slope is multiplied by age,
# so the overall spread of height around the line 2.5 * age grows with age.
height = age * random_slope + noise

# create the dataframe
df = pd.DataFrame({
    'age': age,
    'height': height
})

# preview
print(df.head())

In [None]:
# single brackets: both are Series, as the custom model's vectorise methods require
x = df['age']
y = df['height']

In [None]:
plt.scatter(x,y)

### MY MODEL

In [None]:
# Custom model with learning rate 0.00015 and 100000 epochs.
my_model = LinearRegression( 0.00015, 100000)

In [None]:
# Store age as the independent variable.
my_model.vectorise_x(x)

In [None]:
# Store height as the dependent variable.
my_model.vectorise_y(y)

In [None]:
# Fit by gradient descent on the full dataset.
my_model.optimise()

In [None]:
# R^2 on the training data, kept for the comparison table.
mine_rsquared = my_model.rsquared() 


In [None]:
# Mean squared error on the training data, kept for the comparison table.
mine_mse = my_model.mse()



In [None]:
# Fitted weight (slope).
mine_weight = my_model.display_weight()

In [None]:
# Fitted bias (intercept).
mine_bias = my_model.display_bias()

### SCIKIT-LEARN MODEL 

In [None]:
#import to spilt up the data 
from sklearn.model_selection import train_test_split 

In [None]:
# selecting the features we will use for predictions
# (double brackets: both are one-column DataFrames, i.e. 2-D inputs)
x_sk = df[['age']] 
y_sk = df[['height']]

In [None]:
# using all of the data as training data (no train/test split is performed)
x_train, y_train = x_sk, y_sk

In [None]:
# model declaration
sklearn_model = lr()

In [None]:
# model fitting
sklearn_model.fit(x_train, y_train)

In [None]:
# R^2 on training data
rsquared_train = sklearn_model.score(x_train, y_train)

In [None]:
# NOTE: y_train is 2-D, so scikit-learn is expected to report coef_ as a 2-D array
# (n_targets x n_features) rather than a scalar - see the LinearRegression docs.
sk_coef = sklearn_model.coef_
print(sk_coef)

In [None]:
# For the same reason intercept_ is expected to be a 1-D array with one entry.
sk_intercept = sklearn_model.intercept_
print(sk_intercept)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# mean_squared_error expects (y_true, y_pred). The previous call passed (x_train, y_train),
# which measured the squared gap between ages and heights instead of the model's error.
mse_train = mean_squared_error(y_train, sklearn_model.predict(x_train))
print(mse_train)

In [None]:
comparison = pd.DataFrame({
        "Model name" : ["My model", "Scikit-learn 100% train"],
        "Mean squared error" : [mine_mse, mse_train],
        "R-squared" : [mine_rsquared, rsquared_train],
        "Weight (coefficient)" : [mine_weight, float(sk_coef)],
        "Bias (intercept)" : [mine_bias, float(sk_intercept)]

})

In [None]:
comparison.head()