<a href="https://colab.research.google.com/github/thefr33radical/projects/blob/master/research/Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression

## References


*   https://ml-cheatsheet.readthedocs.io/
*   https://medium.com
*   https://scikit-learn.org/
*   An Introduction to Statistical Learning (James, Witten, Hastie, Tibshirani)


In [113]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from google.colab import files
import io

class Linear_Regression(object):
  """
  Linear regression fitted with gradient descent, meant to be run on Colab.

  Every method that takes a data frame treats its last column as the target
  and all the other columns as features.
  """

  def __init__(self,name_file):
    """
    Constructor to load data on Colab.

    Opens the Colab upload dialog and keeps the content of the uploaded file
    called name_file in self.data as an in-memory binary stream.

    Raises KeyError if no uploaded file is called name_file.
    """
    uploaded = files.upload()
    if name_file not in uploaded:
      raise KeyError(
          "%r was not among the uploaded files: %s"
          % (name_file, sorted(uploaded)))
    self.data = io.BytesIO(uploaded[name_file])

  def load_data(self,data):
    """
    Function to load data.

    data is a path or file-like object that pandas.read_csv accepts.
    Returns a data frame whose feature columns (all but the last) have been
    standardized with StandardScaler; the last column is left untouched.
    """
    data = pd.read_csv(data,index_col=False)

    # 'Unnamed: 0' is the row-number column some CSV exports carry; tolerate
    # files that do not have it instead of failing with a KeyError.
    data = data.drop(columns=['Unnamed: 0'],errors='ignore')

    scaler = StandardScaler()
    feature_columns = data.columns[:-1]
    # Replace whole columns (rather than writing through iloc) so integer
    # feature columns can become float without a dtype conflict.
    data[feature_columns] = scaler.fit_transform(data[feature_columns])
    return data

  def predict(self, weights,x_array,bias):
    """
    Function to predict using weights and bias.

    Returns the dot product of weights and x_array, plus bias.
    """
    return (np.dot(weights,x_array)+bias)

  def _features_and_target(self,data):
    """
    Split data into a float feature matrix and a float target vector.

    Raises ValueError when data has no rows, because the mean-based
    formulas used by the callers are undefined in that case.
    """
    if len(data) == 0:
      raise ValueError("data must contain at least one row")
    features = data.iloc[:,:-1].to_numpy(dtype=float)
    target_y = data.iloc[:,-1].to_numpy(dtype=float)
    return features,target_y

  def cost_function(self,data,weights,bias):
    """
    Function implements the halved MSE: sum((y - y_hat)**2) / (2 * n).

    Raises ValueError if data has no rows.
    """
    features,target_y = self._features_and_target(data)
    # One matrix product predicts every row at once.
    predict_y = features @ np.asarray(weights,dtype=float) + bias
    residual = target_y - predict_y
    return np.sum(residual**2)/(2*len(target_y))

  def stochastic_gradient_descent(self,data,weights,bias,lr=0.01):
    """
    Function performs one gradient descent step and returns (weights, bias).

    Despite the name, the gradient is averaged over every row of data, so
    this is a full-batch step. The weights argument is not modified; a new
    array is returned. lr is the learning rate.

    Raises ValueError if data has no rows.
    """
    features,target_y = self._features_and_target(data)
    # Work on a float copy so the caller's array is left untouched.
    weights = np.array(weights,dtype=float)

    z = target_y - (features @ weights + bias)

    # Mean over the rows of -2 * x_j * z for each feature j, and of -2 * z
    # for the bias.
    weight_gradient = -2 * (features.T @ z) / len(z)
    bias_gradient = -2 * np.mean(z)

    return weights - lr*weight_gradient, bias - lr*bias_gradient

  def train(self,data,epochs=500):
    """
    Function to train on data with gradient descent.

    Starts from random weights and bias, runs epochs update steps, prints
    the parameters that reached the lowest cost and returns them as
    (weights, bias, cost).
    """
    weights = np.random.normal(-2,2,len(data.columns)-1)
    bias =random.randint(-1,1)

    # Start from "no cost seen yet" so the first step is always recorded.
    minimum = float("inf")
    min_weight = weights.copy()
    min_bias = bias

    for _ in range(epochs):
      weights,bias = self.stochastic_gradient_descent(data,weights,bias)

      current = self.cost_function(data,weights,bias)
      if current < minimum:
        minimum = current
        # Snapshot the array: keeping a bare reference would let later
        # steps overwrite the best weights found so far.
        min_weight = np.copy(weights)
        min_bias = bias

    print("Weights",min_weight,"bias",min_bias,"cost",minimum)
    return min_weight,min_bias,minimum

  def verify(self,data):
    """
    Function to compare against scikit-learn's LinearRegression.

    Fits LinearRegression on the same features and target and prints its
    coefficients, intercept and score on that same data.
    """
    features = data.iloc[:,:-1]
    target_y = data.iloc[:,-1]
    model = LinearRegression()
    z = model.fit(features,target_y)
    print(z.coef_,z.intercept_,z.score(features,target_y))
    
if __name__ == "__main__":
  # Name of the file to pick out of the Colab upload; swap in input() to be
  # asked for it interactively instead.
  name_of_file = "Advertising.csv"

  # Upload and load the CSV, fit our own model, then print scikit-learn's
  # fit on the same data for comparison.
  obj = Linear_Regression(name_of_file)
  data = obj.load_data(obj.data)
  obj.train(data)
  obj.verify(data)

Saving Advertising.csv to Advertising (84).csv
Weights [ 3.91905261  2.79367935 -0.02410261] bias 14.02196576515344 cost 1.392064954615003
[ 3.91925365  2.79206274 -0.02253861] 14.0225 0.8972106381789521
