<a href="https://colab.research.google.com/github/VitikaJain25/Data_Science/blob/master/MultipleTypesOfRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Regression
- Linear Regression
- LASSO Regression
- RIDGE Regression
- SGD Regression

In [0]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
import io

from google.colab import files  # Colab-only helper for uploading files from the local machine

# Ask the user to pick a file; the result is indexed by file name below and
# the value is handed to io.BytesIO, i.e. it is the raw file content.
uploaded = files.upload()

# Wrap the uploaded CSV content in an in-memory buffer so pandas can parse it.
data = pd.read_csv(
    io.BytesIO(uploaded['diamonds_processed.csv'])
)

Saving diamonds_processed.csv to diamonds_processed.csv


In [4]:
# Sanity check on the loaded frame: (number of rows, number of columns).
data.shape

(10788, 20)

In [3]:
# Preview the first rows to confirm the columns and encoded values look as expected.
data.head()

Unnamed: 0,clarity,price,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_D,color_E,color_F,color_G,color_H,color_I,color_J,carat,depth,table,x,y,z
0,5,2347,0,0,1,0,0,0,0,0,1,0,0,0,-0.426762,0.183806,-0.215603,-0.297549,-0.252463,-0.237666
1,3,17108,0,0,1,0,0,0,0,1,0,0,0,0,1.588656,0.183806,-1.102293,1.480844,1.486613,1.420743
2,5,1838,0,0,1,0,0,0,0,0,1,0,0,0,-0.636701,0.183806,-0.658948,-0.582092,-0.540852,-0.505151
3,3,3625,0,0,1,0,0,1,0,0,0,0,0,0,-0.111853,-0.020553,-1.102293,0.067022,0.088361,0.069942
4,3,5729,1,0,0,0,0,0,1,0,0,0,0,0,0.412995,3.589792,-1.545638,0.431592,0.376751,0.818901


In [0]:
# Helper function to perform simple Linear Regression
from sklearn.linear_model import LinearRegression

def linear_model(x_train, y_train):
  """Fit a plain ``LinearRegression`` estimator on the training data.

  Prints the banner ``Linear Regression`` to stdout before fitting.

  Args:
    x_train: Training features, forwarded unchanged to ``fit``.
    y_train: Training target values, forwarded unchanged to ``fit``.

  Returns:
    The ``LinearRegression`` instance after ``fit`` has been called on it.
  """
  print("Linear Regression")
  estimator = LinearRegression()
  estimator.fit(x_train, y_train)
  return estimator

In [0]:
# Helper function to perform LASSO Regression
# LASSO is a regularized regression model.
# alpha is the strength of the regularization that we want to apply to our linear model.
# It is a constant that multiplies the regularization penalty term,
# which in the case of Lasso is the sum of the absolute values of the regression coefficients.

from sklearn.linear_model import Lasso

def lasso_model(x_train, y_train, alpha = 0.8, max_iter = 10000):
  """Fit a ``Lasso`` (L1-regularized) regression on the training data.

  Prints the banner ``Lasso Regression`` to stdout before fitting.

  The regularization strength and iteration cap used to be hard-coded; they
  are now keyword parameters whose defaults reproduce the previous behavior,
  so ``lasso_model(x_train, y_train)`` is unchanged. Other settings can be
  bound with ``functools.partial(lasso_model, alpha=...)`` when a two-argument
  callable is required.

  Args:
    x_train: Training features, forwarded unchanged to ``fit``.
    y_train: Training target values, forwarded unchanged to ``fit``.
    alpha: Value passed as ``Lasso(alpha=...)``. Defaults to 0.8.
    max_iter: Value passed as ``Lasso(max_iter=...)``. Defaults to 10000.

  Returns:
    The ``Lasso`` instance after ``fit`` has been called on it.
  """
  print("Lasso Regression")
  lasso_regression = Lasso(alpha = alpha, max_iter = max_iter)
  lasso_regression.fit(x_train, y_train)
  return lasso_regression

In [0]:
# Helper function to perform RIDGE Regression

from sklearn.linear_model import Ridge

def ridge_model(x_train, y_train, alpha = 0.9):
  """Fit a ``Ridge`` (L2-regularized) regression on the training data.

  Prints the banner ``Ridge Regression`` to stdout before fitting.

  The regularization strength used to be hard-coded; it is now a keyword
  parameter whose default reproduces the previous behavior, so
  ``ridge_model(x_train, y_train)`` is unchanged.

  Args:
    x_train: Training features, forwarded unchanged to ``fit``.
    y_train: Training target values, forwarded unchanged to ``fit``.
    alpha: Value passed as ``Ridge(alpha=...)``. Defaults to 0.9.

  Returns:
    The ``Ridge`` instance after ``fit`` has been called on it.
  """
  print("Ridge Regression")
  ridge_regression = Ridge(alpha = alpha)
  ridge_regression.fit(x_train, y_train)
  return ridge_regression

In [0]:
# To perform different kinds of regression

def build_and_train_model(data, target_name, reg_fn, test_size = 0.2, random_state = 0):
  """Split ``data``, train a regressor via ``reg_fn`` and report its scores.

  The column ``target_name`` is used as the target and every other column as
  a feature. The rows are split with ``train_test_split``, ``reg_fn`` is
  called on the training part, and two scores are printed: the model's own
  ``score`` on the training split and ``r2_score`` of its predictions on the
  test split.

  Args:
    data: DataFrame holding both the feature columns and the target column.
    target_name: Name of the target column in ``data``.
    reg_fn: Callable ``reg_fn(x_train, y_train)`` returning a fitted model
      that provides ``score(X, y)`` and ``predict(X)``.
    test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the value
      that was previously hard-coded.
    random_state: Forwarded to ``train_test_split``. Defaults to 0, the value
      that was previously hard-coded.

  Returns:
    A dict with the keys ``'model'``, ``'x_train'``, ``'x_test'``,
    ``'y_train'``, ``'y_test'`` and ``'y_pred'`` (as before), plus
    ``'train_score'`` and ``'test_score'`` so the printed scores can also be
    compared programmatically.
  """
  X = data.drop(target_name, axis = 1)
  Y = data[target_name]

  x_train, x_test, y_train, y_test = train_test_split(
      X, Y, test_size = test_size, random_state = random_state)

  model = reg_fn(x_train, y_train)

  # Score on the data the model was fitted on.
  train_score = model.score(x_train, y_train)
  print("Training Score: ", train_score)

  # Score on held-out rows the model has not seen.
  y_pred = model.predict(x_test)
  test_score = r2_score(y_test, y_pred)
  print("Test Score", test_score)

  return {'model' : model,
          'x_train' : x_train,
          'x_test' : x_test,
          'y_train' : y_train,
          'y_test' : y_test,
          'y_pred': y_pred,
          'train_score' : train_score,
          'test_score' : test_score}

In [11]:
# Train and evaluate plain linear regression with 'price' as the target column.
linear_reg = build_and_train_model(data, 'price', linear_model)

Linear Regression
Training Score:  0.9091447208884681
Test Score 0.8906783376037026


In [12]:
# Same split and target as above, this time with the Lasso helper.
lasso_reg = build_and_train_model(data, 'price', lasso_model)

Lasso Regression
Training Score:  0.9091329011526925
Test Score 0.8908237795556072


In [13]:
# Same split and target as above, this time with the Ridge helper.
ridge_reg = build_and_train_model(data, 'price', ridge_model)

Ridge Regression
Training Score:  0.9091438449304262
Test Score 0.8907743310343261


In [14]:
# build_and_train_model returns a dictionary (fitted model, train/test splits
# and test-set predictions), so details such as the estimator can be inspected.
linear_reg['model']

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [0]:
# Helper function to perform SGD Regression
# Useful for very large Datasets
from sklearn.linear_model import SGDRegressor

def sgd_model(x_train, y_train, max_iter = 2000, random_state = 0):
  """Fit an ``SGDRegressor`` on the training data.

  Prints the banner ``SGD Regression`` to stdout before fitting.

  The estimator was previously built without ``random_state``, so repeated
  runs could give different scores. A fixed seed is now passed by default to
  make the notebook reproducible; pass ``random_state=None`` to get the
  unseeded behavior back. ``max_iter`` keeps its previous value as default.

  Args:
    x_train: Training features, forwarded unchanged to ``fit``.
    y_train: Training target values, forwarded unchanged to ``fit``.
    max_iter: Value passed as ``SGDRegressor(max_iter=...)``. Defaults to 2000.
    random_state: Value passed as ``SGDRegressor(random_state=...)``.
      Defaults to 0.

  Returns:
    The ``SGDRegressor`` instance after ``fit`` has been called on it.
  """
  print("SGD Regression")
  sgd_regression = SGDRegressor(max_iter = max_iter, random_state = random_state)
  sgd_regression.fit(x_train, y_train)
  return sgd_regression

In [16]:
# Same split and target as the earlier models, this time with the SGD helper.
sgd_reg = build_and_train_model(data, 'price', sgd_model)

SGD Regression
Training Score:  0.9089869716368235
Test Score 0.8916776442664136
