In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv("../DATA/final_df.csv")

In [3]:
df.head()

Unnamed: 0,1stFlrSF,2ndFlrSF,3SsnPorch,BedroomAbvGr,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,BsmtCond_Gd,BsmtCond_None,...,SaleType_WD,ScreenPorch,Street_Pave,TotRmsAbvGrd,TotalBsmtSF,Utilities_NoSeWa,WoodDeckSF,YearBuilt,YearRemodAdd,YrSold
0,856,854,0,3,0,0,0,0,0,0,...,1,0,1,8,856,0,0,2003,2003,2008
1,1262,0,0,3,0,0,0,0,0,0,...,1,0,1,6,1262,0,298,1976,1976,2007
2,920,866,0,3,0,0,0,0,0,0,...,1,0,1,6,920,0,0,2001,2002,2008
3,961,756,0,3,0,0,0,0,1,0,...,1,0,1,7,756,0,0,1915,1970,2006
4,1145,1053,0,4,0,0,0,0,0,0,...,1,0,1,9,1145,0,192,2000,2000,2008


In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X = df.drop("SalePrice", axis=1)
y = df["SalePrice"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## RIDGE Regression

In [9]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

In [10]:
ridge_model = Ridge() ## Base Ridge Model

param_grid ={'alpha':[0.1,1,5,10,50,100]}

grid_model = GridSearchCV(ridge_model , param_grid,
                          scoring='neg_mean_squared_error',
                          cv=5, verbose=1)
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    1.2s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [0.1, 1, 5, 10, 50, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=1)

In [11]:
print(grid_model.best_params_)
print(grid_model.best_estimator_)

{'alpha': 100}
Ridge(alpha=100, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [12]:
ridge_predictions = grid_model.predict(X_test)

In [13]:
from sklearn.metrics import mean_squared_error , mean_absolute_error

MAE = mean_absolute_error(y_test , ridge_predictions)
MSE = mean_squared_error(y_test , ridge_predictions)
RMSE = np.sqrt(mean_squared_error(y_test , ridge_predictions))

In [14]:
MAE, MSE , RMSE

(16622.434889273474, 627778456.5453106, 25055.507509234583)

## LASSO Regression

In [15]:
from sklearn.linear_model import Lasso

lasso_model = Lasso() ## Base Ridge Model

param_grid ={'alpha':[0.1,1,5,10,50,100]}

grid_model = GridSearchCV(lasso_model , param_grid,
                          scoring='neg_mean_squared_error',
                          cv=5, verbose=1)
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    9.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=None,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [0.1, 1, 5, 10, 50, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=1)

In [16]:
print(grid_model.best_params_)
print(grid_model.best_estimator_)

{'alpha': 100}
Lasso(alpha=100, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


In [17]:
lasso_predictions = grid_model.predict(X_test)

MAE = mean_absolute_error(y_test , lasso_predictions)
MSE = mean_squared_error(y_test , lasso_predictions)
RMSE = np.sqrt(mean_squared_error(y_test , lasso_predictions))

In [18]:
MAE, MSE , RMSE

(16199.35251322005, 615214442.4640465, 24803.516735818863)

## ELASTICNET Regression

In [19]:
from sklearn.linear_model import ElasticNet

elastic_model = ElasticNet() ## Base Ridge Model

param_grid = {'alpha':[0.1,1,5,10,50,100],
              'l1_ratio':[.1, .5, .7, .9, .95, .99, 1]}

grid_model = GridSearchCV(elastic_model , param_grid,
                          scoring='neg_mean_squared_error',
                          cv=5, verbose=1)
grid_model.fit(X_train, y_train)

Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
[Parallel(n_jobs=1)]: Done 210 out of 210 | elapsed:   26.3s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [0.1, 1, 5, 10, 50, 100],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=1)

In [20]:
print(grid_model.best_params_)
print(grid_model.best_estimator_)

{'alpha': 10, 'l1_ratio': 0.99}
ElasticNet(alpha=10, copy_X=True, fit_intercept=True, l1_ratio=0.99,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)


In [21]:
elastic_predictions = grid_model.predict(X_test)

MAE = mean_absolute_error(y_test , elastic_predictions)
MSE = mean_squared_error(y_test , elastic_predictions)
RMSE = np.sqrt(mean_squared_error(y_test , elastic_predictions))

In [22]:
MAE, MSE , RMSE

(16615.866617639284, 626850866.7885984, 25036.98997061345)