In [1]:
#to avoid warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd

In [3]:
data = pd.read_csv("data/salesprice_processed.csv")

In [4]:
data.isna().sum()

MSSubClass               0
LotFrontage              0
LotArea                  0
OverallQual              0
OverallCond              0
                        ..
SaleCondition_AdjLand    0
SaleCondition_Alloca     0
SaleCondition_Family     0
SaleCondition_Normal     0
SaleCondition_Partial    0
Length: 271, dtype: int64

In [5]:
data.shape

(1460, 271)

In [7]:
#creating X and y Datasets
y=data['SalePrice']
X=data.drop('SalePrice',axis=1)

In [8]:
y.head()

0    208500.0
1    181500.0
2    223500.0
3    140000.0
4    250000.0
Name: SalePrice, dtype: float64

In [9]:
X.shape

(1460, 270)

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,random_state=42) #0.67 data will be for training.

### Ridge Regression

In [11]:
#Ridge Regression
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

alphavalues = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3,1e-2, 1, 5, 10, 20,30]
solvervalues=['auto','svd','sag']
ridge = Ridge()

parameters = {'alpha': alphavalues,'solver':solvervalues}

ridge_regressor = GridSearchCV(ridge, parameters,cv=5)

ridge_regressor.fit(X_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.0001, 0.001, 0.01, 1,
                                   5, 10, 20, 30],
                         'solver': ['auto', 'svd', 'sag']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [12]:
#to get the best parameter for the model
ridge_regressor.best_params_

{'alpha': 10, 'solver': 'svd'}

Once we identified the best paramters we can re build the model using best parameters

In [19]:
ridge_regressor = Ridge(alpha = 10, solver = 'svd')

In [20]:
ridge_regressor.fit(X_train, y_train)

Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None, normalize=False,
      random_state=None, solver='svd', tol=0.001)

In [21]:
ypred_ridge = ridge_regressor.predict(X_test)

In [22]:
from sklearn.metrics import mean_squared_error #calculating MSE
Ridge_Error=mean_squared_error(y_test,ypred_ridge)
Ridge_Error

373715436.3544451

In [23]:
from sklearn.metrics import r2_score
r2_score(y_test,ypred_ridge)

0.9194295598122134

### Lasso Regression

In [24]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

alphavalues = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3,1e-2, 1, 5, 10, 20,30]
lasso = Lasso()

parameters = {'alpha': alphavalues}

lasso_regressor = GridSearchCV(lasso, parameters,cv=5)

lasso_regressor.fit(X_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=None,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.0001, 0.001, 0.01, 1,
                                   5, 10, 20, 30]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [25]:
#to get the best parameter for the model
lasso_regressor.best_params_

{'alpha': 30}

In [26]:
lasso_regressor=Lasso(alpha=30)
lasso_regressor.fit(X_train,y_train)
ypred_lasso=lasso_regressor.predict(X_test)

In [28]:
Lasso_Error=mean_squared_error(y_test,ypred_lasso)
Lasso_Error

382078968.61720824

In [29]:
r2_score(y_test,ypred_lasso)

0.9176264406194155

### Elastic Net

In [30]:
from sklearn.linear_model import ElasticNet

elastic = ElasticNet()

parameters = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3,1e-2, 1, 5, 10, 20]}

elastic_regressor = GridSearchCV(elastic, parameters)

elastic_regressor.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.0001, 0.001, 0.01, 1,
                                   5, 10, 20]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [31]:
elastic_regressor.best_params_

{'alpha': 0.01}

In [32]:
elastic_regressor=ElasticNet(alpha=0.01)
elastic_regressor.fit(X_train,y_train)
ypred_elastic=elastic_regressor.predict(X_test)

In [33]:
elastic_Error=mean_squared_error(y_test,ypred_elastic)
elastic_Error

386247285.5451658

In [35]:
r2_score(y_test,ypred_elastic)

0.9167277805774223

In [41]:
#RMSE
import math
RMSE_ridge = math.sqrt(Ridge_Error)
RMSE_ridge

19331.720987911165

We are creating an error dictionary

In [37]:
ErrorDict={'ElasticNet':elastic_Error,'LassoReg':Lasso_Error,'RidgeReg':Ridge_Error}

In [38]:
ErrorDict

{'ElasticNet': 386247285.5451658,
 'LassoReg': 382078968.61720824,
 'RidgeReg': 373715436.3544451}

### after evaluating the errors, Ridge Regression is the best fit for the dataset given

### Save the Model

In [42]:
#save the model for later
#pickle file with .pkl extension is a binary file saved.

from sklearn.externals import joblib
joblib.dump(ridge_regressor, 'model/LandPriceModel.pkl')

['model/LandPriceModel.pkl']

In [43]:
model2=joblib.load('model/LandPriceModel.pkl')

In [44]:
ypred=model2.predict(X_test)

In [54]:
import joblib
joblib.dump(ridge_regressor, 'model/LandPriceMode2.pkl')

['model/LandPriceMode2.pkl']

In [55]:
model2=joblib.load('model/LandPriceMode2.pkl')

In [56]:
ypred=model2.predict(X_test)

In [58]:
joblib.dump(ridge_regressor,r"C:\Users\Christy\Desktop\ml model\model.pkl")

['C:\\Users\\Christy\\Desktop\\ml model\\model.pkl']

In [46]:
#END