**------------------------------------------------------------------------------------------------------------------------**
# Model Building
**------------------------------------------------------------------------------------------------------------------------**

In [6]:
import pandas as pd

# Load the housing dataset; the relative path is resolved against the
# kernel's current working directory.
house = pd.read_csv(filepath_or_buffer='housing.csv')

In [7]:
# Build the feature matrix and the regression target as NumPy arrays.
# "Id", "SalePrice" and "TransformedPrice" are excluded from the predictors;
# "TransformedPrice" is the column the models are trained to predict.
non_feature_columns = ["Id", "SalePrice", "TransformedPrice"]
X = house.drop(columns=non_feature_columns).values
y = house["TransformedPrice"].values

In [8]:
# Split into train and test, then standardise the features.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the raw rows first. With the same random_state and the same number of
# rows, the shuffle is the same as when splitting pre-scaled data, so the
# train/test membership does not change.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Fit the scaler on the training rows only, so that the test rows' mean and
# variance do not leak into the features the models are trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that still refers to X_scaled: the full matrix,
# standardised with the training-set statistics.
X_scaled = scaler.transform(X)

In [9]:
# Candidate regularisation strengths searched by the "RidgeCV" and "LassoCV" models
alphas = [
    0.0001, 0.001, 0.01, 0.1, 0.2, 0.5,
    1.0, 2.0, 5.0, 10.0,
    20, 50, 100, 1000,
]

In [10]:
# Importing the relevant libraries
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.model_selection import GridSearchCV

---
## Linear Model
---

In [11]:
#Define the RMSE metric used to evaluate every model, then build a linear model
from sklearn.metrics import mean_squared_error
import numpy as np
def rmse(y_train, y_pred):
    """Return the root-mean-squared error between two sets of values.

    Args:
        y_train: Reference (true) target values. Despite the name, any
            target array can be passed here (the notebook also passes
            the test targets).
        y_pred: Predicted values to compare against ``y_train``.

    Returns:
        The square root of ``mean_squared_error(y_train, y_pred)``.
    """
    mse = mean_squared_error(y_train, y_pred)
    return np.sqrt(mse)

In [12]:
# Unregularised linear-regression baseline, fitted on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

LinearRegression()

In [13]:
# Compare the baseline's error on the data it was fitted on vs. held-out data.
lr_rmse_train = rmse(y_train, lr.predict(X_train))
lr_rmse_test = rmse(y_test, lr.predict(X_test))
print("RMSE on train:", round(lr_rmse_train, 4))
print("RMSE on test:", round(lr_rmse_test, 4))

RMSE on train: 0.1199
RMSE on test: 0.1839


---
## Lasso
---
1. Run Lasso with alpha = 1

In [14]:
# Lasso regression with a fixed penalty strength of alpha = 1,
# fitted on the training split.
lasso = Lasso(alpha=1)
lasso.fit(X=X_train, y=y_train)

Lasso(alpha=1)

In [15]:
# Train vs. test error of the fixed-alpha Lasso model.
lasso_rmse_train = rmse(y_train, lasso.predict(X_train))
lasso_rmse_test = rmse(y_test, lasso.predict(X_test))
print("RMSE on train:", round(lasso_rmse_train, 3))
print("RMSE on test:", round(lasso_rmse_test, 3))

RMSE on train: 0.4
RMSE on test: 0.399


### LassoCV 
2. Run LassoCV and find best alpha value (from the list of alphas given earlier)
3. In the best model, find % of features that are eliminated
4. RMSE for the best Lasso model on the test set

In [16]:
# Choose the Lasso penalty from `alphas` with 5-fold cross-validation on the
# training split, then report the selected value.
lassocv = LassoCV(alphas=alphas, cv=5).fit(X_train, y_train)
print('alpha: %f' % lassocv.alpha_)

alpha: 0.001000


In [17]:
# Share of coefficients that the cross-validated Lasso set to exactly zero.
# `.size` counts every element of the mask, so the denominator always matches
# what `.sum()` counts (and avoids calling the `__len__` dunder directly).
zeroed_mask = lassocv.coef_ == 0
print("Coefficients turning zero:", round(zeroed_mask.sum() / zeroed_mask.size * 100), '%')

Coefficients turning zero: 15 %


In [18]:
# Train vs. test error of the cross-validated Lasso model.
lassocv_rmse_train = rmse(y_train, lassocv.predict(X_train))
lassocv_rmse_test = rmse(y_test, lassocv.predict(X_test))
print("RMSE on train:", round(lassocv_rmse_train, 4))
print("RMSE on test:", round(lassocv_rmse_test, 4))

RMSE on train: 0.1203
RMSE on test: 0.1824


In [15]:
# Value of the estimator's own `.score` method on each split.
lassocv_score_train = lassocv.score(X_train, y_train)
lassocv_score_test = lassocv.score(X_test, y_test)
print("Train score:", round(lassocv_score_train, 3))
print("Test score:", round(lassocv_score_test, 3))

Train score: 0.909
Test score: 0.791


---
## Ridge
---
1. Run Ridge with alpha = 1

In [16]:
# Ridge regression with a fixed penalty strength of alpha = 1,
# fitted on the training split.
ridge = Ridge(alpha=1)
ridge.fit(X=X_train, y=y_train)

Ridge(alpha=1)

In [18]:
# Train vs. test error of the fixed-alpha Ridge model.
ridge_rmse_train = rmse(y_train, ridge.predict(X_train))
ridge_rmse_test = rmse(y_test, ridge.predict(X_test))
print("RMSE on train:", round(ridge_rmse_train, 3))
print("RMSE on test:", round(ridge_rmse_test, 3))

RMSE on train: 0.12
RMSE on test: 0.184


### RidgeCV 
2. Run RidgeCV and find best alpha value (from the list of alphas given earlier)
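3. In the best model, find the % by which a sample coefficient shrinks relative to the plain linear model (Ridge shrinks coefficients but does not eliminate features)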
4. RMSE for the best Ridge model on the test set

In [19]:
# Choose the Ridge penalty from `alphas` with 5-fold cross-validation on the
# training split, then report the selected value.
ridgecv = RidgeCV(alphas=alphas, cv=5).fit(X_train, y_train)
print('alpha: %f' % ridgecv.alpha_)

alpha: 100.000000


In [20]:
# Relative change (in %) of the first coefficient between the unregularised
# linear model and the cross-validated Ridge model.
lr_first_coef = lr.coef_[0]
ridge_first_coef = ridgecv.coef_[0]
relative_change_pct = (lr_first_coef - ridge_first_coef) / lr_first_coef * 100
print("Difference between coefficients(sample):", round(relative_change_pct), '%')

Difference between coefficients(sample): 39 %


In [21]:
# Train vs. test error of the cross-validated Ridge model.
ridgecv_rmse_train = rmse(y_train, ridgecv.predict(X_train))
ridgecv_rmse_test = rmse(y_test, ridgecv.predict(X_test))
print("RMSE on train:", round(ridgecv_rmse_train, 4))
print("RMSE on test:", round(ridgecv_rmse_test, 4))

RMSE on train: 0.121
RMSE on test: 0.1817


In [22]:
# Value of the estimator's own `.score` method on each split.
ridgecv_score_train = ridgecv.score(X_train, y_train)
ridgecv_score_test = ridgecv.score(X_test, y_test)
print("Train score:", round(ridgecv_score_train, 3))
print("Test score:", round(ridgecv_score_test, 3))

Train score: 0.908
Test score: 0.793
