# Libraries

In [21]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

from sklearn.model_selection import train_test_split

# for the models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold, cross_val_score

# warnings
import warnings
warnings.filterwarnings(action="ignore")

## Get new clean data

In [2]:
train = pd.read_csv("./train_final.csv")
test = pd.read_csv("./test_final.csv")

## Modeling

In [16]:
# separate predictors and prediction
y = train["SalePrice"]
X = train.drop("SalePrice", axis=1)

# k-folds
kfold = KFold(n_splits=10, random_state=0, shuffle=True)


# functions for error score
def rmse(y, y_pred):
    return np.sqrt(mean_squared_error(y, y_pred))

def cv_rmse(model):
    rmse = np.sqrt(-cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=kfold))
    return (rmse)


# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .30, random_state = 0)

Multiple Linear Regression

In [39]:
lm = LinearRegression()
lm.fit(X_train, y_train)

# Predict test data. 
y_pred = lm.predict(X_test)

print('R^2 is equal to %.4f' %(lm.score(X_test, y_test)))
print('RMSE is equal to %.4f' %rmse(y_test, y_pred))

print("-"*70)

# CV
score = cv_rmse(lm)
print("CV LM RMSE score is {:.4f}".format(score.mean()))

R^2 is equal to 0.8962
RMSE is equal to 24316.4612
----------------------------------------------------------------------
CV LM score is 23820.8770


Ridge

In [40]:
ridge = Ridge(alpha=0.1, normalize=True)
ridge.fit(X_train, y_train)

# Predict test data. 
y_pred = ridge.predict(X_test)

print('R^2 is equal to %.4f' %(ridge.score(X_test, y_test)))
print('RMSE is equal to %.4f' %rmse(y_test, y_pred))

print("-"*70)

# CV
score = cv_rmse(ridge)
print("CV Ridge RMSE score is {:.4f}".format(score.mean()))

R^2 is equal to 0.9073
RMSE is equal to 22972.2337
----------------------------------------------------------------------
CV Ridge score is 24201.8182


Lasso

In [41]:
lasso = Lasso(alpha=0.1, normalize=True)
lasso.fit(X_train, y_train)

## Predict test data. 
pred = lasso.predict(X_test)

print('R^2 is equal to %.4f' %(lasso.score(X_test, y_test)))
print('RMSE is equal to %.4f' %np.sqrt(mean_squared_error(y_test, y_pred)))

print("-"*70)

# CV
score = cv_rmse(lasso)
print("CV Lasso RMSE score is {:.4f}".format(score.mean()))

R^2 is equal to 0.8961
RMSE is equal to 22972.2337
----------------------------------------------------------------------
CV Lasso score is 23806.0864


ElasticNet

In [45]:
elasticnet = ElasticNet(alpha=0.1, l1_ratio=0.5, normalize=False)
elasticnet.fit(X_train, y_train)

## Predict test data. 
pred = elasticnet.predict(X_test)

print(r'The R^2 is %.4f' %(elasticnet.score(X_test, y_test)))
print('RMSE is equal to %.4f' %np.sqrt(mean_squared_error(y_test, y_pred)))

print("-"*70)

# CV
score = cv_rmse(elasticnet)
print("CV ElasticNet RMSE score is {:.4f}".format(score.mean()))

The R^2 is 0.8913
RMSE is equal to 22972.2337
----------------------------------------------------------------------
CV ElasticNet score is 25923.8078
