In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [None]:
# Read only the four predictor columns and the target from the training CSV.
data = pd.read_csv(
    'train.csv',
    usecols=['LotArea', 'YearBuilt', 'GarageCars', 'OverallCond', 'SalePrice'],
)

In [None]:
# Print the column dtypes and non-null counts to check for missing values before modelling.
data.info()

In [None]:
# Separate predictors from the target.
# X becomes a plain NumPy array (column labels are dropped by .values);
# y stays a pandas Series.
feature_columns = ['LotArea', 'YearBuilt', 'GarageCars', 'OverallCond']
X = data[feature_columns].values
y = data['SalePrice']

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline model: ridge regression with the regularisation strength set to 1.0.
# fit() returns the estimator itself, so construction and fitting can be chained.
lr_r = Ridge(alpha=1.0).fit(X_train, y_train)

# Predict on both splits so train and test error can be compared.
train_pred = lr_r.predict(X_train)
test_pred = lr_r.predict(X_test)

# RMSE is the square root of the mean squared error.
train_mse = mean_squared_error(y_train, train_pred)
test_mse = mean_squared_error(y_test, test_pred)
print('rmse on train', math.sqrt(train_mse))
print('rmse on test', math.sqrt(test_mse))

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search space: 5 alphas x 2 intercept settings x 2 solvers = 20 candidates,
# each evaluated with 5-fold cross-validation on the training split.
param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100],
    'fit_intercept': [True, False],
    'solver': ['auto', 'saga'],
}

# The 'saga' solver shuffles the data with a random number generator, so the
# estimator is seeded; without it the search could pick a different winner
# on each run.
# NOTE(review): the features are not scaled; 'saga' may be slow to converge on
# unscaled inputs -- confirm no ConvergenceWarning is emitted.
lr_r = Ridge(random_state=42)
gs = GridSearchCV(lr_r, param_grid, cv=5)

gs.fit(X_train, y_train)
# No `scoring` is passed, so best_score_ is the estimator's default score
# (R^2 for a regressor) averaged over the folds -- it is not an RMSE.
print("Best: %f using %s" % (gs.best_score_, gs.best_params_))

In [None]:
# Evaluate the configuration selected by the grid search.
# The winner is taken from the search object instead of a hand-copied alpha, so
# the selected fit_intercept/solver are honoured and the cell stays correct when
# the search result changes. With the default refit=True, best_estimator_ is
# already fitted on the full training split, so no extra fit() is needed.
lr_r = gs.best_estimator_
test_pred = lr_r.predict(X_test)
train_pred = lr_r.predict(X_train)
print('rmse on train', math.sqrt(mean_squared_error(y_train, train_pred)))
print('rmse on test', math.sqrt(mean_squared_error(y_test, test_pred)))

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# uniform(loc, scale) is the uniform distribution on [loc, loc + scale],
# so alpha is sampled from [0, 100].
param_grid_rand = {'alpha': uniform(0, 100)}

lr_r = Ridge()
# random_state fixes which alphas are sampled; without it every run draws
# different candidates and reports a different "best" alpha.
# n_iter is left at its default, so 10 candidates are tried.
gs = RandomizedSearchCV(lr_r, param_grid_rand, cv=5, random_state=42)

gs.fit(X_train, y_train)
# No `scoring` is passed, so best_score_ is the estimator's default score
# (R^2 for a regressor) averaged over the folds -- it is not an RMSE.
print("Best: %f using %s" % (gs.best_score_, gs.best_params_))

In [None]:
# Evaluate the configuration selected by the randomized search.
# The winner is taken from the search object instead of a hand-copied alpha: a
# literal value goes stale as soon as the search samples different candidates.
# With the default refit=True, best_estimator_ is already fitted on the full
# training split, so no extra fit() is needed.
lr_r = gs.best_estimator_
test_pred = lr_r.predict(X_test)
train_pred = lr_r.predict(X_train)
print('rmse on train', math.sqrt(mean_squared_error(y_train, train_pred)))
print('rmse on test', math.sqrt(mean_squared_error(y_test, test_pred)))