In [1]:
import numpy as np
import pandas as pd
import pickle as pk
from xgboost import XGBRegressor
from sklearn.model_selection import KFold
from sklearn.linear_model import Lasso
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LassoLarsIC
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as ms

In [25]:
# Load the three pickled inputs: the labelled feature matrix, its targets and
# the feature matrix that the final predictions are made for (presumably
# written by an earlier preprocessing step -- confirm).
# Each file is opened in its own `with` block so the handle is closed as soon
# as the object has been read; rebinding one handle name three times and
# closing it once would leave the first two files open.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('data/features.pickle', 'rb') as fh:
    features = pk.load(fh)
with open('data/labels.pickle', 'rb') as fh:
    labels = pk.load(fh)
with open('data/test.pickle', 'rb') as fh:
    test = pk.load(fh)

In [26]:
# Keep 30% of the labelled rows aside as a hold-out split (fixed seed so the
# partition is repeatable), then display the shape of every matrix in play.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=12,
)
tuple(part.shape for part in (X_train, X_test, y_train, y_test, test))

((1019, 70), (437, 70), (1019,), (437,), (1459, 70))

In [27]:
# ===== Base learner 1: XGBRegressor =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred1` keeps the hold-out predictions for later use.
model = XGBRegressor(random_state=2, max_depth=6, base_score=0.2).fit(
    X_train, np.log(y_train)
)
pred1 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred1))
print("\nXGRegression score:", score)


XGRegression score: 0.12406661804687603


In [28]:
# ===== Base learner 2: RandomForestRegressor =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred2` keeps the hold-out predictions for later use.
model = RandomForestRegressor(random_state=2, n_estimators=90, max_depth=13).fit(
    X_train, np.log(y_train)
)
pred2 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred2))
print("\nRandomForestRegressor score:", score)


RandomForestRegressor score: 0.13780263921630648


In [29]:
# ===== Base learner 3: LassoLarsIC =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred3` keeps the hold-out predictions for later use.
# NOTE(review): newer scikit-learn releases appear to have dropped the
# `normalize` argument -- confirm the version this notebook is pinned to.
model = LassoLarsIC(normalize=False).fit(X_train, np.log(y_train))
pred3 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred3))
print("\nLassoLarsIC score:", score)


LassoLarsIC score: 0.1383388949709899


In [30]:
# ===== Base learner 4: BayesianRidge =====
# Trained on log-transformed targets with default hyper-parameters; `score` is
# the RMSE in log space on the hold-out split, `pred4` the hold-out predictions.
model = BayesianRidge().fit(X_train, np.log(y_train))
pred4 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred4))
print("\nBayesianRidge score:", score)


BayesianRidge score: 0.11756096114839597


In [31]:
# -----------------GradientBoostingRegressor--------------------
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred5` keeps the hold-out predictions for later use.
# random_state is pinned (same seed as the other seeded learners in this
# notebook) so that re-running the cell reproduces the same fit: the estimator
# draws random numbers while growing its trees.

model = GradientBoostingRegressor(random_state=2)
model.fit(X_train, np.log(y_train))
pred5 = model.predict(X_test)
score = np.sqrt(ms(np.log(y_test), pred5))
print("\nGradientBoostingRegressor score:", score)


GradientBoostingRegressor score: 0.12722377952144814


In [32]:
# ===== Base learner 6: Lasso =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred6` keeps the hold-out predictions for later use.
model = Lasso(alpha=0.0002, random_state=2).fit(X_train, np.log(y_train))
pred6 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred6))
print("\nLasso score:", score)


Lasso score: 0.11836704860670083


In [33]:
# ===== Base learner 7: ElasticNet =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred7` keeps the hold-out predictions for later use.
model = ElasticNet(random_state=2, alpha=0.0001).fit(X_train, np.log(y_train))
pred7 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred7))
print("\nElasticNet score:", score)


ElasticNet score: 0.11813518146984922


In [34]:
# ===== Base learner 8: KernelRidge =====
# Trained on log-transformed targets; `score` is the RMSE in log space on the
# hold-out split, and `pred8` keeps the hold-out predictions for later use.
model = KernelRidge(alpha=0.006).fit(X_train, np.log(y_train))
pred8 = model.predict(X_test)
score = np.sqrt(ms(y_true=np.log(y_test), y_pred=pred8))
print("\nKernelRidge score:", score)


KernelRidge score: 0.11755709745310347


In [35]:
# Stack the eight base-learner prediction vectors side by side, one column per
# learner, to form the feature matrix for the second-level model.
new_training = pd.DataFrame({
    'pred1': pred1,
    'pred2': pred2,
    'pred3': pred3,
    'pred4': pred4,
    'pred5': pred5,
    'pred6': pred6,
    'pred7': pred7,
    'pred8': pred8,
})

In [36]:
# Second-level model: a random forest that learns to map the base learners'
# hold-out predictions onto the log-transformed hold-out targets.
model_new = RandomForestRegressor(random_state=2).fit(new_training, np.log(y_test))
model_new



RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=2, verbose=0, warm_start=False)

In [37]:
# Refit every base learner on the training split (log-transformed targets) and
# predict the `test` feature matrix. The learners are listed once and handled
# by a single loop instead of eight copy-pasted fit/predict stanzas.
#
# Order matters: position i in the list feeds `pred<i+1>`, which is the column
# layout the second-level model was trained on.
base_models = [
    XGBRegressor(random_state=2, max_depth=6, base_score=0.2),
    RandomForestRegressor(random_state=2, n_estimators=90, max_depth=13),
    LassoLarsIC(normalize=False),
    BayesianRidge(),
    # Seeded so the refit is reproducible from run to run; the estimator
    # draws random numbers while growing its trees.
    GradientBoostingRegressor(random_state=2),
    Lasso(alpha=0.0002, random_state=2),
    ElasticNet(random_state=2, alpha=0.0001),
    KernelRidge(alpha=0.006),
]

log_y_train = np.log(y_train)
test_preds = []
for model in base_models:
    model.fit(X_train, log_y_train)
    test_preds.append(model.predict(test))

# NOTE: this rebinds pred1..pred8 -- from here on they hold predictions for
# `test`, no longer the hold-out predictions computed earlier in the notebook.
pred1, pred2, pred3, pred4, pred5, pred6, pred7, pred8 = test_preds

In [38]:
# Assemble the second-level feature matrix for prediction: one column per base
# learner, using the same column names and order as the training frame.
new_testing = pd.DataFrame({
    'pred1': pred1,
    'pred2': pred2,
    'pred3': pred3,
    'pred4': pred4,
    'pred5': pred5,
    'pred6': pred6,
    'pred7': pred7,
    'pred8': pred8,
})

In [39]:
# Read the raw CSV only to recover the `Id` column used as the index of the
# output file. It is bound to its own name so the preprocessed feature matrix
# held in `test` is not overwritten -- otherwise re-running the cell that
# predicts on `test` would silently feed it the raw CSV instead.
test_raw = pd.read_csv('data/test.csv')
index = test_raw['Id']

In [40]:
# Run the second-level model on the stacked predictions, undo the log
# transform with exp, and write the result as a one-column CSV indexed by Id.
pred_sumb = model_new.predict(new_testing)
my_out = pd.DataFrame(
    data=np.exp(pred_sumb),
    index=index,
    columns=['SalePrice'],
)
my_out.to_csv(path_or_buf='data/my_out.csv')