In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns

# Let pandas print up to 100 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 100)

In [None]:
# Labels of the dataset variants; each label <name> has a matching pair of
# files `train.<name>.csv` / `test.<name>.csv` in DATA_DIR, indexed by `Id`.
set_names = ['1H.noskew', '1H.skew', 'ALL.outno', 'ALL.outyes', 'TA.noskew', 'TA.skew']

DATA_DIR = '../input/ames-regression-cleaned'

# Build every path from `set_names` so the labels and the loaded frames
# cannot drift out of sync (previously twelve hand-written read_csv calls).
train_sets = [
    pd.read_csv(f'{DATA_DIR}/train.{name}.csv', index_col='Id')
    for name in set_names
]
test_sets = [
    pd.read_csv(f'{DATA_DIR}/test.{name}.csv', index_col='Id')
    for name in set_names
]

# Keep the numbered names bound to the same frames for any cell that still
# refers to them directly.
train1, train2, train3, train4, train5, train6 = train_sets
test1, test2, test3, test4, test5, test6 = test_sets

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import cross_val_score

# Shared cross-validation splitter used by `cv_rmse`: 10 shuffled folds with
# a fixed seed so fold membership is the same on every run.
kfolds = KFold(
    n_splits=10,
    shuffle=True,
    random_state=7,
)

def rmse(y, y_pred):
    """Root-mean-squared error after undoing the log transform.

    Both ``y`` and ``y_pred`` are passed through ``np.expm1`` first, so the
    error is measured on the back-transformed values.

    Parameters
    ----------
    y : array-like
        True targets (``log1p`` scale).
    y_pred : array-like
        Predicted targets (``log1p`` scale).

    Returns
    -------
    float
        Square root of the mean squared error of the back-transformed values.
    """
    # scikit-learn's mean_squared_error is imported under the alias `mse`;
    # the un-aliased name is not defined in this notebook (NameError).
    return np.sqrt(mse(np.expm1(y), np.expm1(y_pred)))

def rmsle(y, y_pred):  # because y and y_pred have been log transformed already
    """Root-mean-squared error computed directly on the given values.

    No transform is applied here; since the inputs are already
    log-transformed, the result is a root-mean-squared *log* error.

    Parameters
    ----------
    y : array-like
        True targets (already log-transformed).
    y_pred : array-like
        Predicted targets (already log-transformed).

    Returns
    -------
    float
        Square root of the mean squared error between ``y`` and ``y_pred``.
    """
    # scikit-learn's mean_squared_error is imported under the alias `mse`;
    # the un-aliased name is not defined in this notebook (NameError).
    return np.sqrt(mse(y, y_pred))

def cv_rmse(model, X, y):
    """Return the cross-validated RMSE of ``model`` on ``(X, y)``, one value per fold.

    ``cross_val_score`` is run with the module-level ``kfolds`` splitter and
    the ``neg_mean_squared_error`` scorer; the scores are negated back to a
    positive MSE and square-rooted.
    """
    neg_mse_per_fold = cross_val_score(
        model, X, y, scoring="neg_mean_squared_error", cv=kfolds
    )
    mse_per_fold = -neg_mse_per_fold
    return np.sqrt(mse_per_fold)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import Ridge, ElasticNet, Lasso
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials
from hyperopt.pyll.base import scope

### Running linear models: Lasso, Ridge and ElasticNet
Tuning the hyperparameters with `Hyperopt`.

In [None]:
# Hyperopt search spaces, one per estimator.
# `hp.loguniform(label, low, high)` takes its bounds on the log scale, so
# alpha is searched between exp(-10) and exp(10).
space_lasso = dict(alpha=hp.loguniform('alpha', -10, 10))
space_ridge = dict(alpha=hp.loguniform('alpha', -10, 10))
space_elastic = dict(
    alpha=hp.loguniform('alpha', -10, 10),
    l1_ratio=hp.uniform('l1_ratio', 0, 1),  # mix between the L1 and L2 penalties
)

# Parallel lists: position i of each list describes the same estimator.
models = [Lasso, Ridge, ElasticNet]
model_names = ['Lasso', 'Ridge', 'ElasticNet']
spaces = [space_lasso, space_ridge, space_elastic]

In [None]:
# Best hyperparameters found, keyed by model name; each list receives one
# dict per dataset variant (hyperparameters + 'set_name' + 'best_loss').
params = {name: [] for name in model_names}

# The submission template is the same for every run, so read it once
# instead of once per (dataset, model) pair.
sample_submission = pd.read_csv('../input/ames-regression-cleaned/sample_submission.csv', index_col='Id')

for set_name, train, test in zip(set_names, train_sets, test_sets):
    # Split off the target WITHOUT mutating `train`: the previous
    # `train.pop('SalePrice')` removed the column from the frames held in
    # `train_sets`, so re-running this cell raised a KeyError.
    y = train['SalePrice']
    X = train.drop(columns='SalePrice')

    for model_name, space, model in zip(model_names, spaces, models):

        # NOTE(review): `normalize=True` is only accepted by older
        # scikit-learn releases -- confirm the pinned environment before
        # upgrading.
        def objective(space_):
            """Hyperopt objective: mean 10-fold CV RMSE for one sampled hyperparameter set."""
            mean_cv_loss = cv_rmse(model(**space_, normalize=True, random_state=7), X, y).mean()
            return {'loss': mean_cv_loss, 'status': STATUS_OK }

        trials = Trials()
        rstate = np.random.RandomState(7)  # fixed seed so the TPE search is repeatable
        best = fmin(fn = objective, space = space, algo = tpe.suggest, 
                    max_evals = 250, trials = trials, rstate=rstate)
        print('For', model_name, 'the best parameters are: \n', best)

        # Refit on the full training set with the winning hyperparameters and
        # map the predictions back with expm1 before writing them out.
        best_predictor = model(**best, normalize=True, random_state=7)
        best_predictor.fit(X, y)
        y_pred = np.expm1(best_predictor.predict(test))

        # Annotate the result only after `best` has been used as estimator kwargs.
        best['set_name'] = set_name
        best['best_loss'] = min(trials.losses())
        params[model_name].append(best)

        # Predictions are assigned positionally -- assumes `test` rows are in
        # the same order as the submission template (TODO confirm).
        submission = sample_submission.copy()
        submission['SalePrice'] = y_pred
        submission.to_csv(f'{set_name}.{model_name}.csv')

    print('Set ', set_name, 'completed.\n')

In [None]:
# Lasso: best CV loss and tuned alpha for each dataset variant.
lasso_columns = ['set_name', 'best_loss', 'alpha']
pd.DataFrame(params['Lasso'])[lasso_columns]

In [None]:
# Ridge: best CV loss and tuned alpha for each dataset variant.
ridge_columns = ['set_name', 'best_loss', 'alpha']
pd.DataFrame(params['Ridge'])[ridge_columns]

In [None]:
# ElasticNet: best CV loss, tuned alpha and l1_ratio for each dataset variant.
elasticnet_columns = ['set_name', 'best_loss', 'alpha', 'l1_ratio']
pd.DataFrame(params['ElasticNet'])[elasticnet_columns]