In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.cross_validation import KFold
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn import grid_search
from sklearn.metrics import mean_squared_error, make_scorer
from xgboost import XGBRegressor



In [2]:
def mean_squared_error_(ground_truth, predictions):
    """Return the root of the mean squared error between the two inputs."""
    mse = mean_squared_error(ground_truth, predictions)
    return mse ** 0.5


# Scorer wrapper around the RMSE function above; greater_is_better=False
# marks it as a loss, so reported scores are negated (callers flip the sign).
RMSE = make_scorer(mean_squared_error_, greater_is_better=False)

In [7]:
class Ensemble(object):
    """Two-level stacking ensemble for regression.

    Every base model is fitted once per cross-validation fold. Its
    predictions on the held-out part of each fold become one column of the
    second-level training matrix, and the mean of its per-fold predictions
    on the test set becomes the matching column of the second-level test
    matrix. The stacker is then tuned with a grid search on those matrices.
    """

    def __init__(self, n_folds, stacker, base_models):
        """Store the ensemble configuration.

        n_folds     -- number of folds used to build out-of-fold predictions.
        stacker     -- second-level estimator handed to GridSearchCV; it must
                       accept the ``n_estimators``, ``learning_rate`` and
                       ``subsample`` parameters searched in ``fit_predict``.
        base_models -- sequence of first-level estimators exposing
                       ``fit(X, y)`` and ``predict(X)``.
        """
        self.n_folds = n_folds
        self.stacker = stacker
        self.base_models = base_models

    def fit_predict(self, X, y, T):
        """Train the base models and the stacker, then predict for ``T``.

        X -- training features (anything ``np.array`` accepts), one row per sample.
        y -- training targets, same length as ``X``.
        T -- test features with the same columns as ``X``.

        Returns the predictions of the fitted grid search for ``T``.

        The base models are refitted in place for every fold, so after the
        call each one holds the fit from the last fold only. Progress and
        elapsed time (measured from the start of this call) are printed.
        """
        # Time the run locally instead of relying on a notebook-level
        # ``start_time`` global, which does not exist on a fresh kernel.
        started_at = time.time()

        def elapsed_minutes():
            return round(((time.time() - started_at) / 60), 2)

        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        # Fixed seed so every base model sees exactly the same fold split.
        folds = list(KFold(len(y), n_folds=self.n_folds, shuffle=True, random_state=2016))

        # One column of out-of-fold / test predictions per base model.
        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))

        for i, clf in enumerate(self.base_models):

            print('Fitting For Base Model #{0} / {1} ---'.format(i+1, len(self.base_models)))

            # Test-set predictions of this model, one column per fold.
            S_test_i = np.zeros((T.shape[0], len(folds)))

            for j, (train_idx, test_idx) in enumerate(folds):

                print('--- Fitting For Fold #{0} / {1} ---'.format(j+1, self.n_folds))

                clf.fit(X[train_idx], y[train_idx])
                # Held-out rows only ever receive predictions from a model
                # that was not trained on them.
                S_train[test_idx, i] = clf.predict(X[test_idx])
                S_test_i[:, j] = clf.predict(T)

                print('Elapsed: %s minutes ---' % elapsed_minutes())

            # Average the per-fold test predictions into a single column.
            S_test[:, i] = S_test_i.mean(1)

            print('Elapsed: %s minutes ---' % elapsed_minutes())

        print('--- Base Models Trained: %s minutes ---' % elapsed_minutes())

        # Only the number of boosting stages is actually searched; the other
        # two entries pin a single value each.
        param_grid = {
            'n_estimators': [50,60,70,80,90,100,110,120],
            'learning_rate': [0.05],
            'subsample': [0.75]
        }
        grid = grid_search.GridSearchCV(estimator=self.stacker, param_grid=param_grid, n_jobs=1, cv=5, verbose=20, scoring=RMSE)
        grid.fit(S_train, y)

        print('Param grid:')
        print(param_grid)
        print('Best Params:')
        print(grid.best_params_)
        print('Best CV Score:')
        # RMSE is registered as a loss, so the stored score is negated.
        print(-grid.best_score_)
        print('Best estimator:')
        print(grid.best_estimator_)

        print('--- Stacker Trained: %s minutes ---' % elapsed_minutes())

        return grid.predict(S_test)

In [8]:
# Load the combined feature table; the first `num_train` rows are used as the
# training set and every row after that as the test set.
df_all = pd.read_csv('df_all.csv', encoding='ISO-8859-1', index_col=0)
num_train = 74067
df_train = df_all.iloc[:num_train]
#df_train = df_train.iloc[:round(len(df_train)/20)]
df_test = df_all.iloc[num_train:]
#df_test = df_test.iloc[:round(len(df_test)/20)]

# Keep the identifiers and the target before they are removed from the features.
id_test = df_test['id']
y_train = df_train['relevance'].values

# Drop the non-feature columns without mutating the slices of `df_all` in
# place: the in-place version raised SettingWithCopyWarning, and its bare
# `except` could silently leave a column in one of the two frames.
cols_to_drop = ['id', 'relevance']
df_train = df_train.drop(cols_to_drop, axis=1)
df_test = df_test.drop(cols_to_drop, axis=1)

X_train = df_train[:]
X_test = df_test[:]

print('Number of Features: ', len(X_train.columns.tolist()))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Number of Features:  126


In [9]:
# First-level models. The two bagged tree ensembles share identical
# hyper-parameters, so they are built from the same argument list.
base_models = [
    forest_cls(
        n_jobs=1, random_state=2016, verbose=1,
        n_estimators=500, max_features=12
    )
    for forest_cls in (RandomForestRegressor, ExtraTreesRegressor)
]
base_models.append(
    GradientBoostingRegressor(
        random_state=2016, verbose=1,
        n_estimators=500, max_features=12, max_depth=8,
        learning_rate=0.05, subsample=0.8
    )
)
base_models.append(
    XGBRegressor(
        seed=2016,
        n_estimators=200, max_depth=8,
        learning_rate=0.05, subsample=0.8, colsample_bytree=0.85
    )
)

# Second-level model: a gradient-boosting regressor whose remaining
# hyper-parameters are chosen by the grid search inside Ensemble.fit_predict.
stacker_model = GradientBoostingRegressor(random_state=2017, verbose=1)
ensemble = Ensemble(n_folds=5, stacker=stacker_model, base_models=base_models)


In [10]:
# Wall-clock reference for the elapsed-time messages.
start_time = time.time()
y_pred = ensemble.fit_predict(X=X_train, y=y_train, T=X_test)

# Clamp the predictions to [1.0, 3.0] in one vectorised call instead of a
# per-element Python loop (presumably the valid relevance range -- confirm).
y_pred = np.clip(y_pred, 1.0, 3.0)
#pd.DataFrame({'id': id_test, 'relevance': y_pred}).to_csv('submission_ensemble.csv', index=False)

# NOTE(review): the CSV export above is commented out, so nothing is written
# to disk even though the message below says a submission was generated.
print('--- Submission Generated: %s minutes ---' % round(((time.time() - start_time) / 60), 2))

Fitting For Base Model #1 / 4 ---
--- Fitting For Fold #1 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 22.5min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    8.3s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  1.8min finished


Elapsed: 24.46 minutes ---
--- Fitting For Fold #2 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 23.8min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    9.1s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.0min finished


Elapsed: 50.42 minutes ---
--- Fitting For Fold #3 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 24.6min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    8.3s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  1.8min finished


Elapsed: 77.03 minutes ---
--- Fitting For Fold #4 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 22.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    8.6s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  1.8min finished


Elapsed: 101.14 minutes ---
--- Fitting For Fold #5 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 22.2min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    8.3s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  1.8min finished


Elapsed: 125.33 minutes ---
Elapsed: 125.33 minutes ---
Fitting For Base Model #2 / 4 ---
--- Fitting For Fold #1 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  9.5min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   10.8s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.3min finished


Elapsed: 137.39 minutes ---
--- Fitting For Fold #2 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  9.3min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.0s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.3min finished


Elapsed: 149.31 minutes ---
--- Fitting For Fold #3 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed: 10.1min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.0s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.3min finished


Elapsed: 162.05 minutes ---
--- Fitting For Fold #4 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  9.9min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   10.9s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.4min finished


Elapsed: 174.53 minutes ---
--- Fitting For Fold #5 / 5 ---


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  9.9min finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   11.1s finished
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  2.3min finished


Elapsed: 187.07 minutes ---
Elapsed: 187.08 minutes ---
Fitting For Base Model #3 / 4 ---
--- Fitting For Fold #1 / 5 ---
      Iter       Train Loss      OOB Improve   Remaining Time 
         1           0.2767           0.0064           14.97m
         2           0.2723           0.0060           14.82m
         3           0.2662           0.0054           15.02m
         4           0.2599           0.0050           14.94m
         5           0.2559           0.0044           14.98m
         6           0.2497           0.0042           14.94m
         7           0.2463           0.0038           14.95m
         8           0.2416           0.0034           14.87m
         9           0.2389           0.0029           14.77m
        10           0.2351           0.0030           14.70m
        20           0.2102           0.0012           14.00m
        30           0.1956           0.0005           13.70m
        40           0.1873           0.0004           13.27m
        5

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.8s remaining:    0.0s


         1           0.2773           0.0083            6.07s
         2           0.2699           0.0076            6.13s
         3           0.2621           0.0068            6.12s
         4           0.2566           0.0061            5.88s
         5           0.2499           0.0056            5.74s
         6           0.2441           0.0051            5.54s
         7           0.2394           0.0046            5.39s
         8           0.2373           0.0041            5.29s
         9           0.2337           0.0037            5.24s
        10           0.2288           0.0034            5.13s
        20           0.2084           0.0012            3.83s
        30           0.1994           0.0005            2.57s
        40           0.1964           0.0002            1.26s
        50           0.1959           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.75, score=-0.439747 -   6.3s
[CV] learning_rate=0.05, n_estimators=50, subsam

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   12.2s remaining:    0.0s


         1           0.2781           0.0084            6.36s
         2           0.2714           0.0077            6.03s
         3           0.2638           0.0069            5.90s
         4           0.2573           0.0061            5.68s
         5           0.2523           0.0055            5.53s
         6           0.2461           0.0050            5.38s
         7           0.2419           0.0046            5.24s
         8           0.2376           0.0042            5.12s
         9           0.2338           0.0038            4.98s
        10           0.2292           0.0034            4.85s
        20           0.2083           0.0012            3.59s
        30           0.2002           0.0005            2.40s
        40           0.1980           0.0002            1.20s
        50           0.1972           0.0001            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.75, score=-0.436658 -   6.1s
[CV] learning_rate=0.05, n_estimators=50, subsam

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   18.4s remaining:    0.0s


         1           0.2773           0.0084            6.18s
         2           0.2705           0.0075            5.87s
         3           0.2627           0.0067            5.71s
         4           0.2554           0.0063            5.56s
         5           0.2502           0.0056            5.50s
         6           0.2449           0.0050            5.34s
         7           0.2402           0.0046            5.28s
         8           0.2360           0.0041            5.16s
         9           0.2324           0.0038            5.05s
        10           0.2276           0.0035            4.92s
        20           0.2073           0.0012            3.71s
        30           0.1993           0.0005            2.46s
        40           0.1971           0.0002            1.22s
        50           0.1961           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.75, score=-0.441306 -   6.1s
[CV] learning_rate=0.05, n_estimators=50, subsam

[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   24.6s remaining:    0.0s


         1           0.2754           0.0082            6.11s
         2           0.2669           0.0074            5.87s
         3           0.2611           0.0065            5.68s
         4           0.2526           0.0061            5.54s
         5           0.2469           0.0055            5.44s
         6           0.2418           0.0049            5.27s
         7           0.2378           0.0045            5.21s
         8           0.2322           0.0040            5.07s
         9           0.2306           0.0037            4.97s
        10           0.2258           0.0033            4.91s
        20           0.2057           0.0013            3.68s
        30           0.1966           0.0005            2.45s
        40           0.1949           0.0002            1.22s
        50           0.1934           0.0001            0.00s
[CV]  learning_rate=0.05, n_estimators=50, subsample=0.75, score=-0.449841 -   6.1s
[CV] learning_rate=0.05, n_estimators=60, subsam

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   30.9s remaining:    0.0s


         1           0.2748           0.0085            7.21s
         2           0.2684           0.0077            6.93s
         3           0.2617           0.0068            6.99s
         4           0.2551           0.0062            6.91s
         5           0.2491           0.0056            6.74s
         6           0.2430           0.0051            6.66s
         7           0.2384           0.0046            6.51s
         8           0.2347           0.0042            6.35s
         9           0.2316           0.0038            6.18s
        10           0.2277           0.0034            5.95s
        20           0.2072           0.0013            4.46s
        30           0.1981           0.0005            3.45s
        40           0.1949           0.0002            2.31s
        50           0.1941           0.0000            1.15s
        60           0.1943           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=60, subsample=0.75, score=-0.44

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   37.9s remaining:    0.0s


         1           0.2773           0.0083            7.24s
         2           0.2699           0.0076            7.49s
         3           0.2621           0.0068            7.40s
         4           0.2566           0.0061            7.15s
         5           0.2499           0.0056            6.95s
         6           0.2441           0.0051            6.76s
         7           0.2394           0.0046            6.65s
         8           0.2373           0.0041            6.46s
         9           0.2337           0.0037            6.31s
        10           0.2288           0.0034            6.18s
        20           0.2084           0.0012            4.84s
        30           0.1994           0.0005            3.60s
        40           0.1964           0.0002            2.41s
        50           0.1959           0.0000            1.20s
        60           0.1961           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=60, subsample=0.75, score=-0.43

[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   45.2s remaining:    0.0s


         1           0.2781           0.0084            7.47s
         2           0.2714           0.0077            7.42s
         3           0.2638           0.0069            7.27s
         4           0.2573           0.0061            7.02s
         5           0.2523           0.0055            6.82s
         6           0.2461           0.0050            6.71s
         7           0.2419           0.0046            6.58s
         8           0.2376           0.0042            6.41s
         9           0.2338           0.0038            6.29s
        10           0.2292           0.0034            6.19s
        20           0.2083           0.0012            4.89s
        30           0.2002           0.0005            3.66s
        40           0.1980           0.0002            2.42s
        50           0.1972           0.0001            1.22s
        60           0.1957           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=60, subsample=0.75, score=-0.43

[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   52.7s remaining:    0.0s


         1           0.2773           0.0084            7.38s
         2           0.2705           0.0075            7.29s
         3           0.2627           0.0067            7.29s
         4           0.2554           0.0063            7.11s
         5           0.2502           0.0056            7.00s
         6           0.2449           0.0050            6.87s
         7           0.2402           0.0046            6.69s
         8           0.2360           0.0041            6.68s
         9           0.2324           0.0038            6.50s
        10           0.2276           0.0035            6.35s
        20           0.2073           0.0012            4.96s
        30           0.1993           0.0005            3.68s
        40           0.1971           0.0002            2.46s
        50           0.1961           0.0000            1.23s
        60           0.1950           0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=60, subsample=0.75, score=-0.44

[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  1.0min remaining:    0.0s


         1           0.2754           0.0082            7.62s
         2           0.2669           0.0074            7.14s
         3           0.2611           0.0065            7.06s
         4           0.2526           0.0061            6.89s
         5           0.2469           0.0055            6.76s
         6           0.2418           0.0049            6.57s
         7           0.2378           0.0045            6.38s
         8           0.2322           0.0040            6.09s
         9           0.2306           0.0037            5.84s
        10           0.2258           0.0033            5.63s
        20           0.2057           0.0013            4.45s
        30           0.1966           0.0005            3.40s
        40           0.1949           0.0002            2.29s
        50           0.1934           0.0001            1.16s
        60           0.1930          -0.0000            0.00s
[CV]  learning_rate=0.05, n_estimators=60, subsample=0.75, score=-0.44

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:  1.1min remaining:    0.0s


         1           0.2748           0.0085            8.22s
         2           0.2684           0.0077            8.20s
         3           0.2617           0.0068            8.05s
         4           0.2551           0.0062            7.84s
         5           0.2491           0.0056            7.70s
         6           0.2430           0.0051            7.63s
         7           0.2384           0.0046            7.50s
         8           0.2347           0.0042            7.39s
         9           0.2316           0.0038            7.26s
        10           0.2277           0.0034            7.13s
        20           0.2072           0.0013            5.96s
        30           0.1981           0.0005            4.75s
        40           0.1949           0.0002            3.55s
        50           0.1941           0.0000            2.37s
        60           0.1943           0.0000            1.18s
        70           0.1941           0.0000            0.00s
[CV]  le

[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:  1.3min remaining:    0.0s


         1           0.2773           0.0083            8.01s
         2           0.2699           0.0076            8.20s
         3           0.2621           0.0068            8.15s
         4           0.2566           0.0061            7.98s
         5           0.2499           0.0056            7.93s
         6           0.2441           0.0051            7.78s
         7           0.2394           0.0046            7.66s
         8           0.2373           0.0041            7.50s
         9           0.2337           0.0037            7.40s
        10           0.2288           0.0034            7.26s
        20           0.2084           0.0012            5.95s
        30           0.1994           0.0005            4.67s
        40           0.1964           0.0002            3.51s
        50           0.1959           0.0000            2.35s
        60           0.1961           0.0000            1.18s
        70           0.1959           0.0000            0.00s
[CV]  le

[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  1.4min remaining:    0.0s


         1           0.2781           0.0084            8.02s
         2           0.2714           0.0077            7.98s
         3           0.2638           0.0069            7.90s
         4           0.2573           0.0061            8.05s
         5           0.2523           0.0055            7.91s
         6           0.2461           0.0050            7.75s
         7           0.2419           0.0046            7.57s
         8           0.2376           0.0042            7.41s
         9           0.2338           0.0038            7.10s
        10           0.2292           0.0034            6.86s
        20           0.2083           0.0012            5.83s
        30           0.2002           0.0005            4.73s
        40           0.1980           0.0002            3.58s
        50           0.1972           0.0001            2.38s
        60           0.1957           0.0000            1.20s
        70           0.1961           0.0000            0.00s
[CV]  le

[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:  1.5min remaining:    0.0s


         1           0.2773           0.0084            8.94s
         2           0.2705           0.0075            8.59s
         3           0.2627           0.0067            8.57s
         4           0.2554           0.0063            8.43s
         5           0.2502           0.0056            8.25s
         6           0.2449           0.0050            8.09s
         7           0.2402           0.0046            8.01s
         8           0.2360           0.0041            7.80s
         9           0.2324           0.0038            7.64s
        10           0.2276           0.0035            7.47s
        20           0.2073           0.0012            6.07s
        30           0.1993           0.0005            4.82s
        40           0.1971           0.0002            3.58s
        50           0.1961           0.0000            2.37s
        60           0.1950           0.0000            1.19s
        70           0.1946          -0.0000            0.00s
[CV]  le

[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:  1.7min remaining:    0.0s


         1           0.2754           0.0082            8.08s
         2           0.2669           0.0074            7.96s
         3           0.2611           0.0065            7.90s
         4           0.2526           0.0061            7.86s
         5           0.2469           0.0055            7.73s
         6           0.2418           0.0049            7.62s
         7           0.2378           0.0045            7.51s
         8           0.2322           0.0040            7.40s
         9           0.2306           0.0037            7.27s
        10           0.2258           0.0033            7.14s
        20           0.2057           0.0013            5.90s
        30           0.1966           0.0005            4.72s
        40           0.1949           0.0002            3.45s
        50           0.1934           0.0001            2.32s
        60           0.1930          -0.0000            1.17s
        70           0.1927           0.0000            0.00s
[CV]  le

[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:  1.8min remaining:    0.0s


         1           0.2748           0.0085            9.72s
         2           0.2684           0.0077            9.29s
         3           0.2617           0.0068            9.23s
         4           0.2551           0.0062            9.05s
         5           0.2491           0.0056            9.00s
         6           0.2430           0.0051            8.86s
         7           0.2384           0.0046            8.76s
         8           0.2347           0.0042            8.62s
         9           0.2316           0.0038            8.49s
        10           0.2277           0.0034            8.34s
        20           0.2072           0.0013            7.11s
        30           0.1981           0.0005            5.91s
        40           0.1949           0.0002            4.73s
        50           0.1941           0.0000            3.53s
        60           0.1943           0.0000            2.36s
        70           0.1941           0.0000            1.18s
        

[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:  2.0min remaining:    0.0s


         1           0.2773           0.0083            9.01s
         2           0.2699           0.0076            9.10s
         3           0.2621           0.0068            8.99s
         4           0.2566           0.0061            8.95s
         5           0.2499           0.0056            8.81s
         6           0.2441           0.0051            8.69s
         7           0.2394           0.0046            8.60s
         8           0.2373           0.0041            8.51s
         9           0.2337           0.0037            8.38s
        10           0.2288           0.0034            8.26s
        20           0.2084           0.0012            7.34s
        30           0.1994           0.0005            6.08s
        40           0.1964           0.0002            4.87s
        50           0.1959           0.0000            3.64s
        60           0.1961           0.0000            2.41s
        70           0.1959           0.0000            1.20s
        

[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:  2.1min remaining:    0.0s


         1           0.2781           0.0084            9.41s
         2           0.2714           0.0077            9.09s
         3           0.2638           0.0069            9.08s
         4           0.2573           0.0061            8.98s
         5           0.2523           0.0055            8.88s
         6           0.2461           0.0050            8.78s
         7           0.2419           0.0046            8.65s
         8           0.2376           0.0042            8.51s
         9           0.2338           0.0038            8.39s
        10           0.2292           0.0034            8.26s
        20           0.2083           0.0012            7.06s
        30           0.2002           0.0005            5.93s
        40           0.1980           0.0002            4.74s
        50           0.1972           0.0001            3.53s
        60           0.1957           0.0000            2.35s
        70           0.1961           0.0000            1.17s
        

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:  2.3min remaining:    0.0s


         1           0.2773           0.0084            9.25s
         2           0.2705           0.0075            9.05s
         3           0.2627           0.0067            8.94s
         4           0.2554           0.0063            8.81s
         5           0.2502           0.0056            8.72s
         6           0.2449           0.0050            8.69s
         7           0.2402           0.0046            8.57s
         8           0.2360           0.0041            8.45s
         9           0.2324           0.0038            8.37s
        10           0.2276           0.0035            8.23s
        20           0.2073           0.0012            6.96s
        30           0.1993           0.0005            5.86s
        40           0.1971           0.0002            4.89s
        50           0.1961           0.0000            3.67s
        60           0.1950           0.0000            2.42s
        70           0.1946          -0.0000            1.20s
        

[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:  2.5min remaining:    0.0s


         1           0.2754           0.0082            9.46s
         2           0.2669           0.0074           10.31s
         3           0.2611           0.0065           10.04s
         4           0.2526           0.0061            9.62s
         5           0.2469           0.0055            9.41s
         6           0.2418           0.0049            9.18s
         7           0.2378           0.0045            8.98s
         8           0.2322           0.0040            8.80s
         9           0.2306           0.0037            8.64s
        10           0.2258           0.0033            8.55s
        20           0.2057           0.0013            7.27s
        30           0.1966           0.0005            5.98s
        40           0.1949           0.0002            4.74s
        50           0.1934           0.0001            3.53s
        60           0.1930          -0.0000            2.35s
        70           0.1927           0.0000            1.17s
        

[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  6.9min finished


         1           0.2780           0.0084           13.59s
         2           0.2687           0.0077           13.82s
         3           0.2623           0.0070           13.21s
         4           0.2553           0.0063           12.68s
         5           0.2497           0.0057           12.47s
         6           0.2454           0.0051           12.11s
         7           0.2405           0.0047           12.13s
         8           0.2362           0.0041           11.98s
         9           0.2325           0.0038           11.98s
        10           0.2290           0.0035           11.97s
        20           0.2080           0.0012           10.18s
        30           0.1997           0.0005            8.22s
        40           0.1973           0.0002            6.46s
        50           0.1953           0.0001            4.80s
        60           0.1955           0.0000            3.17s
        70           0.1942           0.0000            1.58s
        

In [12]:
import os
from sklearn.metrics import mean_squared_error, make_scorer

def fmean_squared_error(ground_truth, predictions):
    """Return the root mean squared error between the two sequences."""
    return mean_squared_error(ground_truth, predictions) ** 0.5

# Solution file with the true relevance and a `Usage` column that assigns
# each row to the 'Public' or 'Private' split.
df_score = pd.read_csv(os.path.join(os.getcwd(), 'data', 'solution.csv'), encoding="ISO-8859-1")

# Build the prediction frame once and split it with the same masks as the
# solution frame.
# NOTE(review): rows are matched by position, which assumes solution.csv is
# in the same order as id_test -- confirm.
df_pred = pd.DataFrame({"id": list(id_test), "relevance": y_pred})
if len(df_pred) != len(df_score):
    raise ValueError(
        'solution.csv has %d rows but there are %d predictions'
        % (len(df_score), len(df_pred))
    )

# Plain boolean arrays, so the filtering below never depends on implicit
# index alignment between the two frames.
is_public = (df_score.Usage == 'Public').values
is_private = (df_score.Usage == 'Private').values

public_score = df_score[is_public]
private_score = df_score[is_private]

public_pred = df_pred[is_public]
private_pred = df_pred[is_private]

In [13]:
# RMSE on each leaderboard split; arguments follow the function's
# (ground_truth, predictions) order.
(
    'public score: ',
    fmean_squared_error(list(public_score.relevance), list(public_pred.relevance)),
    'private score: ',
    fmean_squared_error(list(private_score.relevance), list(private_pred.relevance)),
)

('public score: ', 0.45183217401848913, 'private score: ', 0.45111671459846481)

In [14]:
# Show the final prediction array as the cell output.
y_pred

array([ 2.15673279,  2.12808402,  2.35362101, ...,  1.93183043,
        2.52306935,  2.69263093])