In [38]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge, RidgeCV

### Load Data
Here we'll load the preprocessed data

In [57]:
parcelids = pd.read_csv('data/parcelid.csv',header=None).loc[:, 1:]
X_train = pd.read_csv('data/modified_trainV1.csv').ix[:,1:]
#y_train = pd.read_csv('data/labels.csv', header=None)[1]
X_test = pd.read_csv('data/modified_testV1.csv').ix[:, 1:]

### Utility Functions

In [27]:
def get_predictions(classifier, test_set, train_set, n_splits=25):
    splitted = np.array_split(test_set.drop(set(test_set.columns) - set(train_set.columns),axis=1), n_splits)
    oct_preds = []
    nov_preds = []
    dec_preds = []

    cols = splitted[0].shape[1]

    count = 0
    for split in splitted:
        count += 1
        print("Predicting On {}".format(count))
        split = np.append(split, np.full([len(split),1], 10), axis=1)
        oct_pred = rf_optimal.predict(split)

        split[:, cols] = np.full(len(split), 11)
        nov_pred = rf_optimal.predict(split)

        split[:, cols] = np.full(len(split), 12)
        dec_pred = rf_optimal.predict(split)

        oct_preds.append(oct_pred)
        nov_preds.append(nov_pred)
        dec_preds.append(dec_pred)

    rf10_predictions = np.hstack(oct_preds)
    rf11_predictions = np.hstack(nov_preds)
    rf12_predictions = np.hstack(dec_preds)
    
    return rf10_predictions, rf11_predictions, rf12_predictions

In [28]:
from datetime import datetime
def write_prediction_file(parcels, pred_array):
    output = pd.DataFrame({'ParcelId': parcels.astype(np.int32),
            '201610': pred_array[0], '201611': pred_array[1], '201612': pred_array[2],
            '201710': pred_array[3], '201711': pred_array[4], '201712': pred_array[5]})
# set col 'ParceID' to first col
    cols = output.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    output = output[cols]
    

    print( "\nWriting results to disk ..." )
    output.to_csv('predictions/sub{}.csv'.format(datetime.now().strftime('%Y%m%d_%H%M%S')), index=False)

### Random Forest Tuning

In [31]:
rf = RandomForestRegressor(n_estimators=150, verbose=True)

param_dists = {"min_samples_split" : [2,3,4],
              "min_samples_leaf" : [1,2,3,4,5],
              "max_depth" : [4,5,6,7,8,9,10,13,14,16,17,20,None],
              "min_impurity_split" : [1e-03,1e-04,1e-05,1e-06,1e-07,1e-08,1e-09,1e-10]}
from sklearn.model_selection import KFold
cv = KFold(5, shuffle=True)
rf_search = RandomizedSearchCV(rf, param_dists, scoring='neg_mean_absolute_error',cv=cv, verbose=True)
rf_search.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]

KeyboardInterrupt: 

In [None]:
print (rf_search.best_params_)
print (rf_search.best_score_)

### GBM Tuning

In [None]:
gbm = GradientBoostingRegressor(n_estimators=150, verbose=True)

gbm_param_dists = {'learning_rate':[0.001,0.005,0.01,0.05,0.1,0.5,1,1.25,1.5,2],
                  'max_depth':[2,3,4,5],
                  'min_samples_split':[2,3,4,5],
                  'min_samples_leaf':[1,2,3],
                  "min_impurity_split" : [1e-03,1e-04,1e-05,1e-06,1e-07,1e-08,1e-09,1e-10],
                   'loss':['ls','lad','huber']}
gbm_search = RandomizedSearchCV(gbm, gbm_param_dists, scoring='neg_mean_absolute_error', cv=cv)

In [None]:
print (gbm_search.best_params_)
print (gbm_search.best_score_)

### Fit Models

In [35]:
# optimal = rf_search.best_estimator_
# gbm_optimal = gbm_search.best_estimator_
gbm_optimal = GradientBoostingRegressor(verbose = True, n_estimators=150, learning_rate=0.1, min_impurity_split = 1e-05, min_samples_leaf=2, max_depth=5, min_samples_split=5)
rf_optimal = RandomForestRegressor(verbose=True, n_estimators=150, max_depth=13, min_impurity_split=0.001, min_samples_leaf=3, min_samples_split=3)

In [36]:
gbm_optimal.fit(X_train, y_train)
rf_optimal.fit(X_train, y_train)

      Iter       Train Loss   Remaining Time 
         1           0.0040            1.13m
         2           0.0040            1.12m
         3           0.0040            1.16m
         4           0.0039            1.14m
         5           0.0039            1.15m
         6           0.0039            1.16m
         7           0.0039            1.14m
         8           0.0039            1.14m
         9           0.0039            1.13m
        10           0.0039            1.12m
        20           0.0039            1.02m
        30           0.0038           54.02s
        40           0.0038           48.33s
        50           0.0038           42.82s
        60           0.0038           37.29s
        70           0.0037           31.83s
        80           0.0037           27.33s
        90           0.0037           22.71s
       100           0.0037           18.83s


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  2.4min finished


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=13,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=0.001, min_samples_leaf=3,
           min_samples_split=3, min_weight_fraction_leaf=0.0,
           n_estimators=150, n_jobs=1, oob_score=False, random_state=None,
           verbose=True, warm_start=False)

In [39]:
ridge = RidgeCV(scoring='neg_mean_absolute_error', store_cv_values=True)

model_preds = pd.DataFrame()
model_preds['rf'] = rf_optimal.predict(X_train)
model_preds['gbm'] = gbm_optimal.predict(X_train)

ridge.fit(model_preds, y_train)

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.3s finished


RidgeCV(alphas=(0.1, 1.0, 10.0), cv=None, fit_intercept=True, gcv_mode=None,
    normalize=False, scoring='neg_mean_absolute_error',
    store_cv_values=True)

In [42]:
print (np.mean(np.abs(ridge.cv_values_)))
print (ridge.get_params())

0.0178082131717
{'normalize': False, 'alphas': (0.1, 1.0, 10.0), 'gcv_mode': None, 'fit_intercept': True, 'cv': None, 'scoring': 'neg_mean_absolute_error', 'store_cv_values': True}


In [None]:
cv_stacked = KFold(10)
p_folds = []

In [43]:
rf10, rf11, rf12 = get_predictions(rf_optimal, X_test, X_train)
gbm10, gbm11, gbm12 = get_predictions(gbm_optimal, X_test, X_train)

Predicting On 1


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.1s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 2


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 3


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 4


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 5


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 6


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 7


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 8


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 9


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 10


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 11


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 12


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 13


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 14


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished


Predicting On 15


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 16


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 17


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 18


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 19


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 20


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 21


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 22


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 23


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 24


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 25


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished


Predicting On 1


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 2


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 3


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 4


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.1s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.1s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 5


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 6


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 7


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 8


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished


Predicting On 9


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 10


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 11


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 12


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 13


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 14


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 15


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 16


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 17


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 18


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.1s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 19


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 20


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 21


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 22


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished


Predicting On 23


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 24


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


Predicting On 25


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    0.9s finished
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


In [46]:
ridge_10 = ridge.predict(pd.DataFrame({'rf':rf10,'gbm':gbm10}))
ridge_11 = ridge.predict(pd.DataFrame({'rf':rf11,'gbm':gbm11}))
ridge_12 = ridge.predict(pd.DataFrame({'rf':rf12,'gbm':gbm12}))

In [48]:
write_prediction_file(parcelids, [ridge_10, ridge_11, ridge_12, ridge_10, ridge_11, ridge_12])

ValueError: cannot copy sequence with size 2 to array axis with dimension 2985216

In [52]:
parcelids

Unnamed: 0,0,10754147
0,1,10759547
1,2,10843547
2,3,10859147
3,4,10879947
4,5,10898347
5,6,10933547
6,7,10940747
7,8,10954547
8,9,10976347
9,10,11073947
