In [114]:
from __future__ import division

from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import cross_validation, linear_model
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor, ExtraTreesRegressor
from sklearn.grid_search import GridSearchCV
from sklearn.lda import LDA
from sklearn.linear_model import LinearRegression, Ridge, Lasso,LassoLars,ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Normalizer, PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR

In [54]:
# Load the raw training set and the set we must produce predictions for.
bike_sharing_demand = pd.read_csv('./data/input/train.csv')
prediction_data = pd.read_csv('data/input/test.csv')

# Features are the columns 'datetime'..'windspeed'; targets are 'casual' onwards.
# .copy() makes the feature frames independent of the frames they were sliced
# from, so the columns added below are never written into a slice
# (avoids pandas' SettingWithCopyWarning).
train_data = bike_sharing_demand.loc[:, 'datetime':'windspeed'].copy()
train_labels = bike_sharing_demand.loc[:, 'casual':]
prediction_data = prediction_data.loc[:, 'datetime':'windspeed'].copy()

# A reproducible 100-row random subsample of the raw training data.
# The frames come straight from read_csv, so row labels equal row positions
# and label-based .loc selects the shuffled rows.
np.random.seed(0)
shuffle = np.random.permutation(np.arange(train_data.shape[0]))
mini_bike_sharing = bike_sharing_demand.loc[shuffle[:100], :]
mini_train_data = mini_bike_sharing.loc[:, 'datetime':'windspeed']
mini_train_labels = mini_bike_sharing.loc[:, 'casual':]

# Let's extract the information
# Parse each timestamp once, then derive the calendar features from it.
# .dt.weekday uses Monday=0, the same convention as datetime.weekday().
for dataset in (train_data, prediction_data):
    timestamps = pd.to_datetime(dataset['datetime'], format="%Y-%m-%d %H:%M:%S")
    dataset['hour'] = timestamps.dt.hour
    dataset['weekday'] = timestamps.dt.weekday
    dataset['month'] = timestamps.dt.month
    dataset['year'] = timestamps.dt.year

# Ten random train/test splits shared by every model evaluation below.
cv = cross_validation.ShuffleSplit(train_data.shape[0], n_iter=10, random_state=0)
np.random.seed(0)

In [55]:
# Define the Root-Mean-Squared-Log Error function for scoring predictions
def rmsle(actual_values, predicted_values):
    """Return the Root-Mean-Squared-Log Error between two sequences.

    Each term is ``(log(predicted + 1) - log(actual + 1)) ** 2``. Terms that
    evaluate to NaN (a NaN input, or a value below -1 whose logarithm is
    undefined) are skipped, and the mean is taken over the remaining terms
    only, so skipped terms no longer dilute the error.

    Parameters
    ----------
    actual_values : array-like of numbers
        Ground-truth values.
    predicted_values : array-like of numbers
        Predictions, aligned element-wise with ``actual_values``.

    Returns
    -------
    float
        The RMSLE over the valid terms, or NaN when there is no valid term
        (including empty input).
    """
    actual = np.asarray(actual_values, dtype=float)
    predicted = np.asarray(predicted_values, dtype=float)
    squared_log_errors = (np.log(predicted + 1) - np.log(actual + 1)) ** 2

    # Average over the terms that are actually summed: dividing by the full
    # length would count every skipped NaN as a perfect prediction.
    valid = ~np.isnan(squared_log_errors)
    n_valid = np.count_nonzero(valid)
    if n_valid == 0:
        return np.nan
    return np.sqrt(squared_log_errors[valid].sum() / n_valid)

In [92]:
def train_predict(model,clf):
    """Evaluate ``clf`` on every split of the module-level ``cv`` and print the scores.

    For each (train, test) split the estimator is refitted on the training
    rows of the module-level ``train_data`` / ``train_labels`` (feature
    columns from 'season' onwards, target column 'count'), then one line is
    printed with the split number, the RMSLE on the test rows and the
    estimator's own ``score`` on the train and test rows. A final line
    prints the mean train and test scores over all splits.

    Parameters
    ----------
    model : str
        Label used to prefix the printed lines.
    clf : estimator
        Object exposing ``fit``, ``predict`` and ``score``; it is refitted
        in place on every split.

    Returns
    -------
    None
    """
    train_scores, test_scores = [], []
    for i, (train_index, test_index) in enumerate(cv):
        # Label-based selection; assumes the frames keep the default integer
        # index so the indices yielded by ``cv`` address the intended rows.
        x_train = train_data.loc[train_index, 'season':]
        y_train = train_labels.loc[train_index, 'count']
        x_test = train_data.loc[test_index, 'season':]
        y_test = train_labels.loc[test_index, 'count']

        clf.fit(x_train, y_train)
        predicted_values = clf.predict(x_test)

        # Compute the RMSLE once and reuse it in the report line.
        rmsle_score = rmsle(y_test, predicted_values)
        train_score = clf.score(x_train, y_train)
        test_score = clf.score(x_test, y_test)
        print("%s : Loop: %d,RMSLE: %.3f,Train Score %.3f,Test Score %.3f" %(model,i, rmsle_score,train_score,test_score))
        train_scores.append(train_score)
        test_scores.append(test_score)
    print("Mean Train Score for %s %.3f , Mean Test Score %.3f" % (model,np.mean(train_scores),np.mean(test_scores)))

    

# Linear Discriminant Analysis (LDA)

In [93]:
# Scale -> normalise -> LDA, evaluated on the shared CV splits.
# NOTE(review): LDA is a classifier and is fitted here on the numeric 'count'
# target, so the "Score" columns are presumably classification accuracy
# rather than R^2 -- confirm this baseline is intended.
clf_lda = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', LDA()),
])
train_predict('LDA', clf_lda)

LDA : Loop: 0,RMSLE: 1.600,Train Score 0.049,Test Score 0.023
LDA : Loop: 1,RMSLE: 1.539,Train Score 0.049,Test Score 0.022
LDA : Loop: 2,RMSLE: 1.644,Train Score 0.049,Test Score 0.021
LDA : Loop: 3,RMSLE: 1.620,Train Score 0.049,Test Score 0.018
LDA : Loop: 4,RMSLE: 1.546,Train Score 0.048,Test Score 0.021
LDA : Loop: 5,RMSLE: 1.649,Train Score 0.052,Test Score 0.019
LDA : Loop: 6,RMSLE: 1.610,Train Score 0.049,Test Score 0.018
LDA : Loop: 7,RMSLE: 1.649,Train Score 0.049,Test Score 0.022
LDA : Loop: 8,RMSLE: 1.604,Train Score 0.052,Test Score 0.016
LDA : Loop: 9,RMSLE: 1.608,Train Score 0.048,Test Score 0.020
Mean Train Score for LDA 0.049 , Mean Test Score 0.020


# Linear Model

In [94]:
# Scale -> normalise -> ordinary least squares, evaluated on the shared CV splits.
clf_linear = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', LinearRegression()),
])
train_predict('Linear ', clf_linear)

Linear  : Loop: 0,RMSLE: 1.155,Train Score 0.393,Test Score 0.397
Linear  : Loop: 1,RMSLE: 1.173,Train Score 0.387,Test Score 0.424
Linear  : Loop: 2,RMSLE: 1.155,Train Score 0.399,Test Score 0.373
Linear  : Loop: 3,RMSLE: 1.168,Train Score 0.397,Test Score 0.381
Linear  : Loop: 4,RMSLE: 1.124,Train Score 0.388,Test Score 0.414
Linear  : Loop: 5,RMSLE: 1.137,Train Score 0.399,Test Score 0.373
Linear  : Loop: 6,RMSLE: 1.160,Train Score 0.393,Test Score 0.397
Linear  : Loop: 7,RMSLE: 1.168,Train Score 0.397,Test Score 0.380
Linear  : Loop: 8,RMSLE: 1.115,Train Score 0.393,Test Score 0.394
Linear  : Loop: 9,RMSLE: 1.162,Train Score 0.399,Test Score 0.373
Mean Train Score for Linear  0.395 , Mean Test Score 0.391


# Polynomial Regression with degree 3

In [95]:
# Scale -> normalise -> degree-3 polynomial expansion -> least squares.
clf_poly = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('poly', PolynomialFeatures(degree=3)),
    ('model', LinearRegression()),
])
train_predict('Polynomial Regression degree 3 ', clf_poly)

Polynomial Regression degree 3  : Loop: 0,RMSLE: 0.970,Train Score 0.659,Test Score 0.611
Polynomial Regression degree 3  : Loop: 1,RMSLE: 0.986,Train Score 0.654,Test Score 0.636
Polynomial Regression degree 3  : Loop: 2,RMSLE: 0.970,Train Score 0.663,Test Score 0.586
Polynomial Regression degree 3  : Loop: 3,RMSLE: 0.981,Train Score 0.659,Test Score 0.618
Polynomial Regression degree 3  : Loop: 4,RMSLE: 0.924,Train Score 0.656,Test Score 0.623
Polynomial Regression degree 3  : Loop: 5,RMSLE: 0.912,Train Score 0.660,Test Score 0.600
Polynomial Regression degree 3  : Loop: 6,RMSLE: 0.968,Train Score 0.662,Test Score 0.597
Polynomial Regression degree 3  : Loop: 7,RMSLE: 0.967,Train Score 0.658,Test Score 0.621
Polynomial Regression degree 3  : Loop: 8,RMSLE: 0.978,Train Score 0.657,Test Score 0.621
Polynomial Regression degree 3  : Loop: 9,RMSLE: 0.969,Train Score 0.660,Test Score 0.607
Mean Train Score for Polynomial Regression degree 3  0.659 , Mean Test Score 0.612


# KNN Regressor

In [96]:
# Scale -> normalise -> 5-nearest-neighbours regression.
clf_knn = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', KNeighborsRegressor(n_neighbors=5)),
])
train_predict('KNN ', clf_knn)

KNN  : Loop: 0,RMSLE: 0.899,Train Score 0.763,Test Score 0.607
KNN  : Loop: 1,RMSLE: 0.906,Train Score 0.756,Test Score 0.649
KNN  : Loop: 2,RMSLE: 0.897,Train Score 0.762,Test Score 0.603
KNN  : Loop: 3,RMSLE: 0.890,Train Score 0.757,Test Score 0.616
KNN  : Loop: 4,RMSLE: 0.864,Train Score 0.752,Test Score 0.637
KNN  : Loop: 5,RMSLE: 0.879,Train Score 0.759,Test Score 0.613
KNN  : Loop: 6,RMSLE: 0.899,Train Score 0.758,Test Score 0.595
KNN  : Loop: 7,RMSLE: 0.891,Train Score 0.760,Test Score 0.627
KNN  : Loop: 8,RMSLE: 0.865,Train Score 0.758,Test Score 0.621
KNN  : Loop: 9,RMSLE: 0.903,Train Score 0.757,Test Score 0.618
Mean Train Score for KNN  0.758 , Mean Test Score 0.618


# Ridge Regression with polynomial features with degree 3

In [97]:
# Scale -> normalise -> degree-3 polynomial expansion -> ridge regression (alpha=0.5).
clf_ridge = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('poly', PolynomialFeatures(degree=3)),
    ('model', Ridge(
        alpha=0.5,
        copy_X=True,
        fit_intercept=True,
        max_iter=None,
        normalize=False,
        solver='auto',
        tol=0.001,
    )),
])
train_predict('Ridge-degree 3', clf_ridge)

Ridge-degree 3 : Loop: 0,RMSLE: 0.986,Train Score 0.637,Test Score 0.616
Ridge-degree 3 : Loop: 1,RMSLE: 1.001,Train Score 0.632,Test Score 0.637
Ridge-degree 3 : Loop: 2,RMSLE: 0.949,Train Score 0.640,Test Score 0.597
Ridge-degree 3 : Loop: 3,RMSLE: 1.008,Train Score 0.637,Test Score 0.611
Ridge-degree 3 : Loop: 4,RMSLE: 0.939,Train Score 0.634,Test Score 0.622
Ridge-degree 3 : Loop: 5,RMSLE: 0.940,Train Score 0.638,Test Score 0.608
Ridge-degree 3 : Loop: 6,RMSLE: 0.965,Train Score 0.639,Test Score 0.606
Ridge-degree 3 : Loop: 7,RMSLE: 0.986,Train Score 0.635,Test Score 0.623
Ridge-degree 3 : Loop: 8,RMSLE: 0.966,Train Score 0.635,Test Score 0.614
Ridge-degree 3 : Loop: 9,RMSLE: 0.978,Train Score 0.638,Test Score 0.602
Mean Train Score for Ridge-degree 3 0.637 , Mean Test Score 0.614


# Lasso with Polynomial features with degree 3

In [98]:
# Scale -> normalise -> degree-3 polynomial expansion -> lasso (alpha=0.1).
clf_lasso = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('poly', PolynomialFeatures(degree=3)),
    ('model', Lasso(
        alpha=0.1,
        copy_X=True,
        fit_intercept=True,
        max_iter=1000,
        normalize=False,
        positive=False,
        precompute=False,
        tol=0.0001,
        warm_start=False,
    )),
])
train_predict('Lasso-degree 3', clf_lasso)

Lasso-degree 3 : Loop: 0,RMSLE: 1.013,Train Score 0.598,Test Score 0.593
Lasso-degree 3 : Loop: 1,RMSLE: 1.028,Train Score 0.592,Test Score 0.619
Lasso-degree 3 : Loop: 2,RMSLE: 0.982,Train Score 0.600,Test Score 0.579
Lasso-degree 3 : Loop: 3,RMSLE: 0.982,Train Score 0.599,Test Score 0.591
Lasso-degree 3 : Loop: 4,RMSLE: 0.974,Train Score 0.595,Test Score 0.602
Lasso-degree 3 : Loop: 5,RMSLE: 0.967,Train Score 0.600,Test Score 0.578
Lasso-degree 3 : Loop: 6,RMSLE: 1.012,Train Score 0.600,Test Score 0.586
Lasso-degree 3 : Loop: 7,RMSLE: 1.017,Train Score 0.597,Test Score 0.591
Lasso-degree 3 : Loop: 8,RMSLE: 1.007,Train Score 0.596,Test Score 0.592
Lasso-degree 3 : Loop: 9,RMSLE: 1.033,Train Score 0.601,Test Score 0.581
Mean Train Score for Lasso-degree 3 0.598 , Mean Test Score 0.591


# Lasso-LARS with polynomial features of degree 3

In [109]:
# Scale -> normalise -> degree-3 polynomial expansion -> lasso fitted with LARS (alpha=0.1).
clf_lassolars = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('poly', PolynomialFeatures(degree=3)),
    ('model', LassoLars(
        alpha=0.1,
        copy_X=True,
        fit_intercept=True,
        max_iter=1000,
        normalize=False,
        precompute=False,
    )),
])
train_predict('Lasso-lars 3', clf_lassolars)

Lasso-lars 3 : Loop: 0,RMSLE: 1.012,Train Score 0.598,Test Score 0.593
Lasso-lars 3 : Loop: 1,RMSLE: 1.028,Train Score 0.592,Test Score 0.619
Lasso-lars 3 : Loop: 2,RMSLE: 0.982,Train Score 0.600,Test Score 0.579
Lasso-lars 3 : Loop: 3,RMSLE: 0.982,Train Score 0.599,Test Score 0.591
Lasso-lars 3 : Loop: 4,RMSLE: 0.974,Train Score 0.595,Test Score 0.601
Lasso-lars 3 : Loop: 5,RMSLE: 0.967,Train Score 0.600,Test Score 0.578
Lasso-lars 3 : Loop: 6,RMSLE: 1.012,Train Score 0.600,Test Score 0.586
Lasso-lars 3 : Loop: 7,RMSLE: 1.017,Train Score 0.597,Test Score 0.591
Lasso-lars 3 : Loop: 8,RMSLE: 1.002,Train Score 0.596,Test Score 0.592
Lasso-lars 3 : Loop: 9,RMSLE: 1.037,Train Score 0.601,Test Score 0.581
Mean Train Score for Lasso-lars 3 0.598 , Mean Test Score 0.591


# Elastic Net with polynomial features of degree 3

In [111]:
# Scale -> normalise -> degree-3 polynomial expansion -> elastic net (alpha=0.1).
clf_elastic_net = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('poly', PolynomialFeatures(degree=3)),
    ('model', ElasticNet(
        alpha=0.1,
        copy_X=True,
        fit_intercept=True,
        max_iter=1000,
        normalize=False,
        precompute=False,
    )),
])
train_predict('Elastic Net', clf_elastic_net)

Elastic Net : Loop: 0,RMSLE: 1.258,Train Score 0.418,Test Score 0.417
Elastic Net : Loop: 1,RMSLE: 1.257,Train Score 0.411,Test Score 0.445
Elastic Net : Loop: 2,RMSLE: 1.239,Train Score 0.422,Test Score 0.402
Elastic Net : Loop: 3,RMSLE: 1.240,Train Score 0.421,Test Score 0.406
Elastic Net : Loop: 4,RMSLE: 1.224,Train Score 0.413,Test Score 0.430
Elastic Net : Loop: 5,RMSLE: 1.213,Train Score 0.423,Test Score 0.401
Elastic Net : Loop: 6,RMSLE: 1.246,Train Score 0.417,Test Score 0.420
Elastic Net : Loop: 7,RMSLE: 1.247,Train Score 0.420,Test Score 0.407
Elastic Net : Loop: 8,RMSLE: 1.221,Train Score 0.418,Test Score 0.408
Elastic Net : Loop: 9,RMSLE: 1.255,Train Score 0.422,Test Score 0.406
Mean Train Score for Elastic Net 0.418 , Mean Test Score 0.414


# Support Vector Regression - RBF Kernel

In [99]:
# Scale -> normalise -> support vector regression with an RBF kernel.
clf_svrbf = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', SVR(kernel='rbf', C=1e3, gamma=0.1)),
])
train_predict('SVR - RBF', clf_svrbf)

SVR - RBF : Loop: 0,RMSLE: 0.943,Train Score 0.512,Test Score 0.510
SVR - RBF : Loop: 1,RMSLE: 0.966,Train Score 0.504,Test Score 0.551
SVR - RBF : Loop: 2,RMSLE: 0.941,Train Score 0.518,Test Score 0.476
SVR - RBF : Loop: 3,RMSLE: 0.929,Train Score 0.516,Test Score 0.496
SVR - RBF : Loop: 4,RMSLE: 0.905,Train Score 0.507,Test Score 0.510
SVR - RBF : Loop: 5,RMSLE: 0.907,Train Score 0.516,Test Score 0.493
SVR - RBF : Loop: 6,RMSLE: 0.923,Train Score 0.512,Test Score 0.509
SVR - RBF : Loop: 7,RMSLE: 0.947,Train Score 0.510,Test Score 0.500
SVR - RBF : Loop: 8,RMSLE: 0.932,Train Score 0.515,Test Score 0.484
SVR - RBF : Loop: 9,RMSLE: 0.955,Train Score 0.517,Test Score 0.507
Mean Train Score for SVR - RBF 0.513 , Mean Test Score 0.504


# Support Vector Regression - Linear Kernel

In [101]:
# Scale -> normalise -> support vector regression with a linear kernel.
clf_svrlin = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', SVR(kernel='linear', C=1e3)),
])
train_predict('SVR - Linear kernel', clf_svrlin)

SVR - Linear kernel : Loop: 0,RMSLE: 1.070,Train Score 0.335,Test Score 0.336
SVR - Linear kernel : Loop: 1,RMSLE: 1.080,Train Score 0.325,Test Score 0.374
SVR - Linear kernel : Loop: 2,RMSLE: 1.064,Train Score 0.344,Test Score 0.317
SVR - Linear kernel : Loop: 3,RMSLE: 1.079,Train Score 0.342,Test Score 0.332
SVR - Linear kernel : Loop: 4,RMSLE: 1.031,Train Score 0.329,Test Score 0.347
SVR - Linear kernel : Loop: 5,RMSLE: 1.048,Train Score 0.343,Test Score 0.311
SVR - Linear kernel : Loop: 6,RMSLE: 1.072,Train Score 0.332,Test Score 0.337
SVR - Linear kernel : Loop: 7,RMSLE: 1.089,Train Score 0.342,Test Score 0.325
SVR - Linear kernel : Loop: 8,RMSLE: 1.038,Train Score 0.340,Test Score 0.321
SVR - Linear kernel : Loop: 9,RMSLE: 1.076,Train Score 0.344,Test Score 0.335
Mean Train Score for SVR - Linear kernel 0.338 , Mean Test Score 0.333


# Support Vector Regression - Polynomial Kernel

In [100]:
# Scale -> normalise -> support vector regression with a degree-3 polynomial kernel.
clf_svrpoly = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', SVR(kernel='poly', C=1e3, degree=3)),
])
train_predict('SVR - Polynomial kernel', clf_svrpoly)

SVR - Polynomial kernel : Loop: 0,RMSLE: 1.145,Train Score 0.284,Test Score 0.280
SVR - Polynomial kernel : Loop: 1,RMSLE: 1.135,Train Score 0.276,Test Score 0.317
SVR - Polynomial kernel : Loop: 2,RMSLE: 1.132,Train Score 0.292,Test Score 0.273
SVR - Polynomial kernel : Loop: 3,RMSLE: 1.140,Train Score 0.290,Test Score 0.285
SVR - Polynomial kernel : Loop: 4,RMSLE: 1.115,Train Score 0.278,Test Score 0.281
SVR - Polynomial kernel : Loop: 5,RMSLE: 1.106,Train Score 0.290,Test Score 0.263
SVR - Polynomial kernel : Loop: 6,RMSLE: 1.143,Train Score 0.282,Test Score 0.284
SVR - Polynomial kernel : Loop: 7,RMSLE: 1.140,Train Score 0.289,Test Score 0.281
SVR - Polynomial kernel : Loop: 8,RMSLE: 1.120,Train Score 0.288,Test Score 0.262
SVR - Polynomial kernel : Loop: 9,RMSLE: 1.138,Train Score 0.289,Test Score 0.291
Mean Train Score for SVR - Polynomial kernel 0.286 , Mean Test Score 0.282


# Random Forest Regressor

In [104]:
# Scale -> normalise -> random forest of 100 trees with a fixed seed.
rf = RandomForestRegressor(n_estimators=100, random_state=0)
clf_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', rf),
])
train_predict('RF Regressor', clf_rf)

RF Regressor : Loop: 0,RMSLE: 0.428,Train Score 0.987,Test Score 0.909
RF Regressor : Loop: 1,RMSLE: 0.428,Train Score 0.987,Test Score 0.917
RF Regressor : Loop: 2,RMSLE: 0.418,Train Score 0.987,Test Score 0.898
RF Regressor : Loop: 3,RMSLE: 0.409,Train Score 0.987,Test Score 0.909
RF Regressor : Loop: 4,RMSLE: 0.404,Train Score 0.988,Test Score 0.898
RF Regressor : Loop: 5,RMSLE: 0.400,Train Score 0.987,Test Score 0.921
RF Regressor : Loop: 6,RMSLE: 0.429,Train Score 0.987,Test Score 0.904
RF Regressor : Loop: 7,RMSLE: 0.433,Train Score 0.987,Test Score 0.904
RF Regressor : Loop: 8,RMSLE: 0.421,Train Score 0.987,Test Score 0.918
RF Regressor : Loop: 9,RMSLE: 0.436,Train Score 0.987,Test Score 0.905
Mean Train Score for RF Regressor 0.987 , Mean Test Score 0.908


# Extra Trees Regressor

In [112]:
# Scale -> normalise -> extremely randomised trees (100 trees, fixed seed).
# ExtraTreesRegressor is imported in the notebook's import cell, so this cell
# no longer carries its own import.
etr = ExtraTreesRegressor(random_state=0, n_estimators=100)
clf_etr = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', etr)
])
train_predict('Extra Tre Regressor',clf_etr)

Extra Tre Regressor : Loop: 0,RMSLE: 0.441,Train Score 1.000,Test Score 0.912
Extra Tre Regressor : Loop: 1,RMSLE: 0.435,Train Score 1.000,Test Score 0.925
Extra Tre Regressor : Loop: 2,RMSLE: 0.414,Train Score 1.000,Test Score 0.906
Extra Tre Regressor : Loop: 3,RMSLE: 0.421,Train Score 1.000,Test Score 0.920
Extra Tre Regressor : Loop: 4,RMSLE: 0.409,Train Score 1.000,Test Score 0.911
Extra Tre Regressor : Loop: 5,RMSLE: 0.400,Train Score 1.000,Test Score 0.926
Extra Tre Regressor : Loop: 6,RMSLE: 0.433,Train Score 1.000,Test Score 0.913
Extra Tre Regressor : Loop: 7,RMSLE: 0.425,Train Score 1.000,Test Score 0.917
Extra Tre Regressor : Loop: 8,RMSLE: 0.418,Train Score 1.000,Test Score 0.916
Extra Tre Regressor : Loop: 9,RMSLE: 0.425,Train Score 1.000,Test Score 0.916
Mean Train Score for Extra Tre Regressor 1.000 , Mean Test Score 0.916


# AdaBoost Regressor

In [116]:
# Scale -> normalise -> AdaBoost regression with default settings.
# AdaBoostRegressor is imported in the notebook's import cell.
# random_state is fixed, like the random-forest and extra-trees cells, so the
# scores do not depend on the state of the global NumPy RNG when the cell runs.
adaboost = AdaBoostRegressor(random_state=0)
clf_ada = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', adaboost)
])
train_predict('Adaboost',clf_ada)

Adaboost : Loop: 0,RMSLE: 1.274,Train Score 0.527,Test Score 0.511
Adaboost : Loop: 1,RMSLE: 1.331,Train Score 0.504,Test Score 0.497
Adaboost : Loop: 2,RMSLE: 1.370,Train Score 0.482,Test Score 0.475
Adaboost : Loop: 3,RMSLE: 1.336,Train Score 0.507,Test Score 0.489
Adaboost : Loop: 4,RMSLE: 1.397,Train Score 0.429,Test Score 0.443
Adaboost : Loop: 5,RMSLE: 1.368,Train Score 0.457,Test Score 0.443
Adaboost : Loop: 6,RMSLE: 1.376,Train Score 0.506,Test Score 0.478
Adaboost : Loop: 7,RMSLE: 1.399,Train Score 0.459,Test Score 0.429
Adaboost : Loop: 8,RMSLE: 1.462,Train Score 0.372,Test Score 0.429
Adaboost : Loop: 9,RMSLE: 1.353,Train Score 0.494,Test Score 0.450
Mean Train Score for Adaboost 0.474 , Mean Test Score 0.464


# Gradient Boosting Regressor

In [103]:
# Scale -> normalise -> gradient boosting: 500 shallow trees with a small learning rate.
# random_state is fixed, like the random-forest and extra-trees cells, so
# repeated runs give the same scores.
# NOTE(review): `alpha` is documented as applying only to the 'huber' and
# 'quantile' losses, so it presumably has no effect with loss='ls' -- confirm.
gbm = GradientBoostingRegressor(loss='ls', alpha=0.95, n_estimators=500, max_depth=4, learning_rate=.01,
                                min_samples_leaf=9, min_samples_split=9, random_state=0)

clf_gbm = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('normalizer', Normalizer()),
    ('model', gbm)
])
train_predict('Gradient Boost',clf_gbm)

Gradient Boost : Loop: 0,RMSLE: 0.763,Train Score 0.824,Test Score 0.796
Gradient Boost : Loop: 1,RMSLE: 0.770,Train Score 0.822,Test Score 0.817
Gradient Boost : Loop: 2,RMSLE: 0.736,Train Score 0.824,Test Score 0.799
Gradient Boost : Loop: 3,RMSLE: 0.745,Train Score 0.821,Test Score 0.794
Gradient Boost : Loop: 4,RMSLE: 0.752,Train Score 0.828,Test Score 0.813
Gradient Boost : Loop: 5,RMSLE: 0.733,Train Score 0.822,Test Score 0.801
Gradient Boost : Loop: 6,RMSLE: 0.746,Train Score 0.824,Test Score 0.793
Gradient Boost : Loop: 7,RMSLE: 0.754,Train Score 0.828,Test Score 0.799
Gradient Boost : Loop: 8,RMSLE: 0.735,Train Score 0.821,Test Score 0.805
Gradient Boost : Loop: 9,RMSLE: 0.759,Train Score 0.827,Test Score 0.798
Mean Train Score for Gradient Boost 0.824 , Mean Test Score 0.802


In [117]:
# Tune an Extra-Trees regressor by grid search on the entire training set,
# then write its predictions as a submission file.
# NOTE: despite the `rf` name and the 'rf_tuned' output filename, the
# estimator is an ExtraTreesRegressor, not a random forest.
rf = ExtraTreesRegressor(random_state=0)

# Set the parameters by cross-validation.
# min_samples_split starts at 2: a node needs at least two samples to be
# split, and newer scikit-learn releases reject the value 1 outright.
param_grid = {
    'n_estimators': [100, 300, 500, 700, 1000],
    'max_depth': [None, 1, 2, 3, 5],
    'min_samples_split': [2, 3, 5],
}
model = GridSearchCV(rf, param_grid=param_grid)
model.fit(train_data.loc[:, 'season':], train_labels.loc[:, 'count'])

# Make predictions with the search object (it delegates to the best estimator found)
prediction_values = model.predict(prediction_data.loc[:, 'season':])

# Create submission from sample_submission file
submission_df = pd.read_csv('./data/output/sampleSubmission.csv')
submission_df['count'] = prediction_values
submission_df.to_csv('./data/output/rf_tuned_windowedfeatures.csv', index=False)