In [6]:
def process_data(data):
    '''
    remove redundant columns
    '''
    #rems = ['Id', 'Soil_Type7', 'Soil_Type8', 'Soil_Type15', 'Soil_Type25']
    rems = ['Id', 'Soil_Type7', 'Soil_Type15']
#     #Add constant columns as they don't help in prediction process
#     for c in data.columns:
#         if data[c].std() == 0: #standard deviation is zero
#             rem.append(c)

    #drop the columns
    for rem in rems:
        data.drop(rem,axis=1,inplace=True)
    

    return data

In [7]:
def score(y, y_pred):

    y_true = np.array(y, dtype=int)
    y_predict = np.array(y_pred, dtype=int)
    
    from sklearn.metrics import f1_score

    return f1_score(y_true, y_predict, average='micro')


In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

def normalize_train_data(dataset):
    r, c = dataset.shape
    array = dataset.values
    X_all = array[:,0:(c-1)]
    y_all = array[:,(c-1)]
    size = 10
    X_num = X_all[:,0:size]
    X_cat = X_all[:,size:]

    X_num = StandardScaler().fit_transform(X_num)
    X_num = MinMaxScaler().fit_transform(X_num)
    X_num = Normalizer().fit_transform(X_num)

    X_all_scaled = np.concatenate((X_num, X_cat), axis=1)
    
    return X_all_scaled, y_all

def normalize_test_data(dataset):
    r, c = dataset.shape
    X_all = dataset.values
    y_all = []
    size = 10
    X_num = X_all[:,0:size]
    X_cat = X_all[:,size:]

    X_num = StandardScaler().fit_transform(X_num)
    X_num = MinMaxScaler().fit_transform(X_num)
    X_num = Normalizer().fit_transform(X_num)

    X_all_scaled = np.concatenate((X_num, X_cat), axis=1)
    
    return X_all_scaled, y_all

def train_extract(train, test):
    X_train, y_train = normalize_train_data(train)
    X_test, y_test = normalize_train_data(test)
    
    return X_train, y_train, X_test, y_test



In [9]:
def perform_cross_validation(model, train):
    '''Performs a kfold cross validation of a given model'''
    kfold_train_test = []
    extracted_features = []
    kf = StratifiedKFold(train["Cover_Type"], n_folds=10)
    for train_index, test_index in kf:
        train_kfold = train.loc[train_index]
        test_kfold = train.loc[test_index]
        extracted_features.append(tuple(train_extract(train_kfold, test_kfold)))
    score_count = 0
    score_total = 0.0
    submission = []
    print (model)
    for X_train, y_train, X_test, y_test in extracted_features:

        model.fit(X_train, y_train)
        #score = model.score(X_test, y_test)
        predictions = model.predict(X_test)
        score = f1_score(y_test, predictions, average='micro')
        test_data = pd.DataFrame({'id': y_test, 'predictions': predictions})
        submission.append(test_data)
        score_count += 1
        score_total += score
        print("Kfold score " + str(score_count) + ": " + str(score))
    average_score = score_total/float(score_count)
    print("Average score: " + str(average_score))
    return submission

In [10]:
def perform_predictions(model, train, test):
    '''
    Performs the final prediction on test dataset
    '''
    global Id
    
    submission = []
    X_train, y_train = normalize_train_data(train)
    X_test, y_test = normalize_test_data(test)

    model.fit(X_train, y_train)
    final_predictions = model.predict(X_test)
    
    test_data = pd.DataFrame({'Id': Id, 'Cover_Type': final_predictions})
    submission.append(test_data)
    #submission = pd.DataFrame({'id': test_clean['id'], 'prediction': weighted_prediction})

    return submission

In [11]:
def to_csv(df,out):
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]
    df.to_csv(out, index=False)
    return

In [100]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.cross_validation import KFold, StratifiedKFold
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from itertools import combinations
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier


if __name__ == '__main__':
    print ('Loading data...')
    train_raw = pd.read_csv('data/train.csv')
    test_raw = pd.read_csv('data/test.csv')
    Id = test_raw['Id']
    print ('Cleaning data...')
    train_clean = process_data(train_raw)
    test_clean = process_data(test_raw)


    print ('Training...')

    seed = 19
    from sklearn.ensemble import BaggingClassifier
    from sklearn.tree import DecisionTreeClassifier
    base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=13)
    model_0 = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator, n_estimators=100, random_state=seed)
    model_1 = KNeighborsClassifier(n_jobs=-1, algorithm='auto',n_neighbors=1)
    model_2 = SGDClassifier(loss='log',penalty='elasticnet', n_iter=7, random_state=seed, shuffle=True)
    
    cv_pred_1 = perform_cross_validation(model_0, train_clean)
    cv_pred_2 = perform_cross_validation(model_1, train_clean)
    cv_pred_3 = perform_cross_validation(model_2, train_clean)
    
    print ('Predicting...')
    pred_1 = perform_predictions(model_0, train_clean, test_clean)
    pred_2 = perform_predictions(model_1, train_clean, test_clean)
    pred_3 = perform_predictions(model_2, train_clean, test_clean)

    print ('Ensembling...')
    cv_preds = [cv_pred_1, cv_pred_2, cv_pred_3]
    wt_final = []
    for i in range(1500):
        w = np.random.dirichlet(np.ones(3),size=1)
        wt_final.append(w)
    max_average_score = 0.67
    max_weights = None
    for wt in wt_final:
        total_score = 0
        for i in range(9):
            y_true = cv_preds[0][i]['id']
            weighted_prediction = sum([wt[0][x] * cv_preds[x][i]['predictions'].astype(int).reset_index() for x in range(3)])
            weighted_prediction = [round(p) for p in weighted_prediction['predictions']]
            total_score += score(y_true, weighted_prediction)
        average_score = total_score/9.0
        if (average_score > max_average_score):
            max_average_score = average_score
            max_weights = wt
    print ('Best set of weights: ' + str(max_weights))
    print ('Corresponding score: ' + str(max_average_score))
    preds = [pred_1, pred_2, pred_3]
    weighted_prediction = sum([max_weights[0][x] * preds[x][0]['Cover_Type'].astype(int) for x in range(3)])
    weighted_prediction = [int(round(p)) for p in weighted_prediction]
    submission = pd.DataFrame({'Id': Id, 'Cover_Type': weighted_prediction})
    #submission.to_csv('submission.csv', index=False)
    to_csv(submission, 'submission.csv')
    print('Output submission file')

Loading data...
Cleaning data...
Training...




BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=13,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=19, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=100, n_jobs=-1, oob_score=False,
         random_state=19, verbose=0, warm_start=False)
Kfold score 1: 0.549603174603
Kfold score 2: 0.637566137566
Kfold score 3: 0.62037037037
Kfold score 4: 0.569444444444
Kfold score 5: 0.580687830688
Kfold score 6: 0.499338624339
Kfold score 7: 0.555555555556
Kfold score 8: 0.683201058201
Kfold score 9: 0.736111111111
Kfold score 10: 0.630291005291
Average score: 0.606216931217




KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=1, p=2,
           weights='uniform')
Kfold score 1: 0.634259259259
Kfold score 2: 0.67328042328
Kfold score 3: 0.699735449735
Kfold score 4: 0.640211640212
Kfold score 5: 0.622354497354
Kfold score 6: 0.636243386243
Kfold score 7: 0.67791005291
Kfold score 8: 0.664021164021
Kfold score 9: 0.787698412698
Kfold score 10: 0.739417989418
Average score: 0.677513227513
SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='log', n_iter=7, n_jobs=1,
       penalty='elasticnet', power_t=0.5, random_state=19, shuffle=True,
       verbose=0, warm_start=False)




Kfold score 1: 0.498677248677
Kfold score 2: 0.718915343915
Kfold score 3: 0.617063492063
Kfold score 4: 0.539021164021
Kfold score 5: 0.569444444444
Kfold score 6: 0.625
Kfold score 7: 0.61044973545
Kfold score 8: 0.638888888889
Kfold score 9: 0.602513227513
Kfold score 10: 0.675925925926
Average score: 0.60958994709
Predicting...




Ensembling...
Best set of weights: [[ 0.01749344  0.91882812  0.06367843]]
Corresponding score: 0.670634920635
Output submission file


In [99]:
    print ('Ensembling...')
    cv_preds = [cv_pred_1, cv_pred_2, cv_pred_3]
    wt_final = []
    for i in range(1000):
        w = np.random.dirichlet(np.ones(3),size=1)
        wt_final.append(w)
    max_average_score = 0.67
    max_weights = None
    for wt in wt_final:
        total_score = 0
        for i in range(9):
            y_true = cv_preds[0][i]['id']
            weighted_prediction = sum([wt[0][x] * cv_preds[x][i]['predictions'].astype(int).reset_index() for x in range(3)])
            weighted_prediction = [round(p) for p in weighted_prediction['predictions']]
            total_score += score(y_true, weighted_prediction)
        average_score = total_score/9.0
        if (average_score > max_average_score):
            max_average_score = average_score
            max_weights = wt
    print ('Best set of weights: ' + str(max_weights))
    print ('Corresponding score: ' + str(max_average_score))
    preds = [pred_1, pred_2, pred_3]
    weighted_prediction = sum([max_weights[0][x] * preds[x][0]['Cover_Type'].astype(int) for x in range(3)])
    weighted_prediction = [int(round(p)) for p in weighted_prediction]
    submission = pd.DataFrame({'Id': Id, 'Cover_Type': weighted_prediction})
    #submission.to_csv('submission.csv', index=False)
    to_csv(submission, 'submission.csv')
    print('Output submission file')

Ensembling...
Best set of weights: [[  3.28277355e-04   9.47608900e-01   5.20628224e-02]]
Corresponding score: 0.670634920635
Output submission file


In [95]:
    base_estimator = DecisionTreeClassifier(random_state=seed)
    model_0 = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator)
    model_1 = KNeighborsClassifier(n_jobs=-1)
    model_2 = SGDClassifier(loss='log',penalty='elasticnet', n_iter=7, random_state=seed, shuffle=True)


In [96]:
    #cv_pred_1 = perform_cross_validation(model_0, train_clean)
    #cv_pred_2 = perform_cross_validation(model_1, train_clean)
    cv_pred_3 = perform_cross_validation(model_2, train_clean)



SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='log', n_iter=7, n_jobs=1,
       penalty='elasticnet', power_t=0.5, random_state=19, shuffle=True,
       verbose=0, warm_start=False)
Kfold score 1: 0.498677248677
Kfold score 2: 0.718915343915
Kfold score 3: 0.617063492063
Kfold score 4: 0.539021164021
Kfold score 5: 0.569444444444
Kfold score 6: 0.625
Kfold score 7: 0.61044973545
Kfold score 8: 0.638888888889
Kfold score 9: 0.602513227513
Kfold score 10: 0.675925925926
Average score: 0.60958994709


In [42]:
    print ('Ensembling...')
    cv_preds = [cv_pred_1, cv_pred_2, cv_pred_3]
    wt_final = []
    for i in range(1000):
        w = np.random.dirichlet(np.ones(3),size=1)
        wt_final.append(w)
    max_average_score = 0.68
    max_weights = None
    for wt in wt_final:
        total_score = 0
        for i in range(10):
            for x in range(3):
                y_true = cv_preds[x][i]['id']
                weighted_prediction = sum([wt[0][x] * cv_preds[x][i]['predictions'].astype(int).reset_index()])
                weighted_prediction = [round(p) for p in weighted_prediction['predictions']]
                #print(weighted_prediction)
                total_score += score(y_true, weighted_prediction)
                #print(total_score)
            average_score = total_score/10.0
            if (average_score > max_average_score):
                max_average_score = average_score
                max_weights = wt
    print ('Best set of weights: ' + str(max_weights))
    print ('Corresponding score: ' + str(max_average_score))
    
    preds = [pred_1, pred_2, pred_3]
    weighted_prediction = sum([max_weights[0][x] * preds[x][0]['Cover_Type'].astype(int) for x in range(3)])
    weighted_prediction = [int(round(p)) for p in weighted_prediction]
    submission = pd.DataFrame({'Id': Id, 'Cover_Type': weighted_prediction})
    #submission.to_csv('submission.csv', index=False)
    to_csv(submission, 'submission.csv')
    print('Output submission file')

Ensembling...
Best set of weights: [[ 0.00634345  0.94818049  0.04547606]]
Corresponding score: 0.687698412698
Output submission file


In [57]:
from sklearn.metrics import make_scorer
from sklearn.grid_search import GridSearchCV

def tuner(model, param_grid, dataset):
    X_train, y_train, _, _ = train_extract(dataset, dataset)
    tuning_scorer = make_scorer(score, greater_is_better = True)
    
    tuner_model = GridSearchCV(estimator=model, 
                                param_grid=param_grid, 
                                scoring=tuning_scorer,
                                verbose=10, 
                                n_jobs=-1, 
                                iid=True, 
                                refit=True, 
                                cv=5)

    tuner_model.fit(X_train, y_train)
    print("Best score: %0.3f" % tuner_model.best_score_)
    print("Best parameters set:")
    best_parameters = tuner_model.best_estimator_.get_params()
    for param_name in sorted(param_grid.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
        


In [76]:
    base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=19)
    model_0 = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator, n_estimators=100, random_state=seed)

In [77]:
    param_grid = {
        'n_estimators': np.arange(89, 100, 2),
        }
    tuner(model_0, param_grid, train_clean)
    
    



Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] n_estimators=89 .................................................
[CV] n_estimators=89 .................................................
[CV] n_estimators=89 .................................................


  **self._backend_args)


[CV] n_estimators=89 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=89, score=0.740079 -  24.5s
[CV] n_estimators=89 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=89, score=0.779431 -  24.6s
[CV] ........................ n_estimators=89, score=0.745701 -  24.7s
[CV] n_estimators=91 .................................................
[CV] n_estimators=91 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=89, score=0.747685 -  25.3s
[CV] n_estimators=91 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=89, score=0.838294 -  26.1s
[CV] n_estimators=91 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   50.8s


[CV] ........................ n_estimators=91, score=0.738757 -  26.3s


  **self._backend_args)


[CV] n_estimators=91 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=91, score=0.745701 -  26.7s
[CV] n_estimators=93 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=91, score=0.749008 -  27.1s
[CV] n_estimators=93 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=91, score=0.777116 -  25.0s
[CV] n_estimators=93 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=91, score=0.837302 -  25.2s


  **self._backend_args)


[CV] n_estimators=93 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.3min


[CV] ........................ n_estimators=93, score=0.744709 -  25.2s
[CV] n_estimators=93 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=93, score=0.738095 -  25.5s
[CV] n_estimators=95 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=93, score=0.778439 -  26.1s
[CV] n_estimators=95 .................................................


  **self._backend_args)


[CV] ........................ n_estimators=93, score=0.749669 -  27.0s
[CV] ........................ n_estimators=93, score=0.837632 -  26.0s
[CV] n_estimators=95 .................................................
[CV] n_estimators=95 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=95, score=0.743056 -  26.1s
[CV] n_estimators=95 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=95, score=0.779762 -  21.7s
[CV] n_estimators=97 .................................................
[CV] ........................ n_estimators=95, score=0.739418 -  22.0s


  **self._backend_args)


[CV] n_estimators=97 .................................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  2.1min
  **self._backend_args)


[CV] ........................ n_estimators=95, score=0.749008 -  22.4s
[CV] n_estimators=97 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=95, score=0.837963 -  22.1s
[CV] n_estimators=97 .................................................


  **self._backend_args)
  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=97, score=0.747024 -  22.8s
[CV] n_estimators=97 .................................................
[CV] ........................ n_estimators=97, score=0.739418 -  22.8s


  **self._backend_args)


[CV] n_estimators=99 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=97, score=0.750000 -  23.4s
[CV] n_estimators=99 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=97, score=0.779431 -  22.6s
[CV] n_estimators=99 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=97, score=0.839947 -  23.6s
[CV] n_estimators=99 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=99, score=0.749008 -  23.9s
[CV] n_estimators=99 .................................................


  **self._backend_args)
  **self._backend_args)


[CV] ........................ n_estimators=99, score=0.739418 -  23.5s


[Parallel(n_jobs=-1)]: Done  27 out of  30 | elapsed:  2.9min remaining:   19.1s
  **self._backend_args)


[CV] ........................ n_estimators=99, score=0.747024 -  24.2s


  **self._backend_args)


[CV] ........................ n_estimators=99, score=0.778439 -  15.3s


  **self._backend_args)


[CV] ........................ n_estimators=99, score=0.838955 -  15.3s


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:  3.1min finished


Best score: 0.771
Best parameters set:
	n_estimators: 97


In [None]:
    base_estimator = DecisionTreeClassifier(random_state=seed,max_depth=13)
    model_0 = BaggingClassifier(n_jobs=-1,base_estimator=base_estimator, n_estimators=100, random_state=seed)
    model_1 = KNeighborsClassifier(n_jobs=-1,n_neighbors=1)
    model_2 = SGDClassifier(loss='modified_huber', n_iter=5, random_state=seed, shuffle=True)

In [75]:
    param_grid = {
        #'n_estimators': np.arange(89, 100, 2),
        'algorithm':["auto", "ball_tree", "kd_tree", "brute"],
        'n_neighbors':[1,2,3]
        }
    tuner(model_1, param_grid, train_clean)
    
    



Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.722553 -   1.9s
[CV] algorithm=auto, n_neighbors=1 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.694444 -   1.9s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.700397 -   2.1s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.742063 -   2.2s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=1, score=0.804563 -   1.4s
[CV] algorithm=a

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    3.5s


[CV] .......... algorithm=auto, n_neighbors=2, score=0.674272 -   2.0s
[CV] algorithm=auto, n_neighbors=2 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.664021 -   2.0s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.670304 -   1.9s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.700397 -   1.8s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=2, score=0.786045 -   1.9s
[CV] algorithm=auto, n_neighbors=3 ...................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    6.0s


[CV] .......... algorithm=auto, n_neighbors=3, score=0.682209 -   2.2s
[CV] algorithm=auto, n_neighbors=3 ...................................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.677579 -   2.5s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.665675 -   2.4s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.709656 -   2.3s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] .......... algorithm=auto, n_neighbors=3, score=0.783730 -   2.2s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.722553 -   3.6s
[CV] algorithm=ball_tree, n_neighbors=1 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.694444 -   2.9s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] .

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   11.2s


[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.700397 -   4.1s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=1, score=0.804563 -   2.2s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.674272 -   3.1s
[CV] algorithm=ball_tree, n_neighbors=2 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.670304 -   2.8s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.664021 -   3.6s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.700397 -   2.7s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   15.5s


[CV] ..... algorithm=ball_tree, n_neighbors=2, score=0.786045 -   1.9s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.682209 -   3.3s
[CV] algorithm=ball_tree, n_neighbors=3 ..............................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.665675 -   3.1s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.677579 -   4.2s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.709656 -   3.0s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ..... algorithm=ball_tree, n_neighbors=3, score=0.783730 -   2.1s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.722553 -   2.1s
[CV] algorithm=kd_tree, n_neighbors=1 ................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   21.5s


[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.742063 -   2.1s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=1, score=0.804563 -   2.1s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.674272 -   2.5s
[CV] algorithm=kd_tree, n_neighbors=2 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.664021 -   2.5s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.670304 -   2.4s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.700397 -   2.5s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] ....... algorithm=kd_tree, n_neighbors=2, score=0.786045 -   2.1s
[CV] algorithm=kd_tree, n_neighbors=3 ................................
[CV] .

  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   27.3s


[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.665675 -   2.5s
[CV] algorithm=brute, n_neighbors=1 ..................................


  **self._backend_args)


[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.709656 -   2.8s
[CV] algorithm=brute, n_neighbors=1 ..................................
[CV] ....... algorithm=kd_tree, n_neighbors=3, score=0.783730 -   2.4s
[CV] algorithm=brute, n_neighbors=1 ..................................


  **self._backend_args)
  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.722553 -   7.2s
[CV] ......... algorithm=brute, n_neighbors=1, score=0.700397 -   6.6s
[CV] algorithm=brute, n_neighbors=1 ..................................
[CV] ......... algorithm=brute, n_neighbors=1, score=0.694444 -   5.8s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)
  **self._backend_args)


[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.742063 -   6.9s
[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=1, score=0.804563 -   6.9s
[CV] algorithm=brute, n_neighbors=2 ..................................
[CV] ......... algorithm=brute, n_neighbors=2, score=0.674272 -   6.9s


  **self._backend_args)


[CV] algorithm=brute, n_neighbors=2 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.664021 -   7.2s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=2, score=0.670304 -   6.5s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   42.5s


[CV] ......... algorithm=brute, n_neighbors=2, score=0.700397 -   5.2s
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=3, score=0.682209 -   5.1s
[CV] ......... algorithm=brute, n_neighbors=2, score=0.786045 -   5.4s
[CV] algorithm=brute, n_neighbors=3 ..................................
[CV] algorithm=brute, n_neighbors=3 ..................................


  **self._backend_args)
  **self._backend_args)


[CV] ......... algorithm=brute, n_neighbors=3, score=0.677579 -   5.1s
[CV] ......... algorithm=brute, n_neighbors=3, score=0.665675 -   3.0s
[CV] ......... algorithm=brute, n_neighbors=3, score=0.709656 -   2.9s
[CV] ......... algorithm=brute, n_neighbors=3, score=0.783730 -   2.9s


[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   50.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   50.2s finished


Best score: 0.733
Best parameters set:
	algorithm: 'auto'
	n_neighbors: 1


In [94]:
    model_2 = SGDClassifier(loss='log',penalty='elasticnet', random_state=seed, shuffle=True)

        
    param_grid = {
        #'loss':[ "hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
        #'penalty': ["l2", "l1", "elasticnet"],
        'n_iter': [1,2,3,4,5,6,7,8,9,20]
        }
    
    tuner(model_2, param_grid, train_clean)
    
    

Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] n_iter=1 ........................................................
[CV] ............................... n_iter=1, score=0.520172 -   0.2s
[CV] ............................... n_iter=1, score=0.510251 -   0.2s
[CV] n_iter=2 ........................................................
[CV] n_iter=1 ........................................................
[CV] ............................... n_iter=1, score=0.590278 -   0.2s
[CV] n_iter=2 ........................................................
[CV] ............................... n_iter=1, score=0.541336 -   0.3s
[CV] n_iter=2 ........................................................
[CV] ............................... n_iter=1, score=0.546958 -   0.2s
[CV] n_iter=2 ........................................................
[CV] .

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.6s


[CV] n_iter=3 ........................................................
[CV] ............................... n_iter=2, score=0.596230 -   0.3s
[CV] ............................... n_iter=2, score=0.630291 -   0.2s
[CV] n_iter=3 ........................................................
[CV] n_iter=3 ........................................................
[CV] ............................... n_iter=3, score=0.567130 -   0.4s
[CV] ............................... n_iter=3, score=0.537698 -   0.3s
[CV] n_iter=3 ........................................................
[CV] n_iter=4 ........................................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    0.9s


[CV] ............................... n_iter=3, score=0.600198 -   0.3s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=3, score=0.593254 -   0.3s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=3, score=0.623347 -   0.3s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=4, score=0.558201 -   0.4s
[CV] n_iter=4 ........................................................
[CV] ............................... n_iter=4, score=0.524802 -   0.4s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=4, score=0.583003 -   0.4s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=4, score=0.574405 -   0.4s


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.7s


[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=4, score=0.636905 -   0.4s
[CV] n_iter=5 ........................................................
[CV] ............................... n_iter=5, score=0.554894 -   0.5s
[CV] ............................... n_iter=5, score=0.540344 -   0.5s
[CV] n_iter=5 ........................................................
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=5, score=0.577381 -   0.5s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=5, score=0.604497 -   0.5s
[CV] n_iter=6 ........................................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    2.5s


[CV] ............................... n_iter=5, score=0.617394 -   0.5s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=6, score=0.573413 -   0.6s
[CV] n_iter=6 ........................................................
[CV] ............................... n_iter=6, score=0.534061 -   0.6s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.596561 -   0.6s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.569775 -   0.7s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=6, score=0.650463 -   0.8s
[CV] n_iter=7 ........................................................
[CV] ............................... n_iter=7, score=0.539021 -   0.8s
[CV] n_iter=7 ........................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    4.3s


[CV] ............................... n_iter=7, score=0.653108 -   0.7s
[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=7, score=0.653770 -   0.7s
[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=8, score=0.574074 -   0.8s
[CV] n_iter=8 ........................................................
[CV] ............................... n_iter=8, score=0.552910 -   0.7s
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=8, score=0.575066 -   0.8s
[CV] ............................... n_iter=8, score=0.604497 -   0.8s
[CV] n_iter=9 ........................................................
[CV] n_iter=9 ........................................................
[CV] ............................... n_iter=8, score=0.638228 -   0.7s
[CV] n_iter=9 ........................................................
[CV] .

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    6.2s


[CV] ............................... n_iter=9, score=0.597222 -   0.8s
[CV] n_iter=20 .......................................................
[CV] ............................... n_iter=9, score=0.655754 -   0.8s
[CV] n_iter=20 .......................................................
[CV] .............................. n_iter=20, score=0.544312 -   1.6s
[CV] n_iter=20 .......................................................
[CV] .............................. n_iter=20, score=0.555886 -   1.7s
[CV] .............................. n_iter=20, score=0.609127 -   1.6s
[CV] .............................. n_iter=20, score=0.632275 -   1.4s
[CV] .............................. n_iter=20, score=0.668320 -   1.0s


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    8.9s finished


Best score: 0.603
Best parameters set:
	n_iter: 7
