In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the local input directory and print the path of every file found.
# Alternative root kept from the original cell: '/kaggle/input'.
for folder, _subdirs, names in os.walk('./kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

./kaggle/input/Kannada-MNIST/train.csv
./kaggle/input/Kannada-MNIST/sample_submission.csv
./kaggle/input/Kannada-MNIST/test.csv
./kaggle/input/Kannada-MNIST/Dig-MNIST.csv


# Loading

In [2]:
# Read the training set, and Dig-MNIST which serves as the validation set in the cells below.
train_data = pd.read_csv('./kaggle/input/Kannada-MNIST/train.csv')
valid_data = pd.read_csv('./kaggle/input/Kannada-MNIST/Dig-MNIST.csv')

In [3]:
import random
# Seed both the stdlib and the NumPy global random generators.
random.seed(32)
np.random.seed(42)

# Shuffle each frame with its own fixed seed and renumber the rows from 0.
train_data = train_data.sample(frac=1, random_state=52).reset_index(drop=True)
valid_data = valid_data.sample(frac=1, random_state=62).reset_index(drop=True)

# Separate the feature columns from the 'label' column.
X_train = train_data.drop(columns=['label'])
y_train = train_data['label']
X_valid = valid_data.drop(columns=['label'])
y_valid = valid_data['label']

# The features are deliberately left unscaled (no division by 255) for XGBoost; see
# https://datascience.stackexchange.com/questions/60950/is-it-necessary-to-normalize-data-for-xgboost

In [4]:
# Number of rows in the training and validation feature frames.
len(X_train), len(X_valid)

(60000, 10240)

In [5]:
import time

In [6]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

In [7]:
from sklearn.metrics import accuracy_score
import random

# Hyperparameter tuning

I used https://towardsdatascience.com/xgboost-fine-tune-and-optimize-your-model-23d996fab663

My idea is to use a multi-phase random search.

1. Train a lot of models on a small training set (a random subset of the original train.csv) ---> Evaluate them on the validation set (Dig-MNIST.csv) ---> save the best ones
2. The best models define a narrower range of parameters. We'll train a lot of models (but fewer than in the 1st step) with parameters drawn from that range on a medium-size dataset. ---> Evaluate them on the validation set (Dig-MNIST.csv) ---> save the best ones
3. The best models from the 2nd step define an even narrower range of parameters. We'll again train some models with parameters drawn randomly from that range, this time on the full training set, and choose the best model for the competition.


We'll tune the following parameters:

* `max_depth`
* `colsample_bytree`
* `n_estimators`
* `learning_rate`
* `subsample`
* `reg_lambda`

In [8]:
def train(train_set_size,max_depth,colsample_bytree,n_estimators,learning_rate,subsample,reg_lambda,random_state=None):
    """
    Train an XGBoost classifier on (a random fraction of) the global ``train_data``.

    Parameters
    ----------
    train_set_size : float
        Fraction of the rows of ``train_data`` to train on. A value below 1.0
        draws a random sample of that fraction; 1.0 or more uses every row.
    max_depth, colsample_bytree, n_estimators, learning_rate, subsample, reg_lambda
        Forwarded unchanged to ``XGBClassifier``.
    random_state : int or None, optional
        Seed passed to both the row sampling and ``XGBClassifier`` so a single
        call can be reproduced on its own. The default ``None`` keeps the
        previous behaviour (no explicit seed is given to either).

    Returns
    -------
    XGBClassifier
        The fitted classifier.
    """
    if train_set_size<1.0:
        train_data_sampled = train_data.sample(frac=train_set_size,
                                               random_state=random_state).reset_index(drop=True)
    else:
        # Nothing below modifies the frame (``drop`` returns a new object), so the
        # full training set is used directly instead of being copied first.
        train_data_sampled = train_data
    # Split into the feature columns and the 'label' target.
    X_train_sampled = train_data_sampled.drop(['label'], axis=1)
    y_train_sampled = train_data_sampled.label

    clf = XGBClassifier(use_label_encoder = False,
                        eval_metric = 'mlogloss',
                        num_class = 10,
                        max_depth = max_depth,
                        colsample_bytree = colsample_bytree,
                        n_estimators = n_estimators,
                        learning_rate = learning_rate,
                        subsample = subsample,
                        reg_lambda = reg_lambda,
                        random_state = random_state,
                       )
    clf.fit(X_train_sampled,y_train_sampled)
    return clf
    

In [9]:
# Baseline: fit one small model on the full training set and report how long it takes.
baseline_params = dict(train_set_size = 1.0,
                       max_depth = 5,
                       colsample_bytree = 1.0,
                       n_estimators = 10,
                       learning_rate = 0.2,
                       subsample = 1.0,
                       reg_lambda = 100)
start = time.time()
model = train(**baseline_params)
end = time.time()
print(f"Time: {end-start:.2f}s")

Time: 29.22s


In [10]:
# Time the baseline model's prediction on the validation set ...
start = time.time()
y_pred = model.predict(X_valid)
end = time.time()
print(f"Time: {end-start:.2f}s")

# ... and report its validation accuracy.
baseline_acc = accuracy_score(y_valid, y_pred)
print(baseline_acc)

Time: 0.12s
0.57998046875


## 1st round

In [11]:
MAX_PARAM_NUM_1 = 100       # number of random candidate parameter sets drawn in round 1
MAX_PARAM_NUM_1_BEST = 5    # number of top-scoring sets later used to narrow the ranges
np.random.seed(112)         # fixed seed so the same candidates are drawn on every run

# Rows are collected in a list and turned into a frame once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
candidates_1 = []
for i in range(MAX_PARAM_NUM_1):
    # The order of these draws determines the random stream; keep it unchanged.
    max_depth = np.random.randint(3,21)                  # integer in [3, 20]
    n_estimators = np.random.randint(20,200)             # integer in [20, 199]
    learning_rate = np.random.rand()*(0.4-0.01)+0.01     # uniform in [0.01, 0.4)
    colsample_bytree = np.random.rand()*(1.0-0.1)+0.1    # uniform in [0.1, 1.0)
    subsample = np.random.rand()*(1.0-0.1)+0.1           # uniform in [0.1, 1.0)
    reg_lambda = np.random.rand()*100.0                  # uniform in [0, 100)
    candidates_1.append({"max_depth":max_depth,
                         "n_estimators":n_estimators,
                         "learning_rate":learning_rate,
                         "colsample_bytree":colsample_bytree,
                         "subsample":subsample,
                         "reg_lambda":reg_lambda,
                         # placeholder (row number); overwritten once the candidates are scored
                         "valid_acc":float(i)})
# Explicit column list keeps the layout stable even when no rows were drawn.
parameters_1 = pd.DataFrame(candidates_1,
                            columns=["max_depth","n_estimators","learning_rate",
                                     "colsample_bytree","subsample","reg_lambda","valid_acc"])
parameters_1["max_depth"] = parameters_1["max_depth"].astype(int)
parameters_1["n_estimators"] = parameters_1["n_estimators"].astype(int)

In [12]:
# Round 1 evaluation: fit every candidate in parameters_1 on a random 5% of the
# training data and record its accuracy on the validation set.
valid_accs = []
start = time.time()
for i in range(MAX_PARAM_NUM_1):
    params = parameters_1.iloc[i]
    model = train(train_set_size = 0.05,
                  # the row Series may carry the integer settings as floats, so cast them back
                  max_depth = int(params["max_depth"]),
                  colsample_bytree = params["colsample_bytree"],
                  n_estimators = int(params["n_estimators"]),
                  learning_rate = params["learning_rate"],
                  subsample = params["subsample"],
                  reg_lambda = params["reg_lambda"])
    y_pred = model.predict(X_valid)
    # Score once and reuse the value for both the record and the progress line.
    valid_acc = accuracy_score(y_valid, y_pred)
    valid_accs.append(valid_acc)
    end = time.time()
    # VA = validation accuracy, ET = time elapsed since the loop started.
    print(f"[{i+1}/{MAX_PARAM_NUM_1}] VA: {valid_acc:.4f} | ET: {end-start:.2f}s")
# Replace the placeholder column with the measured accuracies.
parameters_1["valid_acc"]=valid_accs

[1/50] VA: 0.5487 | ET: 13.77s
[2/50] VA: 0.5769 | ET: 23.66s
[3/50] VA: 0.5537 | ET: 27.94s
[4/50] VA: 0.5747 | ET: 36.13s
[5/50] VA: 0.5665 | ET: 53.74s
[6/50] VA: 0.5593 | ET: 63.72s
[7/50] VA: 0.5580 | ET: 72.72s
[8/50] VA: 0.4475 | ET: 73.82s
[9/50] VA: 0.5357 | ET: 76.73s
[10/50] VA: 0.5142 | ET: 82.53s
[11/50] VA: 0.5541 | ET: 85.67s
[12/50] VA: 0.5689 | ET: 89.30s
[13/50] VA: 0.5306 | ET: 91.09s
[14/50] VA: 0.5499 | ET: 97.44s
[15/50] VA: 0.5424 | ET: 99.95s
[16/50] VA: 0.5380 | ET: 106.01s
[17/50] VA: 0.4945 | ET: 109.44s
[18/50] VA: 0.5063 | ET: 113.26s
[19/50] VA: 0.5727 | ET: 135.99s
[20/50] VA: 0.5789 | ET: 146.14s
[21/50] VA: 0.5651 | ET: 155.74s
[22/50] VA: 0.5654 | ET: 160.05s
[23/50] VA: 0.5615 | ET: 164.57s
[24/50] VA: 0.5312 | ET: 171.52s
[25/50] VA: 0.5680 | ET: 202.58s
[26/50] VA: 0.5699 | ET: 210.39s
[27/50] VA: 0.5360 | ET: 215.36s
[28/50] VA: 0.5815 | ET: 237.14s
[29/50] VA: 0.5575 | ET: 246.79s
[30/50] VA: 0.5175 | ET: 249.96s
[31/50] VA: 0.5339 | ET: 254.05s
[

In [13]:
# Round-1 candidates together with the validation accuracy each one reached.
parameters_1

Unnamed: 0,max_depth,n_estimators,learning_rate,colsample_bytree,subsample,reg_lambda,valid_acc
0,7,191,0.259719,0.955015,0.168109,77.692966,0.54873
1,12,190,0.377405,0.525297,0.265613,50.927594,0.576855
2,19,71,0.310659,0.208061,0.90689,62.665927,0.553711
3,20,94,0.177366,0.454016,0.939993,7.362099,0.574707
4,19,116,0.10814,0.879084,0.589585,63.126719,0.566504
5,5,149,0.337292,0.738425,0.269561,98.566856,0.559277
6,16,59,0.20623,0.787177,0.715359,72.195738,0.558008
7,12,26,0.089378,0.535771,0.135331,56.502985,0.447461
8,16,127,0.041406,0.155487,0.195553,51.383707,0.535742
9,10,59,0.054733,0.546554,0.439515,97.906001,0.51416


In [14]:
# Take the MAX_PARAM_NUM_1_BEST rows with the highest validation accuracy and
# compute the column-wise minimum and maximum over them.
top_parameters_1 = parameters_1.sort_values("valid_acc", ascending=False).head(MAX_PARAM_NUM_1_BEST)
min_parameters_1 = top_parameters_1.min()
max_parameters_1 = top_parameters_1.max()


In [15]:
# Column-wise minimum over the best round-1 candidates (lower bounds for round 2).
min_parameters_1

max_depth            6.000000
n_estimators        64.000000
learning_rate        0.035343
colsample_bytree     0.173926
subsample            0.265613
reg_lambda           5.332272
valid_acc            0.570605
dtype: float64

In [16]:
# Column-wise maximum over the best round-1 candidates (upper bounds for round 2).
max_parameters_1

max_depth            20.000000
n_estimators        199.000000
learning_rate         0.377405
colsample_bytree      0.948302
subsample             0.939993
reg_lambda           93.396598
valid_acc             0.588086
dtype: float64

## 2nd round

In [17]:
MAX_PARAM_NUM_2 = 20        # number of random candidate parameter sets drawn in round 2
MAX_PARAM_NUM_2_BEST = 5    # number of top-scoring sets kept after round 2

# Search bounds come from the best round-1 candidates. They do not change between
# draws, so they are read once here instead of on every loop iteration.
max_depth_MAX = int(max_parameters_1["max_depth"]); max_depth_MIN = int(min_parameters_1["max_depth"])
n_estimators_MAX  = int(max_parameters_1["n_estimators"]); n_estimators_MIN = int(min_parameters_1["n_estimators"])
learning_rate_MAX = max_parameters_1["learning_rate"]; learning_rate_MIN = min_parameters_1["learning_rate"]
colsample_bytree_MAX = max_parameters_1["colsample_bytree"]; colsample_bytree_MIN = min_parameters_1["colsample_bytree"]
subsample_MAX = max_parameters_1["subsample"]; subsample_MIN = min_parameters_1["subsample"]
reg_lambda_MAX = max_parameters_1["reg_lambda"]; reg_lambda_MIN = min_parameters_1["reg_lambda"]

# Rows are collected in a list and turned into a frame once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
candidates_2 = []
for i in range(MAX_PARAM_NUM_2):
    # The order of these draws determines the random stream; keep it unchanged.
    # randint excludes its upper end, hence the +1 to make the maximum reachable.
    max_depth = np.random.randint(max_depth_MIN,max_depth_MAX+1)
    n_estimators = np.random.randint(n_estimators_MIN,n_estimators_MAX+1)
    learning_rate = np.random.rand()*(learning_rate_MAX - learning_rate_MIN)+learning_rate_MIN
    colsample_bytree = np.random.rand()*(colsample_bytree_MAX - colsample_bytree_MIN)+colsample_bytree_MIN
    subsample = np.random.rand()*(subsample_MAX - subsample_MIN)+subsample_MIN
    reg_lambda = np.random.rand()*(reg_lambda_MAX - reg_lambda_MIN)+reg_lambda_MIN

    candidates_2.append({"max_depth":max_depth,
                         "n_estimators":n_estimators,
                         "learning_rate":learning_rate,
                         "colsample_bytree":colsample_bytree,
                         "subsample":subsample,
                         "reg_lambda":reg_lambda,
                         # placeholder (row number); overwritten once the candidates are scored
                         "valid_acc":float(i)})
# Explicit column list keeps the layout stable even when no rows were drawn.
parameters_2 = pd.DataFrame(candidates_2,
                            columns=["max_depth","n_estimators","learning_rate",
                                     "colsample_bytree","subsample","reg_lambda","valid_acc"])
parameters_2["max_depth"] = parameters_2["max_depth"].astype(int)
parameters_2["n_estimators"] = parameters_2["n_estimators"].astype(int)

In [18]:
# Round-2 candidates before evaluation; valid_acc still holds the placeholder row number.
parameters_2

Unnamed: 0,max_depth,n_estimators,learning_rate,colsample_bytree,subsample,reg_lambda,valid_acc
0,9,198,0.28509,0.434505,0.741057,18.424875,0.0
1,8,183,0.266779,0.24718,0.660094,48.108131,1.0
2,17,183,0.083923,0.696306,0.661593,49.28753,2.0
3,12,148,0.158915,0.392656,0.438273,27.767183,3.0
4,8,73,0.263394,0.928465,0.936286,63.047405,4.0
5,14,103,0.344375,0.466496,0.475866,58.22449,5.0
6,18,143,0.172542,0.595568,0.730267,38.843382,6.0
7,9,79,0.30409,0.309865,0.418229,14.117099,7.0
8,16,121,0.13489,0.186461,0.410693,26.535625,8.0
9,10,103,0.343875,0.332662,0.384881,7.06298,9.0


In [19]:
# Round 2 evaluation: fit every candidate in parameters_2 on a random 20% of the
# training data and record its accuracy on the validation set.
valid_accs = []
start = time.time()
for i in range(MAX_PARAM_NUM_2):
    params = parameters_2.iloc[i]
    model = train(train_set_size = 0.2,
                  # the row Series may carry the integer settings as floats, so cast them back
                  max_depth = int(params["max_depth"]),
                  colsample_bytree = params["colsample_bytree"],
                  n_estimators = int(params["n_estimators"]),
                  learning_rate = params["learning_rate"],
                  subsample = params["subsample"],
                  reg_lambda = params["reg_lambda"])
    y_pred = model.predict(X_valid)
    # Score once and reuse the value for both the record and the progress line.
    valid_acc = accuracy_score(y_valid, y_pred)
    valid_accs.append(valid_acc)
    end = time.time()
    # VA = validation accuracy, ET = time elapsed since the loop started.
    print(f"[{i+1}/{MAX_PARAM_NUM_2}] VA: {valid_acc:.4f} | ET: {end-start:.2f}s")
# Replace the placeholder column with the measured accuracies.
parameters_2["valid_acc"]=valid_accs

1
----

max_depth 9
n_estimators 198
learning_rate 0.28508966801535696
colsample_bytree 0.4345049472516354
subsample 0.741057044808564
reg_lambda 18.424874600299923
[1/20] VA: 0.6070 | ET: 40.39s
2
----

max_depth 8
n_estimators 183
learning_rate 0.2667785775112571
colsample_bytree 0.24717979439026833
subsample 0.6600944012545938
reg_lambda 48.10813078381728
[2/20] VA: 0.6192 | ET: 67.84s
3
----

max_depth 17
n_estimators 183
learning_rate 0.08392261993808556
colsample_bytree 0.6963063907287838
subsample 0.6615929661556417
reg_lambda 49.287530173653714
[3/20] VA: 0.6054 | ET: 151.42s
4
----

max_depth 12
n_estimators 148
learning_rate 0.15891454128948101
colsample_bytree 0.3926561286727518
subsample 0.43827251625559005
reg_lambda 27.767182501550842
[4/20] VA: 0.5980 | ET: 186.17s
5
----

max_depth 8
n_estimators 73
learning_rate 0.26339430873294134
colsample_bytree 0.9284653558288903
subsample 0.9362860121997247
reg_lambda 63.04740528651969
[5/20] VA: 0.6034 | ET: 224.16s
6
----

max_d

In [20]:
# Take the MAX_PARAM_NUM_2_BEST rows with the highest validation accuracy and
# compute the column-wise minimum and maximum over them.
top_parameters_2 = parameters_2.sort_values("valid_acc", ascending=False).head(MAX_PARAM_NUM_2_BEST)
min_parameters_2 = top_parameters_2.min()
max_parameters_2 = top_parameters_2.max()

In [21]:
# Column-wise minimum over the best round-2 candidates (lower bounds for round 3).
min_parameters_2

max_depth            6.000000
n_estimators        84.000000
learning_rate        0.129193
colsample_bytree     0.247180
subsample            0.338479
reg_lambda          10.495504
valid_acc            0.607031
dtype: float64

In [22]:
# Column-wise maximum over the best round-2 candidates (upper bounds for round 3).
max_parameters_2

max_depth            18.000000
n_estimators        198.000000
learning_rate         0.321162
colsample_bytree      0.741586
subsample             0.812907
reg_lambda           91.722419
valid_acc             0.619238
dtype: float64

## 3rd round

In [23]:
MAX_PARAM_NUM_3 = 10        # number of random candidate parameter sets drawn in round 3
MAX_PARAM_NUM_3_BEST = 10

# Search bounds come from the best round-2 candidates. They do not change between
# draws, so they are read once here instead of on every loop iteration.
max_depth_MAX = int(max_parameters_2["max_depth"]); max_depth_MIN = int(min_parameters_2["max_depth"])
n_estimators_MAX  = int(max_parameters_2["n_estimators"]); n_estimators_MIN = int(min_parameters_2["n_estimators"])
learning_rate_MAX = max_parameters_2["learning_rate"]; learning_rate_MIN = min_parameters_2["learning_rate"]
colsample_bytree_MAX = max_parameters_2["colsample_bytree"]; colsample_bytree_MIN = min_parameters_2["colsample_bytree"]
subsample_MAX = max_parameters_2["subsample"]; subsample_MIN = min_parameters_2["subsample"]
reg_lambda_MAX = max_parameters_2["reg_lambda"]; reg_lambda_MIN = min_parameters_2["reg_lambda"]

# Rows are collected in a list and turned into a frame once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
candidates_3 = []
for i in range(MAX_PARAM_NUM_3):
    # The order of these draws determines the random stream; keep it unchanged.
    # randint excludes its upper end, hence the +1 to make the maximum reachable.
    max_depth = np.random.randint(max_depth_MIN,max_depth_MAX+1)
    n_estimators = np.random.randint(n_estimators_MIN,n_estimators_MAX+1)
    learning_rate = np.random.rand()*(learning_rate_MAX - learning_rate_MIN)+learning_rate_MIN
    colsample_bytree = np.random.rand()*(colsample_bytree_MAX - colsample_bytree_MIN)+colsample_bytree_MIN
    subsample = np.random.rand()*(subsample_MAX - subsample_MIN)+subsample_MIN
    reg_lambda = np.random.rand()*(reg_lambda_MAX - reg_lambda_MIN)+reg_lambda_MIN

    candidates_3.append({"max_depth":max_depth,
                         "n_estimators":n_estimators,
                         "learning_rate":learning_rate,
                         "colsample_bytree":colsample_bytree,
                         "subsample":subsample,
                         "reg_lambda":reg_lambda,
                         # placeholder (row number); overwritten once the candidates are scored
                         "valid_acc":float(i)})
# Explicit column list keeps the layout stable even when no rows were drawn.
parameters_3 = pd.DataFrame(candidates_3,
                            columns=["max_depth","n_estimators","learning_rate",
                                     "colsample_bytree","subsample","reg_lambda","valid_acc"])
parameters_3["max_depth"] = parameters_3["max_depth"].astype(int)
parameters_3["n_estimators"] = parameters_3["n_estimators"].astype(int)

In [24]:
# Round-3 candidates before evaluation; valid_acc still holds the placeholder row number.
parameters_3

Unnamed: 0,max_depth,n_estimators,learning_rate,colsample_bytree,subsample,reg_lambda,valid_acc
0,17,129,0.204074,0.340691,0.35583,10.862342,0.0
1,8,141,0.310368,0.502637,0.790652,29.734329,1.0
2,15,91,0.138424,0.644783,0.354441,42.492595,2.0
3,7,109,0.307611,0.593275,0.444923,73.373555,3.0
4,6,139,0.184049,0.447923,0.515846,11.892984,4.0
5,7,167,0.193668,0.372353,0.400454,21.273244,5.0
6,10,135,0.30671,0.425225,0.681659,25.53347,6.0
7,12,104,0.296939,0.707255,0.426255,78.759679,7.0
8,16,116,0.288777,0.647909,0.627076,71.626262,8.0
9,11,92,0.275959,0.692365,0.718514,62.004094,9.0


In [26]:
# Round 3 evaluation: fit every candidate in parameters_3 on the full training
# data, keep each fitted model, and record its accuracy on the validation set.
valid_accs = []
start = time.time()
models = []  # fitted models, in the same order as the rows of parameters_3
for i in range(MAX_PARAM_NUM_3):
    params = parameters_3.iloc[i]
    model = train(train_set_size = 1.0,
                  # the row Series may carry the integer settings as floats, so cast them back
                  max_depth = int(params["max_depth"]),
                  colsample_bytree = params["colsample_bytree"],
                  n_estimators = int(params["n_estimators"]),
                  learning_rate = params["learning_rate"],
                  subsample = params["subsample"],
                  reg_lambda = params["reg_lambda"])
    models.append(model)
    y_pred = model.predict(X_valid)
    # Score once and reuse the value for both the record and the progress line.
    valid_acc = accuracy_score(y_valid, y_pred)
    valid_accs.append(valid_acc)
    end = time.time()
    # VA = validation accuracy, ET = time elapsed since the loop started.
    print(f"[{i+1}/{MAX_PARAM_NUM_3}] VA: {valid_acc:.4f} | ET: {end-start:.2f}s")
# Replace the placeholder column with the measured accuracies.
parameters_3["valid_acc"]=valid_accs

1
----

max_depth 17
n_estimators 129
learning_rate 0.20407387133312815
colsample_bytree 0.34069145932404016
subsample 0.35582974198271855
reg_lambda 10.862342059682852
[1/10] VA: 0.6090 | ET: 38.85s
2
----

max_depth 8
n_estimators 141
learning_rate 0.3103677262217853
colsample_bytree 0.5026368771403138
subsample 0.7906521560927602
reg_lambda 29.734328638054052
[2/10] VA: 0.6223 | ET: 102.59s
3
----

max_depth 15
n_estimators 91
learning_rate 0.13842421774760033
colsample_bytree 0.6447830593359536
subsample 0.3544407390893327
reg_lambda 42.4925945739505
[3/10] VA: 0.6055 | ET: 164.51s
4
----

max_depth 7
n_estimators 109
learning_rate 0.30761088348687016
colsample_bytree 0.5932752977292034
subsample 0.4449231932485116
reg_lambda 73.3735550029398
[4/10] VA: 0.6302 | ET: 216.67s
5
----

max_depth 6
n_estimators 139
learning_rate 0.18404884935035348
colsample_bytree 0.4479228046587894
subsample 0.5158462451330534
reg_lambda 11.892984254868008
[5/10] VA: 0.6328 | ET: 263.25s
6
----

max_d

In [27]:
# Round-3 results ordered from highest to lowest validation accuracy (display only; the frame itself is not reordered).
parameters_3.sort_values("valid_acc",ascending=False)

Unnamed: 0,max_depth,n_estimators,learning_rate,colsample_bytree,subsample,reg_lambda,valid_acc
4,6,139,0.184049,0.447923,0.515846,11.892984,0.632812
3,7,109,0.307611,0.593275,0.444923,73.373555,0.630176
5,7,167,0.193668,0.372353,0.400454,21.273244,0.622852
1,8,141,0.310368,0.502637,0.790652,29.734329,0.622266
9,11,92,0.275959,0.692365,0.718514,62.004094,0.620703
8,16,116,0.288777,0.647909,0.627076,71.626262,0.620313
6,10,135,0.30671,0.425225,0.681659,25.53347,0.619727
7,12,104,0.296939,0.707255,0.426255,78.759679,0.612402
0,17,129,0.204074,0.340691,0.35583,10.862342,0.608984
2,15,91,0.138424,0.644783,0.354441,42.492595,0.605469


In [28]:
# Round-3 results in their original row order, which matches the order of `models`.
parameters_3

Unnamed: 0,max_depth,n_estimators,learning_rate,colsample_bytree,subsample,reg_lambda,valid_acc
0,17,129,0.204074,0.340691,0.35583,10.862342,0.608984
1,8,141,0.310368,0.502637,0.790652,29.734329,0.622266
2,15,91,0.138424,0.644783,0.354441,42.492595,0.605469
3,7,109,0.307611,0.593275,0.444923,73.373555,0.630176
4,6,139,0.184049,0.447923,0.515846,11.892984,0.632812
5,7,167,0.193668,0.372353,0.400454,21.273244,0.622852
6,10,135,0.30671,0.425225,0.681659,25.53347,0.619727
7,12,104,0.296939,0.707255,0.426255,78.759679,0.612402
8,16,116,0.288777,0.647909,0.627076,71.626262,0.620313
9,11,92,0.275959,0.692365,0.718514,62.004094,0.620703


In [30]:
# Integer position of the highest validation accuracy; used to index the `models` list.
max_ind = parameters_3["valid_acc"].argmax()

In [32]:
# Pick the round-3 model that scored best on the validation set.
clf = models[max_ind]

In [33]:
# Predict the validation set again with the selected model.
y_pred = clf.predict(X_valid)

In [34]:
# Sanity check: this should equal the best valid_acc recorded in parameters_3.
accuracy_score(y_valid, y_pred)

0.6328125

# Submission

In [None]:
# Read the competition test set from the local input directory.
test_data = pd.read_csv('./kaggle/input/Kannada-MNIST/test.csv')
# Alternative absolute path kept from the original cell:
#test_data = pd.read_csv('/kaggle/input/Kannada-MNIST/test.csv')

In [None]:
# Keep the 'id' column for the submission index and use the remaining columns as model input.
ids = test_data['id']
test_set = test_data.drop(columns=['id'])

In [None]:
# Predict the test set with the selected model.
final_preds = clf.predict(test_set)

In [None]:
# to_csv does not create missing folders, so make sure the output directory exists first.
os.makedirs('./kaggle/working', exist_ok=True)
# Write the predictions as the 'label' column, indexed by the test ids.
pd.Series(final_preds, index=ids, name='label').to_csv('./kaggle/working/submission.csv')
# Alternative absolute path kept from the original cell:
#pd.Series(final_preds, index=ids, name='label').to_csv('/kaggle/working/submission.csv')