# XGBoost on CNN Layer 1 Channel Activations

In [2]:
import os

import pandas as pd
import numpy as np
from scipy import stats

import torch

import itertools

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import metrics 
#this has been updated since the article we're reading
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics import accuracy_score

#turn on all output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Overview
Here we use XGBoost to classify the 'Layer 1' Channel activations from the CNN, and we get a final test accuracy result of 89.80.

# Import Data

In [3]:
### --- import the activations and labels, convert from torch tensors to numpy arrays
### assumes files are local, downloaded from https://drive.google.com/drive/folders/1ZD2fEtf7Fy1k-gdjOGT6QAhrWxon5LaT?usp=sharing

## - import last layer activations, these were saved as one dictionary, we'll access later
last_acts = pd.read_pickle('C:\\141C-local-data\\final-project\\last_activations.pkl')

## - Training set
#window activations (the first type of activations)
train_acts_window = pd.read_pickle('C:\\141C-local-data\\final-project\\train_acts_window.pkl')
train_acts_window = train_acts_window.numpy()

#channel activations (the second type of activations)
train_acts_channel = pd.read_pickle('C:\\141C-local-data\\final-project\\train_acts_channel.pkl')
train_acts_channel = train_acts_channel.numpy()

#last layer activations (the third type)
train_acts_last = last_acts['train'].numpy()

## - Validation set
#window activations
valid_acts_window = pd.read_pickle('C:\\141C-local-data\\final-project\\valid_acts_window.pkl')
valid_acts_window = valid_acts_window.numpy()

#channel activations
valid_acts_channel = pd.read_pickle('C:\\141C-local-data\\final-project\\valid_acts_channel.pkl')
valid_acts_channel = valid_acts_channel.numpy()

#last layer activations
valid_acts_last = last_acts['valid'].numpy()

## - Test set
#window activations (the first type of activations)
test_acts_window = pd.read_pickle('C:\\141C-local-data\\final-project\\test_acts_window.pkl')
test_acts_window = test_acts_window.numpy()

#channel activations (the second type of activations)
test_acts_channel = pd.read_pickle('C:\\141C-local-data\\final-project\\test_acts_channel.pkl')
test_acts_channel = test_acts_channel.numpy()

#last layer activations
test_acts_last = last_acts['test'].numpy()

## - Labels
label_dict = pd.read_pickle('C:\\141C-local-data\\final-project\\label_dict.pickle')

In [4]:
### --- inspect the activations, reshape
print('Training window activations shape is {}'.format(train_acts_window.shape))
print('Training channel activations shape is {}'.format(train_acts_channel.shape))
print('Training last activations shape is {}'.format(train_acts_last.shape))

print('Validation window activations shape is {}'.format(valid_acts_window.shape))
print('Validation channel activations shape is {}'.format(valid_acts_channel.shape))
print('Validation last activations shape is {}'.format(valid_acts_last.shape))

print('Test window activations shape is {}'.format(test_acts_window.shape))
print('Test channel activations shape is {}'.format(test_acts_channel.shape))
print('Test last activations shape is {}'.format(test_acts_last.shape))

Training window activations shape is (3780, 24, 18)
Training channel activations shape is (3780, 2048)
Training last activations shape is (3780, 512)
Validation window activations shape is (566, 24, 18)
Validation channel activations shape is (566, 2048)
Validation last activations shape is (566, 512)
Test window activations shape is (569, 24, 18)
Test channel activations shape is (569, 2048)
Test last activations shape is (569, 512)


In [5]:
### --- reshape the window activations
#reshape
train_acts_window = train_acts_window.reshape(3780,24*18)
valid_acts_window = valid_acts_window.reshape(566,24*18)
test_acts_window = test_acts_window.reshape(569,24*18)

print('Training windows activations shape after reshape is {}'.format(train_acts_window.shape))
print('Validation windows activations shape after reshape is {}'.format(valid_acts_window.shape))
print('Test windows activations shape after reshape is {}'.format(test_acts_window.shape))

Training windows activations shape after reshape is (3780, 432)
Validation windows activations shape after reshape is (566, 432)
Test windows activations shape after reshape is (569, 432)


In [6]:
list(label_dict.keys())

['train_labels', 'valid_labels', 'test_labels']

In [7]:
### --- inspect labels
list(label_dict.keys())

len(label_dict['train_labels'])
type(label_dict['train_labels'])

print(*label_dict['train_labels'][0:40]) # * tuple unpacking for print on one horizontal line

['train_labels', 'valid_labels', 'test_labels']

3780

list

AF AF AF AF AN AF AN AN AN AN DI DI DI DI DI HA HA HA HA HA NE NE NE SA NE NE SA SA SU SA SA SU SU SU SU AF AF AF AF AF


# Transform Data

In [8]:
#scale features to [0,1]
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1)) #default range, but let's be explicit

train_features = (train_acts_window, train_acts_channel, train_acts_last,
                  valid_acts_window, valid_acts_channel, valid_acts_last,
                  test_acts_window, test_acts_channel, test_acts_last)

#scale all features
X_train_window, X_train_channel, X_train_last, X_valid_window, X_valid_channel, X_valid_last, X_test_window, X_test_channel, X_test_last = [min_max_scaler.fit_transform(X) for X in train_features]

# Train and Tune Model

### LAST Activations

In [9]:
#XGBoost needs numerical targets, so encode strings
ytrain = label_dict['train_labels']
yvalid = label_dict['valid_labels']
ytest = label_dict['test_labels']

#xgboost needs numeric labels
#https://machinelearningmastery.com/data-preparation-gradient-boosting-xgboost-python/
from sklearn.preprocessing import LabelEncoder
# encode string class values as integers
label_encoder_train = LabelEncoder()
label_encoder_train = label_encoder_train.fit(ytrain)
label_encoded_ytrain = label_encoder_train.transform(ytrain)

label_encoder_valid = LabelEncoder()
label_encoder_valid = label_encoder_valid.fit(yvalid)
label_encoded_yvalid = label_encoder_valid.transform(yvalid)


label_encoder_test = LabelEncoder()
label_encoder_test = label_encoder_test.fit(ytest)
label_encoded_ytest = label_encoder_test.transform(ytest)

In [10]:
set(ytrain) #so 7 classes, for num_class param in XGBClassifier below

{'AF', 'AN', 'DI', 'HA', 'NE', 'SA', 'SU'}

# XGBoost

### Tuning max_depth and min_child_weight


In [11]:
max_depth = range(4,12,2)
min_child_weight = range(1,6,2)
trees_1 = []
acc_1 = []
comb_depth_child = []
for combination in itertools.product(max_depth, min_child_weight):
    comb_depth_child.append(combination)
for i, j in comb_depth_child:
    alg = XGBClassifier(learning_rate = 0.2, n_estimators = 1000, max_depth = i, min_child_weight = j, gamma = 0,
                        subsample = 0.5, colsample_bytree=0.6, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4)
    xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
    xgvalid = xgb.DMatrix(X_valid_channel, label_encoded_yvalid)
    xgb_params = alg.get_xgb_params()
    y_te_str = ','.join(str(e) for e in label_encoded_yvalid)
    xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgvalid, y_te_str)],
                          early_stopping_rounds = 30, verbose_eval = 150)
    trees_1.append(xgbresults.best_ntree_limit)
    acc_1.append(np.mean(xgbresults.predict(xgvalid) == label_encoded_yvalid))

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[17]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[26]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

In [12]:
#we find (6, 5) to be the best combination (tied with 8, 1), 0.8551236749116607

max(acc_1)
acc_1
comb_depth_child

0.8551236749116607

[0.8533568904593639,
 0.8462897526501767,
 0.8480565371024735,
 0.8533568904593639,
 0.8445229681978799,
 0.8551236749116607,
 0.8551236749116607,
 0.8480565371024735,
 0.8498233215547704,
 0.8533568904593639,
 0.8462897526501767,
 0.8498233215547704]

[(4, 1),
 (4, 3),
 (4, 5),
 (6, 1),
 (6, 3),
 (6, 5),
 (8, 1),
 (8, 3),
 (8, 5),
 (10, 1),
 (10, 3),
 (10, 5)]

### Tuning gamma

In [14]:
max_depth = 6
min_child_weight = 5
trees_2 = []
acc_2 = []
gamma = [0,0.1,0.2] #cutting params for efficiency
    
for g in gamma:
    alg = XGBClassifier(learning_rate = 0.2, n_estimators = 1000, max_depth = max_depth, min_child_weight = min_child_weight, gamma = g,
                        subsample = 0.5, colsample_bytree=0.6, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4)
    xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
    xgvalid = xgb.DMatrix(X_valid_channel, label_encoded_yvalid)
    xgb_params = alg.get_xgb_params()
    y_te_str = ','.join(str(e) for e in label_encoded_yvalid)
    xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgvalid, y_te_str)],
                          early_stopping_rounds = 30, verbose_eval = 150)
    trees_2.append(xgbresults.best_ntree_limit)
    acc_2.append(np.mean(xgbresults.predict(xgvalid) == label_encoded_yvalid))

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[17]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

In [15]:
#selects gamma of 0, 0.8551236749116607 \
acc_2

[0.8551236749116607, 0.8498233215547704, 0.8498233215547704]

### Tuning subsample and colsample_bytree

In [17]:
max_depth = 6
min_child_weight = 5
gamma = 0
trees_3 = []
acc_3 = []
subsample = [0.6, 1]
colsample_bytree = [0.6, 1]
comb_samp_col = []
for combination in itertools.product(subsample, colsample_bytree):
    comb_samp_col.append(combination)
for i,j in comb_samp_col:
    alg = XGBClassifier(learning_rate = 0.2, n_estimators = 1000, max_depth = max_depth, min_child_weight = min_child_weight, gamma = gamma,
                        subsample = i, colsample_bytree=j, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4)
    xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
    xgvalid = xgb.DMatrix(X_valid_channel, label_encoded_yvalid)
    xgb_params = alg.get_xgb_params()
    y_te_str = ','.join(str(e) for e in label_encoded_yvalid)
    xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgvalid, y_te_str)],
                          early_stopping_rounds = 30, verbose_eval = 150)
    trees_3.append(xgbresults.best_ntree_limit)
    acc_3.append(np.mean(xgbresults.predict(xgvalid) == label_encoded_yvalid))

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[28]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

In [18]:
acc_3 # no improvement in accuracy, proceed with previous values (0.5, 0.6)
comb_samp_col

[0.8409893992932862,
 0.8462897526501767,
 0.8409893992932862,
 0.8392226148409894]

[(0.6, 0.6), (0.6, 1), (1, 0.6), (1, 1)]

### Tuning reg_alpha and reg_lambda

In [19]:
max_depth = 6
min_child_weight = 5
gamma = 0
trees_4 = []
acc_4 = []
subsample = 0.5 #note we didn't update these, we started with these values and tuning didn't produce better ones
colsample_bytree = 0.6 #note we didn't update these, we started with these values and tuning didn't produce better ones
reg_alpha = [0.1, 1, 100]
reg_lambda = [0.1, 1, 100]
comb_alp_lam = []

for combination in itertools.product(reg_alpha, reg_lambda):
    comb_alp_lam.append(combination)
for i,j in comb_alp_lam:
    alg = XGBClassifier(learning_rate = 0.2, n_estimators = 1000, max_depth = max_depth, min_child_weight = min_child_weight, gamma = gamma,
                        subsample = subsample, colsample_bytree=colsample_bytree, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4, reg_alpha = i, reg_lambda = j)
    xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
    xgvalid = xgb.DMatrix(X_valid_channel, label_encoded_yvalid)
    xgb_params = alg.get_xgb_params()
    y_te_str = ','.join(str(e) for e in label_encoded_yvalid)
    xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgvalid, y_te_str)],
                          early_stopping_rounds = 30, verbose_eval = 150)
    trees_4.append(xgbresults.best_ntree_limit)
    acc_4.append(np.mean(xgbresults.predict(xgvalid) == label_encoded_yvalid))

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[72]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

Stopping. Best iteration:
[18]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4

In [20]:
acc_4 #regularization DID NOT IMPROVE ACCURACY from previous 0.8551236749116607

[0.8498233215547704,
 0.842756183745583,
 0.8462897526501767,
 0.842756183745583,
 0.842756183745583,
 0.8462897526501767,
 0.8021201413427562,
 0.8021201413427562,
 0.8127208480565371]

### Decreasing the learning rate and getting n_estimators

In [21]:
max_depth = 6
min_child_weight = 5
gamma = 0
subsample = 0.5
colsample_bytree = 0.6
reg_alpha = 0 #did not improve accuracy, but being explicit here
reg_lambda = 0 #did not improve accuracy, but being explicit here

alg = XGBClassifier(learning_rate = 0.05, n_estimators = 1000, max_depth = max_depth, min_child_weight = min_child_weight, gamma = gamma,
                        subsample = subsample, colsample_bytree=colsample_bytree, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4, reg_alpha = reg_alpha, reg_lambda = reg_lambda)
xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
xgvalid = xgb.DMatrix(X_valid_channel, label_encoded_yvalid)
xgb_params = alg.get_xgb_params()
y_te_str = ','.join(str(e) for e in label_encoded_yvalid)
xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgvalid, y_te_str)],
                      early_stopping_rounds = 60, verbose_eval = 150)
trees_5 = xgbresults.best_ntree_limit
acc_5 = np.mean(xgbresults.predict(xgvalid) == label_encoded_yvalid)

[0]	0,0,0,0,1,0,1,1,2,1,2,2,2,1,2,3,3,3,4,3,4,4,3,4,4,5,5,6,5,5,6,5,6,6,6,0,0,0,1,1,0,0,1,1,1,2,2,2,2,3,3,3,2,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,5,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,4,4,4,3,4,5,5,5,6,6,6,5,5,6,6,0,0,1,0,0,1,0,1,1,1,2,2,3,3,3,2,2,2,3,4,3,4,4,5,5,5,5,4,4,6,5,6,6,6,6,0,0,0,0,0,1,1,1,2,1,2,1,2,2,3,2,3,3,3,4,4,5,3,5,4,4,4,5,5,5,6,6,6,6,6,2,2,0,1,1,1,1,1,0,0,0,0,0,1,2,1,2,1,2,1,1,2,2,3,3,4,3,3,4,4,5,4,4,3,5,5,5,5,6,6,6,6,6,0,0,1,0,0,1,1,0,1,1,2,2,2,2,2,3,3,4,3,4,3,3,4,4,4,5,5,5,6,5,6,6,5,6,6,0,0,0,0,0,1,1,1,1,1,2,3,2,2,3,3,2,2,3,3,4,4,4,5,5,5,4,5,6,5,4,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,3,2,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,2,2,1,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,4,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,

In [22]:
trees_5 # selects 40 as n_estimators
acc_5 #accuracy NOT improved over 0.8551236749116607

40

0.8498233215547704

*Training Set Accuracy Score*

# Evaluate Model on Test Set
***do not do until very very last step***

In [23]:
max_depth = 6
min_child_weight = 5
gamma = 0
subsample = 0.4
colsample_bytree = 0.6
reg_alpha = 0 #did not improve accuracy, but being explicit here
reg_lambda = 0 #did not improve accuracy, but being explicit here
learning_rate = 0.05
#n_estimators = 40, our reduced learning rate and best n_estimators did not improve accuracy
alg = XGBClassifier(learning_rate = learning_rate, n_estimators = 1000, max_depth = max_depth, min_child_weight = min_child_weight, gamma = gamma,
                        subsample = subsample, colsample_bytree=colsample_bytree, objective= 'multi:softmax', seed = 42, scale_pos_weight = 1, 
                        num_class = 7, n_jobs = 4, reg_alpha = reg_alpha, reg_lambda = reg_lambda)
xgtrain = xgb.DMatrix(X_train_channel, label_encoded_ytrain)
xgtest = xgb.DMatrix(X_test_channel, label_encoded_ytest)
xgb_params = alg.get_xgb_params()
y_te_str = ','.join(str(e) for e in label_encoded_ytest)
# this indicates we should use n_estimators for num_boost_round, xbg.train ignores n_estimators param
# https://datascience.stackexchange.com/questions/17282/xgbregressor-vs-xgboost-train-huge-speed-difference
xgbresults = xgb.train(xgb_params, xgtrain, num_boost_round = 1000, evals = [(xgtest, y_te_str)], verbose_eval = 150, early_stopping_rounds=60)
acc_6 = np.mean(xgbresults.predict(xgtest) == label_encoded_ytest)

[0]	0,0,0,1,0,0,1,1,1,1,2,2,2,2,2,3,3,4,3,4,3,3,5,4,4,4,5,5,5,6,6,5,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,1,0,0,1,1,1,2,1,2,2,2,2,3,3,3,3,4,3,4,4,4,5,4,5,5,6,6,6,6,5,5,5,4,4,4,6,0,0,0,1,0,0,1,1,2,1,1,2,2,2,2,3,3,3,3,4,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,3,3,2,2,2,3,2,3,4,3,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,1,1,0,1,1,1,2,2,2,2,3,3,3,3,2,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,1,0,0,1,1,1,1,2,2,2,2,3,3,2,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0,1,1,1,2,2,2,1,1,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,

## Our final test set accuracy rate

In [24]:
acc_6

0.8980667838312829