In [136]:
## MultiClass Classification with XGboost
import xgboost as xgb
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix, accuracy_score
import os
import gzip
import numpy as np

# define a data load the data 
def load_mnist(path, kind='train'):

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels


In [137]:


try:

    X_train, y_train = load_mnist('data/fashion', kind='train')
    X_test, y_test =  load_mnist('data/fashion', kind='t10k')
    X_train.shape, y_train.shape, X_test.shape, y_test.shape
    print('data loaded from disk')
except:
    print('downloading data ...')
    os.system('wget https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/t10k-images-idx3-ubyte.gz  -P ./data/fashion -N')
    os.system('wget https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/t10k-labels-idx1-ubyte.gz -P ./data/fashion -N' )

    os.system('wget https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train-images-idx3-ubyte.gz -P ./data/fashion -N' )

    os.system('wget https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train-labels-idx1-ubyte.gz -P ./data/fashion -N' )
    print('download complete, loading')
    X_train, y_train = load_mnist('data/fashion', kind='train')
    X_test, y_test =  load_mnist('data/fashion', kind='t10k')
    X_train.shape, y_train.shape, X_test.shape, y_test.shape
    

data loaded from disk


## Multi Label Logistic Rgression
Multi label regression is possible on smaller data sets directiony 

In [142]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression (multi_class='multinomial', max_iter=5)

model.fit(X_train, y_train)
preds_test = model.predict_proba(X_test)
preds_test_labels = np.argmax(preds_test, axis =1)
print('LogisticRegression acc {}'.format(accuracy_score(y_test, preds_test_labels)))


LogisticRegression acc 0.6562



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



#### XGBoost 
Supports mutli class but not multi label, using the xgb.DMatrix as input can speed up training as well as handle sparse inputs.  The classifier can only hand a list of integer classes, not an array of multi labels. 

+ 'multi:softprob' multi class probablities
+ 'multi:softmax' most likely class (no probablities)

In [145]:
## Soft Max 

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test) # takes always a list of integer classes

# setup params
params = {}
params['eta'] =  .3
params['gamma'] = 1
params['max_depth'] = 3
params['min_child_weight'] = 10
params['max_delta_step'] = 2
params['colsample_bytree']= 1
params['verbosity'] = 0
params['random_state'] = 0
params['objective'] = 'multi:softprob'
params['num_class'] = 10
params['eval_metric'] = 'merror'

# fit model
model = xgb.train(params, 
                  dtrain=dtrain, 
                  num_boost_round=3,
                  evals=((dtrain, 'train'), (dtest, 'test')))

preds_test = model.predict(dtest)
preds_test_labels = np.argmax(preds_test, axis =1)

print(confusion_matrix(y_test, preds_test_labels))

print('acc {}'.format(accuracy_score(y_test, preds_test_labels)))

[0]	train-merror:0.290567	test-merror:0.3034
[1]	train-merror:0.235	test-merror:0.2494
[2]	train-merror:0.22005	test-merror:0.2335
[[717   2  13 126  12   0 113   1  16   0]
 [  7 863   8  97  14   0   7   0   4   0]
 [ 18   4 721  18 157   1  73   0   7   1]
 [ 27   7  17 821  69   6  46   0   5   2]
 [  3   4 178  70 643   0  91   0  10   1]
 [  0   2   3   3   0 803   0 110   8  71]
 [164   3 145  89 110   1 461   2  25   0]
 [  0   0   0   0   1  11   0 884   1 103]
 [  4   1  25  60  29   4  19  25 829   4]
 [  0   0   4   1   1  11   0  56   4 923]]
acc 0.7665


## SoftProbablity with Multi Label Support Using Decision Trees
Decision Tree Natively supports multi label soft prob output (where class may co-occure) 
+ Handelks list of integer class input for multi class classification
+ Handels array of classes for multi label classification 


In [146]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MultiLabelBinarizer
from sklearn.metrics import roc_auc_score
l = MultiLabelBinarizer().fit(np.reshape(y_train, (-1,1)))
y_train_ohe = l.transform(np.reshape(y_train, (-1,1)))
y_test_ohe = l.transform(np.reshape(y_test, (-1,1)))
y_train_ohe.shape, y_test_ohe.shape

((60000, 10), (10000, 10))

In [148]:
from sklearn.tree import DecisionTreeClassifier
model =  DecisionTreeClassifier(max_depth=10, min_samples_leaf=10).fit(X_train, y_train_ohe)

pred_test = model.predict_proba(X_test)
preds_test_labels = np.argmax(preds_prob_test, axis =1)

print(confusion_matrix(y_test, preds_test_labels ))

print('acc {}'.format(accuracy_score(y_test, preds_test_labels)))


[[847   0  14  41   7   1  78   0  12   0]
 [  2 965   3  21   5   0   2   0   2   0]
 [ 15   0 753   8 148   0  69   0   7   0]
 [ 22   6   9 898  30   0  33   0   2   0]
 [  0   1 104  37 782   0  73   0   3   0]
 [  0   0   0   1   0 938   0  36   1  24]
 [162   1 125  42  94   0 558   0  18   0]
 [  0   0   0   0   0  14   0 944   0  42]
 [  0   1   9   3   2   2   9   4 970   0]
 [  0   0   0   1   0   6   1  42   2 948]]
acc 0.8603


## Using OneVsRest Classifier to Extend Classifcation Binary Estimator
Decision Tree Natively supports multi label soft prob output (where class may co-occure) 
+ Handelks list of integer class input for multi class classification
+ Handels array of classes for multi label classification 

In [153]:
## Multiple Models
from sklearn.multiclass import   OneVsRestClassifier
model = OneVsRestClassifier( XGBClassifier(max_depth=3,max_bin=20), n_jobs=10)
model = model.fit(X_train, y_train_ohe)

preds_test = model.predict(X_test)
preds_test_labels = np.argmax(preds_prob_test, axis =1)

print(confusion_matrix(y_test, preds_test_labels))

print('acc {}'.format(accuracy_score(y_test, preds_test_labels)))


[[926   1   4  41   2   1  12   1  11   1]
 [ 78 897   1  19   2   1   0   0   2   0]
 [321   4 603   4  54   1  10   0   3   0]
 [201  20   1 763   5   3   4   0   3   0]
 [408   4 105  54 401   0  22   0   5   1]
 [173   1   1   1   0 752   0  47   3  22]
 [538   2  79  36  61   1 265   1  17   0]
 [ 93   1   0   0   0  23   0 854   0  29]
 [145   1   3   7   4   5   0  31 802   2]
 [104   0   0   2   0  11   0  44   2 837]]
acc 0.71


#### Analyzing Model
Since the One vs Rest Classifier can handel multi label predictions, that assumes that each class can occure inpendently  and AUC can be looked at.
* In fashion MNIST, this actually is not the case, this is for demonstration purposes.  

In [154]:
def multi_auc(y_true, y_preds):
    for i in range(y_true.shape[1]):
        yield roc_auc_score(y_true[:, i],y_preds[:, i])
list(multi_auc(y_test_ohe, preds_prob_test))

[0.8323333333333334,
 0.9470555555555555,
 0.7914444444444444,
 0.8769444444444445,
 0.7111666666666666,
 0.8742777777777777,
 0.6361111111111112,
 0.9267222222222222,
 0.9171666666666667,
 0.9252777777777779]

#### Mutli Output Models wrapper to niavely extend  using MultiOutputClassifier as with any sklearn classifier estimator as the base model

In [193]:
class_labels = ['T-shirt/top', 'Trouser', 'Pullover','Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag','Ankle boot']


cm = pd.DataFrame(confusion_matrix(y_test, preds_test_labels), 
                  columns=['true:' + str(l)  for l in class_labels], 
                  index=['pred:' + str(l)  for l in class_labels]).transpose()
cm

Unnamed: 0,pred:T-shirt/top,pred:Trouser,pred:Pullover,pred:Dress,pred:Coat,pred:Sandal,pred:Shirt,pred:Sneaker,pred:Bag,pred:Ankle boot
true:T-shirt/top,926,78,321,201,408,173,538,93,145,104
true:Trouser,1,897,4,20,4,1,2,1,1,0
true:Pullover,4,1,603,1,105,1,79,0,3,0
true:Dress,41,19,4,763,54,1,36,0,7,2
true:Coat,2,2,54,5,401,0,61,0,4,0
true:Sandal,1,1,1,3,0,752,1,23,5,11
true:Shirt,12,0,10,4,22,0,265,0,0,0
true:Sneaker,1,0,0,0,0,47,1,854,31,44
true:Bag,11,2,3,3,5,3,17,0,802,2
true:Ankle boot,1,0,0,0,1,22,0,29,2,837


In [None]:
## Viualizing Results

In [200]:
import plotly.figure_factory as ff

'''
Create a really nice confusion Matrix , takes class_labels, y_test, preds_test
'''

class_labels = ['T-shirt/top', 'Trouser', 'Pullover','Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag','Ankle boot']



cm = pd.DataFrame(confusion_matrix(y_test, preds_test_labels), 
                  columns=['pred:' + str(l)  for l in class_labels], 
                  index=['true:' + str(l)  for l in class_labels]).transpose().iloc[::-1]


acc = accuracy_score(y_test, preds_test_labels)
fig = ff.create_annotated_heatmap(cm.values, 
                                  x=list(cm.index),
                                  y = list(cm.columns),
                    
                                colorscale='Portland')

fig.update_layout(
    title={
        'text': "Classifier Confusion Matrtix with Overall All Accuray {}".format(round(acc, 3)),
        'y':.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})


fig.show()

In [201]:
import plotly.express as px

from sklearn.metrics import roc_curve
auc_curve_list = []
preds_prob_test = model.predict_proba(X_test)
for i in range(y_test_ohe.shape[1]):
    fpr, tpr, _ = roc_curve(y_test_ohe[:,i], preds_prob_test [:, i])
    auc = roc_auc_score(y_test_ohe[:,i], preds_prob_test [:, i])
    auc_df = pd.DataFrame({'tpr':tpr, 'fpr':fpr}, index=[class_labels[i] + ' auc: {}'.format(round(auc,3))] * len(fpr))
    auc_curve_list.append(auc_df)
auc_curves = pd.concat(auc_curve_list)




In [202]:
fig = px.line(auc_curves, x="fpr", y="tpr",color=list(auc_curves.index), 
              color_discrete_sequence=px.colors.qualitative.Vivid)

fig.update_layout(
    title={
        'text': "MultiClass Classifier, Individual Class Label AUCs {}"})


fig.show()