In [11]:
import pandas as pd
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import seaborn as sns 
from sklearn import datasets
from sklearn.model_selection import train_test_split

### decision_function for multicalss

In [None]:
mnist_path = 'mnist-original.mat'

mnist = loadmat(mnist_path)
X = mnist['data'].T
y = mnist['label'][0]

#peek a digit
some_index = 36000
some_digit = X[some_index]
some_digit_image = some_digit.reshape(28,28)

# split to train & test & #shuffle
train_size = 60000
shuffle_index = np.random.permutation(train_size)
X_train,y_train = X[shuffle_index],y[shuffle_index]
display(y_train.shape)

sgd_clf = SGDClassifier(max_iter=50)
sgd_clf.fit(X_train,y_train)
sgd_clf.predict([some_digit])

some_digit_scores = sgd_clf.decision_function([some_digit])
display('some_digit_scores',some_digit_scores)
display('argmax',np.argmax(some_digit_scores))
display('classes_',sgd_clf.classes_)
display('classes_[5])',sgd_clf.classes_[5])



### OneVsOneClassifier

In [None]:
ovo_clf = OneVsOneClassifier(SGDClassifier(max_iter=5))
ovo_clf.fit(X_train,y_train)

display(ovo_clf.predict([some_digit]))
display(len(ovo_clf.estimators_))

### RandomForestClassifier

In [None]:
forest_clf = RandomForestClassifier(n_estimators=10)
forest_clf.fit(X_train,y_train)
display(forest_clf.predict([some_digit]))
display(forest_clf.predict_proba([some_digit]))

### evaluate classifiers - cross_val_score()

In [None]:
display(cross_val_score(sgd_clf,X_train,y_train,cv=3,scoring='accuracy'))
#scaling the input to increase accuracy above 90%
scaler = StandardScaler()
X_trained_scaled = scaler.fit_transform(X_train.astype('float'))
display(cross_val_score(sgd_clf,X_trained_scaled,y_train,cv=3,scoring='accuracy'))

### confusion matrix

In [None]:
y_train_pred = cross_val_predict(sgd_clf,X_trained_scaled,y_train,cv=3)
conf_mat = confusion_matrix(y_train,y_train_pred)
display(conf_mat)

In [None]:
def plot_conf_matrix(conf_mat,fmt='d'):
    plt.figure(figsize=(8,8))
    sns.heatmap(conf_mat,square=True,annot=True,cmap='Blues',fmt=fmt,cbar=False)
    plt.xlabel('Predicted digit',fontsize=12)
    plt.ylabel('True digit',fontsize=12)

plot_conf_matrix(conf_mat)

In [22]:
#Ex train multi-class SGDClassifer
iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train,X_test,y_train ,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train,X_test ,y_train ,y_test


(array([[4.6, 3.6, 1. , 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [6.7, 3.1, 4.4, 1.4],
        [4.8, 3.4, 1.6, 0.2],
        [4.4, 3.2, 1.3, 0.2],
        [6.3, 2.5, 5. , 1.9],
        [6.4, 3.2, 4.5, 1.5],
        [5.2, 3.5, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.2, 4.1, 1.5, 0.1],
        [5.8, 2.7, 5.1, 1.9],
        [6. , 3.4, 4.5, 1.6],
        [6.7, 3.1, 4.7, 1.5],
        [5.4, 3.9, 1.3, 0.4],
        [5.4, 3.7, 1.5, 0.2],
        [5.5, 2.4, 3.7, 1. ],
        [6.3, 2.8, 5.1, 1.5],
        [6.4, 3.1, 5.5, 1.8],
        [6.6, 3. , 4.4, 1.4],
        [7.2, 3.6, 6.1, 2.5],
        [5.7, 2.9, 4.2, 1.3],
        [7.6, 3. , 6.6, 2.1],
        [5.6, 3. , 4.5, 1.5],
        [5.1, 3.5, 1.4, 0.2],
        [7.7, 2.8, 6.7, 2. ],
        [5.8, 2.7, 4.1, 1. ],
        [5.2, 3.4, 1.4, 0.2],
        [5. , 3.5, 1.3, 0.3],
        [5.1, 3.8, 1.9, 0.4],
        [5. , 2. , 3.5, 1. ],
        [6.3, 2.7, 4.9, 1.8],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5

In [31]:
#SGDClassifier
sgd_clf = SGDClassifier(max_iter=50,random_state=42)
sgd_clf.fit(X_train,y_train)

display(cross_val_score(sgd_clf,X_train,y_train,cv=3,scoring='accuracy'))

#conf_matrix X_trained_scaled
y_train_pred = cross_val_predict(sgd_clf,X_train,y_train,cv=3)
conf_mat = confusion_matrix(y_train,y_train_pred)
display(conf_mat)

def plot_conf_matrix(conf_mat,fmt='d'):
    plt.figure(figsize=(8,8))
    sns.heatmap(conf_mat,square=True,annot=True,cmap='Blues',fmt=fmt,cbar=False)
    plt.xlabel('Predicted digit',fontsize=12)
    plt.ylabel('True digit',fontsize=12)

#plot_conf_matrix(conf_mat)

array([0.75609756, 0.875     , 0.97435897])

array([[40,  0,  0],
       [ 1, 37,  3],
       [ 0, 12, 27]], dtype=int64)

In [32]:
#SGDClassifier + scaling 
scaler = StandardScaler()
X_trained_scaled = scaler.fit_transform(X_train.astype('float'))

display(cross_val_score(sgd_clf,X_trained_scaled,y_train,cv=4,scoring='accuracy'))

#conf_matrix X_trained_scaled
y_train_pred_scaled = cross_val_predict(sgd_clf,X_trained_scaled,y_train,cv=4)
conf_mat = confusion_matrix(y_train,y_train_pred_scaled)
display(conf_mat)

array([0.96774194, 0.8       , 0.96666667, 1.        ])

array([[40,  0,  0],
       [ 1, 36,  4],
       [ 0,  3, 36]], dtype=int64)

In [33]:
#Ex OneVsOneClassifier
ovo_clf = OneVsOneClassifier(SGDClassifier(max_iter=50,random_state=42))
ovo_clf.fit(X_train,y_train)

len(ovo_clf.estimators_)

display(ovo_clf.decision_function(X[:5]))
display(ovo_clf.predict(X[:5]))
display(y[:5])

#conf_matrix X_trained_scaled
y_train_pred_ovo = cross_val_predict(ovo_clf,X_train,y_train,cv=3)
conf_mat = confusion_matrix(y_train,y_train_pred_ovo)
display(conf_mat)

array([[ 2.33173261,  1.33290436, -0.33299468],
       [ 2.33139042,  1.33286527, -0.33295571],
       [ 2.33158027,  1.33287084, -0.33296698],
       [ 2.33125852,  1.33283741, -0.3329326 ],
       [ 2.33176351,  1.33290275, -0.33299509]])

array([0, 0, 0, 0, 0])

array([0, 0, 0, 0, 0])

array([[40,  0,  0],
       [ 0, 39,  2],
       [ 0, 15, 24]], dtype=int64)

In [34]:
#Ex OneVsRestClassifier
ovr_clf = OneVsRestClassifier(SGDClassifier(max_iter=50,random_state=42))
ovr_clf.fit(X_train,y_train)

len(ovr_clf.estimators_)

#display(ovr_clf.decision_function(X[:5]))
#display(ovr_clf.predict(X[:5]))
#display(y[:5])


#conf_matrix X_trained_scaled
y_train_pred_ovr = cross_val_predict(ovr_clf,X_train,y_train,cv=3)
conf_mat = confusion_matrix(y_train,y_train_pred_ovr)
display(conf_mat)

array([[31,  9,  0],
       [ 6, 35,  0],
       [ 5, 22, 12]], dtype=int64)