<a href="https://colab.research.google.com/github/mohammadsjahanbakhsh/dim_reduction/blob/main/handwriting_digit_with_SVC_Pipeline_PCA_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
import numpy as np

In [None]:
'''
This dataset has 70,000 datapoints with 784 features .
 we will use a version from Open ML that has 784 pixels (28 x 28).
The feature values range from 0 to 255 (which we interpret on a gray scale
with 0 being white and 255 being black). The target values are the numbers 0-9.
Note that the target values are stored as strings and not integers.
'''

# Fetch the data set from OpenML; return_X_y=True yields the
# (features, target) pair directly, which is unpacked into X and y.
X, y = fetch_openml(
    name="mnist_784",
    version=1,
    return_X_y=True,
    parser='auto',
)




In [None]:
def convert_to_string(n):
    """Return the English word for a single decimal digit.

    ``n`` is passed through ``str`` before the lookup, so the integer ``3``
    and the string ``'3'`` both give ``'three'``.

    Raises:
        KeyError: if ``str(n)`` is not one of ``'0'`` .. ``'9'``.
    """
    digit_words = ('zero', 'one', 'two', 'three', 'four',
                   'five', 'six', 'seven', 'eight', 'nine')
    # Keys are the digit characters '0'..'9'; a word's position is its digit.
    word_by_digit = {str(position): word
                     for position, word in enumerate(digit_words)}
    return word_by_digit[str(n)]


**plt.subplots_adjust()**



left  = 0.125  # the left side of the subplots of the figure

right = 0.9    # the right side of the subplots of the figure

bottom = 0.1   # the bottom of the subplots of the figure

top = 0.9      # the top of the subplots of the figure

wspace = 0.2   # the amount of width reserved for blank space between subplots

hspace = 0.2   # the amount of height reserved for white space between subplots




In [None]:
def plot_gallery(numbers, y, n_row=4, n_col=4, image_shape=(28, 28)):
    """Show the first ``n_row * n_col`` images in a grid, titled by label.

    Args:
        numbers: 2-D array with one flattened image per row; each row is
            reshaped to ``image_shape`` before drawing.
        y: labels aligned positionally with the rows of ``numbers``. Each
            label is turned into a title with ``convert_to_string``.
        n_row: number of grid rows.
        n_col: number of grid columns.
        image_shape: (height, width) used to un-flatten each row.

    The figure is created on the current pyplot state; nothing is returned.
    """
    # Positional access: a Series whose index is not 0..n-1 would otherwise
    # be looked up by label rather than by position.
    labels = np.asarray(y)
    n_available = min(len(numbers), len(labels))

    # squeeze=False keeps the axes array 2-D even for a single row/column.
    fig, axes = plt.subplots(n_row, n_col, squeeze=False)
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90,
                        hspace=.35, wspace=.2)

    # axes.flat walks the grid row by row, so `sample` equals
    # row * n_col + col and every cell shows a distinct sample
    # (indexing with row + col repeated samples along the anti-diagonals).
    for sample, cell in enumerate(axes.flat):
        if sample >= n_available:
            # Fewer samples than grid cells: leave the surplus cells blank.
            cell.set_visible(False)
            continue
        cell.matshow(numbers[sample, :].reshape(image_shape), cmap=plt.cm.gray)
        cell.set_title(convert_to_string(labels[sample]))
        cell.set_xticks(())
        cell.set_yticks(())


# Preview the data: pass the pixel table as a NumPy array together with the labels.
plot_gallery(numbers=X.values, y=y)

In [None]:
from sklearn.model_selection import train_test_split
# Boolean mask of the samples whose label, read as an integer, is at most 5.
low_digit_mask = y.astype(int) <= 5
X5, y5 = X[low_digit_mask], y[low_digit_mask]

# Both splits share the same size, shuffling and seed; only the data and
# the stratification target differ.
split_options = dict(test_size=0.25, shuffle=True, random_state=313)

# Full ten-class problem.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, **split_options)

# Reduced problem restricted to the labels selected above.
X5_train, X5_test, y5_train, y5_test = train_test_split(
    X5, y5, stratify=y5, **split_options)


In [None]:
from sklearn.svm import SVC
from time import time
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Stand-alone classifier; note the pipeline below builds its own separate
# SVC instance with the same settings and does not use this object.
svc = SVC(kernel='rbf', class_weight='balanced')

# Scale -> project onto 60 principal components -> RBF-kernel SVC.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("pca", PCA(n_components=60)),
    ("svc", SVC(kernel='rbf', class_weight='balanced')),
]
pipe = Pipeline(steps=pipeline_steps)

# Wall-clock time of fitting the whole pipeline on the training split.
t = time()
pipe.fit(X_train, y_train)
fit_seconds = time() - t
print("fit time :%.3f" % fit_seconds)

# Wall-clock time of predicting the held-out split.
t = time()
y_pred = pipe.predict(X_test)
predict_seconds = time() - t
print("predict time :%.3f" % predict_seconds)



In [None]:
# Text report comparing the held-out labels with the pipeline's predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
from sklearn.metrics import confusion_matrix , ConfusionMatrixDisplay

In [None]:
import seaborn as sns
def display_confusion_matrix_heatmap(cf_matrix, list_class):
    """Draw a confusion matrix as an annotated heatmap of overall shares.

    Every cell is divided by the sum of all entries, so the annotation is
    the percentage of *all* samples that landed in that cell.

    Args:
        cf_matrix: 2-D array of counts. The y axis is labelled "True label"
            and the x axis "Predicted label".
        list_class: class labels in the same order as the matrix rows and
            columns; used for the tick labels on both axes (previously the
            ticks were hard-coded to ``range(10)`` and this argument was
            ignored).
    """
    tick_labels = list(list_class)
    share_of_total = cf_matrix / np.sum(cf_matrix)

    plt.figure(figsize=(8, 8))
    sns.heatmap(
        share_of_total,
        annot=True,
        fmt='.2%',
        cmap=sns.cubehelix_palette(as_cmap=True),
        annot_kws={"size": 8},
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        # The colour scale is capped at 1% of all samples: anything above
        # that is drawn with the top colour, which leaves the colour range
        # for the small cells.
        vmin=0, vmax=0.01,
    )

    plt.title("Confusion Matrix", fontsize=20)
    plt.xlabel('Predicted label', fontsize=15)
    plt.ylabel('True label', fontsize=15)

    plt.show()
# One integer class id per distinct target value: 0 .. n_classes - 1.
list_class = range(y.nunique())
# Labels are cast to int on both sides so that they match `list_class`.
cf_matrix = confusion_matrix(
    y_true=y_test.astype(int),
    y_pred=y_pred.astype(int),
    labels=list_class,
)
display_confusion_matrix_heatmap(cf_matrix=cf_matrix, list_class=list_class)


In [None]:
def display_confusion_matrix(cf_matrix, list_class):
    """Render ``cf_matrix`` with scikit-learn's ``ConfusionMatrixDisplay``.

    Args:
        cf_matrix: the confusion matrix to draw.
        list_class: labels shown on the axes (passed as ``display_labels``).
    """
    ConfusionMatrixDisplay(cf_matrix, display_labels=list_class).plot()
    plt.show()

# Same matrix as the heatmap above, drawn with scikit-learn's own display.
display_confusion_matrix(cf_matrix=cf_matrix, list_class=list_class)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Scale -> 60 principal components -> linear SVM, for the digits 0-5 subset.
pipe_pca_svm = make_pipeline(
    StandardScaler(),
    PCA(n_components=60),
    LinearSVC(max_iter=1000),
)

# Wall-clock time of fitting on the reduced training split.
t = time()
pipe_pca_svm.fit(X5_train, y5_train)
fit_seconds = time() - t
print("fit time :%.3f" % fit_seconds)

# Wall-clock time of predicting the reduced test split.
t = time()
y5_pred = pipe_pca_svm.predict(X5_test)
predict_seconds = time() - t
print("predict time :%.3f" % predict_seconds)

print(classification_report(y_true=y5_test, y_pred=y5_pred))