# Dimensionality Reduction and Classification on MNIST Dataset

In [2]:
import sys
sys.path.append('../src')
sys.path.append('../utils')
sys.path.append('../visualization')

import numpy as np
import pandas as pd
import pickle
import os

from dimensionality_reduction import apply_all_dimensionality_reduction
from classification import apply_classifiers_original_features, apply_classifiers_reduced_data

In [5]:
# Read processed data
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files produced by this project's own preprocessing step.
processed_dir = os.path.dirname(os.getcwd()) + '/data/mnist/processed'


def _load_pickle(name):
    """Return the object stored in ``<processed_dir>/<name>.pkl``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been read, instead of staying open until garbage
    collection.
    """
    with open(processed_dir + '/' + name + '.pkl', 'rb') as handle:
        return pickle.load(handle)


X_train = _load_pickle('X_train')
X_test = _load_pickle('X_test')
y_train = _load_pickle('y_train')
y_test = _load_pickle('y_test')

# Flatten the images: keep one row per sample and collapse every remaining
# axis into a single feature axis.
X_train = np.array(X_train).reshape((len(X_train), -1))
X_test = np.array(X_test).reshape((len(X_test), -1))

In [15]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with SGD on the flattened training images.
# random_state is pinned so repeated runs of this cell give the same model.
model = SGDClassifier(random_state=42)
# fit() is left as the cell's last expression, so the fitted estimator is
# what the notebook displays for this cell.
model.fit(X=X_train, y=y_train)

In [None]:
# Apply classifier on original data
# (only the 'SVC' classifier is requested, under the 'mnist' dataset name)
apply_classifiers_original_features(
    X_train,
    y_train,
    X_test,
    y_test,
    'mnist',
    classifiers=['SVC'],
)

In [6]:
# Reduce data
# Every listed technique is requested at each of the listed target
# dimensionalities; the helper's return value is kept in `reduced_X` for the
# classification step further down.
reduced_X = apply_all_dimensionality_reduction(
    X_train,
    X_test,
    y_train,
    'mnist',
    n_components_list=[5, 15, 50, 150],
    models_list=['SLMVP', 'PCA', 'KPCA', 'LOL', 'LLE'],
)

('5Dim', 'SLMVP', 'Radial-Gammas=0.01'):   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Apply classifier on reduced data
# The helper returns two values: a scores table (`scores_df`) and a second
# object bound to `reduced_X_best`.
scores_df, reduced_X_best = apply_classifiers_reduced_data(
    reduced_X,
    y_train,
    y_test,
    'mnist',
    classifiers=['SVC'],
)

SVC: 100%|██████████| 24/24 [00:15<00:00,  1.54it/s]


In [6]:
# Display the scores table returned by apply_classifiers_reduced_data
# (one row per classifier / dimensionality-reduction configuration).
scores_df

Unnamed: 0,Model,Score,Params,Dimensions,Dim. Technique,Dim. Params
23,SVC,0.975,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}",150Dim,LLE,k=15-reg=0.001
17,SVC,0.93125,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",50Dim,LLE,k=15-reg=0.001
14,SVC,0.86875,"{'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}",50Dim,PCA,
20,SVC,0.85625,"{'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}",150Dim,PCA,
11,SVC,0.80625,"{'C': 100, 'gamma': 1, 'kernel': 'rbf'}",15Dim,LLE,k=15-reg=0.001
5,SVC,0.73125,"{'C': 1000, 'gamma': 1, 'kernel': 'rbf'}",5Dim,LLE,k=15-reg=0.001
2,SVC,0.675,"{'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}",5Dim,PCA,
8,SVC,0.60625,"{'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}",15Dim,PCA,
22,SVC,0.025,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}",150Dim,LOL,
21,SVC,0.025,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",150Dim,KPCA,Radial
