# Dimensionality Reduction and Classification on MNIST Dataset

In [1]:
import sys
# Extend the module search path with sibling project folders so the local
# imports further down can be resolved. The entries are relative paths, so
# they are interpreted against the kernel's current working directory
# (presumably the folder containing this notebook — confirm before moving it).
sys.path.append('../src')
sys.path.append('../utils')
sys.path.append('../visualization')

import numpy as np
import pandas as pd
import pickle
import os

from dimensionality_reduction import apply_all_dimensionality_reduction
from classification import apply_classifiers_original_features, apply_classifiers_reduced_data

# Input
# Name of the dataset to process. The loading cells below use it to pick the
# data/<dataset_name>/ folder (relative to the parent of the working directory).
dataset_name = 'mnist'

In [2]:
# Read processed data
# The pickles live in <parent of cwd>/data/<dataset_name>/processed/.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by this project's own preprocessing step.
processed_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', dataset_name, 'processed')

loaded_splits = []
for split_name in ('X_train', 'X_test', 'y_train', 'y_test'):
    # Use a context manager so every file handle is closed as soon as it is
    # read, instead of being left open until garbage collection.
    with open(os.path.join(processed_dir, f'{split_name}.pkl'), 'rb') as fh:
        loaded_splits.append(pickle.load(fh))

# Order matches the tuple iterated above.
X_train, X_test, y_train, y_test = loaded_splits

In [3]:
# Apply classifier on original data
# Baseline run on the unreduced features, for comparison with the reduced-data
# scores further down. The dataset label is taken from `dataset_name` (the same
# value used to load the data) rather than a hard-coded literal, so changing
# the config cannot mislabel results.
# NOTE(review): the fourth positional argument is presumably the dataset name
# used for labelling/saving — confirm in classification.py.
apply_classifiers_original_features(
    X_train, y_train, X_test, y_test, dataset_name,
    classifiers=['XGBOOST', 'SVC', 'SGD'],
)

0.85

In [4]:
# Reduce data
# Run every listed reduction technique for each target dimensionality. The
# recorded run below reports that the result is also saved as
# data/<dataset>/reduced/reduced_X.pkl. The dataset label comes from
# `dataset_name` so that it always matches the data that was actually loaded.
# NOTE(review): the fourth positional argument is presumably the dataset name
# used for the output path — confirm in dimensionality_reduction.py.
reduced_X = apply_all_dimensionality_reduction(
    X_train, X_test, y_train, dataset_name,
    n_components_list=[50, 100, 150],
    models_list=['SLMVP', 'PCA', 'KPCA', 'LOL', 'LLE'],
)

('150Dim', 'LLE', 'k=24-reg=0.001'): 100%|██████████| 3/3 [00:06<00:00,  2.11s/it]      

Saved reduced data at path: /Users/espina/Unsynced/Whitepaper/data/mnist/reduced/reduced_X.pkl





In [2]:
# Read reduced data
# Reload the reduced representations from disk so the classification step can
# be run without repeating the reduction cell.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by this project's own pipeline.
reduced_path = os.path.join(os.path.dirname(os.getcwd()), 'data', dataset_name, 'reduced', 'reduced_X.pkl')
# Context manager guarantees the file handle is closed after loading.
with open(reduced_path, 'rb') as fh:
    reduced_X = pickle.load(fh)

In [4]:
# Apply classifier on reduced data
# Evaluate each classifier on the reduced data. The call returns a scores
# table and a second object bound to `reduced_X_best`; the recorded run below
# reports both being saved to disk. The dataset label comes from `dataset_name`
# so it stays in sync with the data that was loaded.
# NOTE(review): the fourth positional argument is presumably the dataset name
# used for the output file names — confirm in classification.py.
scores_df, reduced_X_best = apply_classifiers_reduced_data(
    reduced_X, y_train, y_test, dataset_name,
    classifiers=['XGBOOST', 'SVC', 'SGD'],
)

XGBoost: 100%|██████████| 18/18 [04:53<00:00, 16.33s/it]
SVC: 100%|██████████| 18/18 [00:34<00:00,  1.93s/it]
SGD: 100%|██████████| 18/18 [00:00<00:00, 19.40it/s]

Scores saved at: /scores/mnist_scores.csv
Reduced data saved at: /data/mnist/reduced/reduced_X_best.pkl





In [9]:
# Display the scores table returned by apply_classifiers_reduced_data above.
scores_df

Unnamed: 0,Model,Score,Params,Dimensions,Dim. Technique,Dim. Params
23,SVC,0.9,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",50Dim,LLE,k=24-reg=0.001
29,SVC,0.88,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",100Dim,LLE,k=24-reg=0.001
35,SVC,0.87,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}",150Dim,LLE,k=24-reg=0.001
17,XGBoost,0.86,{'n_estimators': 50},150Dim,LLE,k=24-reg=0.001
53,SGD,0.85,"{'alpha': 0.0001, 'average': False, 'class_wei...",150Dim,LLE,k=24-reg=0.001
47,SGD,0.84,"{'alpha': 0.0001, 'average': False, 'class_wei...",100Dim,LLE,k=24-reg=0.001
11,XGBoost,0.84,{'n_estimators': 100},100Dim,LLE,k=24-reg=0.001
5,XGBoost,0.83,{'n_estimators': 20},50Dim,LLE,k=24-reg=0.001
4,XGBoost,0.83,{'n_estimators': 50},50Dim,LOL,
41,SGD,0.82,"{'alpha': 0.0001, 'average': False, 'class_wei...",50Dim,LLE,k=24-reg=0.001
