In [2]:
import sys

# Make the project's sibling source folders importable from this notebook.
# The entries are relative, so they resolve against the kernel's working directory.
sys.path.extend(['../src', '../utils', '../visualization'])
    
import numpy as np
import pandas as pd
import pickle
import os

from dimensionality_reduction import apply_all_dimensionality_reduction
from classification import apply_classifiers_original_features, apply_classifiers_reduced_data, apply_classifiers_with_random_features

In [3]:
# Read processed data
# The pickled splits live under <parent of the working directory>/data/orl/processed.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
processed_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'orl', 'processed')

# Load each split inside a `with` block so the file handle is closed right away
# (the previous pickle.load(open(...)) form left four handles open).
loaded_splits = {}
for split_name in ('X_train', 'X_test', 'y_train', 'y_test'):
    with open(os.path.join(processed_dir, split_name + '.pkl'), 'rb') as split_file:
        loaded_splits[split_name] = pickle.load(split_file)

X_train = loaded_splits['X_train']
X_test = loaded_splits['X_test']
y_train = loaded_splits['y_train']
y_test = loaded_splits['y_test']

In [3]:
# Baseline: SGD classifier on the original (unreduced) features.
# The call returns (score on test data, score on train data).
sgd_baseline_scores = apply_classifiers_original_features(
    X_train, y_train, X_test, y_test, 'orl',
    classifiers=['SGD'],
)
sgd_baseline_scores

(0.925, 1.0)

In [4]:
# Baseline: SVC classifier on the original (unreduced) features.
svc_baseline_scores = apply_classifiers_original_features(
    X_train, y_train, X_test, y_test, 'orl',
    classifiers=['SVC'],
)
svc_baseline_scores

(0.925, 1.0)

In [None]:
# Apply classifier on original data selecting features at random
# 10304 is passed as `size` and is also the last entry of `num_dims`
# (presumably the number of original features; confirm against the data).
total_features = 10304
# Feature counts to try: 500, 1000, ..., 10000, then the full 10304.
feature_counts = [*range(500, total_features, 500), total_features]

# NOTE(review): the positional order here is (X_train, X_test, y_train, y_test), while
# apply_classifiers_original_features is called with (X_train, y_train, X_test, y_test)
# elsewhere in this notebook. Confirm each call matches its function's signature.
apply_classifiers_with_random_features(
    X_train, X_test, y_train, y_test,
    num_iterations=4,
    num_dims=feature_counts,
    size=total_features,
    dataset_name='orl',
)

In [8]:
# Reduce data
# Run the listed dimensionality-reduction models for the listed target sizes.
# The recorded output shows the result is also saved to data/orl/reduced/reduced_X.pkl.
target_dimensions = [15, 50, 150, 300]
reduction_models = ['SLMVP', 'PCA', 'LLE']
reduced_X = apply_all_dimensionality_reduction(
    X_train, X_test, y_train, 'orl',
    n_components_list=target_dimensions,
    models_list=reduction_models,
)

('300Dim', 'LLE', 'k=18-reg=0.001'): 100%|██████████| 4/4 [00:24<00:00,  6.21s/it]      


Saved reduced data at path: /Users/espina/Unsynced/Whitepaper/data/orl/reduced/reduced_X.pkl


In [5]:
# Reload the reduced data from <parent of the working directory>/data/orl/reduced/reduced_X.pkl.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
reduced_X_path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'orl', 'reduced', 'reduced_X.pkl')
# Open inside a `with` block so the file handle is closed once the data is loaded.
with open(reduced_X_path, 'rb') as reduced_file:
    reduced_X = pickle.load(reduced_file)

In [9]:
# Apply classifier on reduced data
# Per the recorded output, the scores are saved to /results/scores/orl_scores.csv and
# the best reduced data to /data/orl/reduced/reduced_X_best.pkl.
classifier_names = ['SGD', 'SVC']
scores_df, reduced_X_best = apply_classifiers_reduced_data(
    reduced_X, y_train, y_test, 'orl',
    classifiers=classifier_names,
)

SVC: 100%|██████████| 24/24 [00:19<00:00,  1.22it/s]
SGD: 100%|██████████| 24/24 [00:02<00:00,  9.41it/s]


Scores saved at: /results/scores/orl_scores.csv
Reduced data saved at: /data/orl/reduced/reduced_X_best.pkl


In [10]:
# Preview the first ten rows of the scores table.
scores_df.head(n=10)

Unnamed: 0,Model,Score,Score Train,Params,Dimensions,Dim. Technique,Dim. Params
41,SGD,1.0,1.0,"{'alpha': 0.0001, 'average': False, 'class_wei...",150Dim,LLE,k=18-reg=0.001
47,SGD,0.975,1.0,"{'alpha': 0.0001, 'average': False, 'class_wei...",300Dim,LLE,k=18-reg=0.001
17,SVC,0.975,0.997222,"{'C': 0.001, 'gamma': 1e-05, 'kernel': 'rbf'}",150Dim,LLE,k=18-reg=0.001
23,SVC,0.95,1.0,"{'C': 0.001, 'gamma': 1e-05, 'kernel': 'rbf'}",300Dim,LLE,k=18-reg=0.001
22,SVC,0.95,1.0,"{'C': 0.001, 'gamma': 1e-07, 'kernel': 'rbf'}",300Dim,PCA,
46,SGD,0.925,0.997222,"{'alpha': 0.0001, 'average': False, 'class_wei...",300Dim,PCA,
10,SVC,0.925,0.997222,"{'C': 0.001, 'gamma': 1e-07, 'kernel': 'rbf'}",50Dim,PCA,
16,SVC,0.925,1.0,"{'C': 0.001, 'gamma': 1e-07, 'kernel': 'rbf'}",150Dim,PCA,
35,SGD,0.85,0.95,"{'alpha': 0.0001, 'average': False, 'class_wei...",50Dim,LLE,k=18-reg=0.001
4,SVC,0.85,0.966667,"{'C': 0.001, 'gamma': 1e-07, 'kernel': 'rbf'}",15Dim,PCA,
