In [None]:
# Tutorial de Extração de Componentes Principais de Discriminação no banco de dados Iris

# Autor: Natanael Junior (natmourajr@gmail.com)
# Laboratorio de Processamento de Sinais - UFRJ

In [1]:
# Import Libs
import time
init_time = time.time()

import os
import pickle
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

# Notebook-wide matplotlib defaults, applied in a single call.
plt.rcParams.update({
    'lines.linewidth': 2,
    'legend.handlelength': 3,
    'legend.borderpad': 0.3,
    'legend.numpoints': 1,
    'xtick.labelsize': 18,
    'font.weight': 'bold',
    'ytick.labelsize': 20,
})

# init_time was sampled before the bulk of the imports above, so the
# difference reports how long this setup cell took to load its libraries.
current_time = time.time()
print('Time to import libraries: %1.3f seconds' % (current_time - init_time))

Time to import libraries: 4.346 seconds


In [2]:
# Read Data
from keras.utils import np_utils

init_time = time.time()

from sklearn import datasets

# Load the Iris dataset shipped with scikit-learn.
iris = datasets.load_iris()
data, trgt = iris.data, iris.target  # inputs and integer class targets
# presumably one plot color / display name per class, in target order
iris_colors = ['b', 'r', 'g']
iris_labels = ['Setosa', 'Versicolor', 'Virginica']

# Classification target: categorical ("max sparse") encoding of the integer targets.
trgt_sparse = np_utils.to_categorical(trgt)

# Elapsed time since init_time was sampled at the top of this cell.
current_time = time.time()
print('Time to import data: %1.3f seconds' % (current_time - init_time))

Using Theano backend.


Time to import data: 0.910 seconds


In [3]:
from sklearn import cross_validation
from sklearn import preprocessing

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import keras.callbacks as callbacks
from keras.utils import np_utils
from keras.layers import Merge


def pcdc_extractor(inputdata, targetdata, trn_params=None):
    '''
        Prepare the extraction of the Cooperative Principal Components of
        Discrimination (PCD) of a dataset.

        As written, this function builds the stratified cross-validation folds,
        fits the requested scaler on the training split of every fold and
        allocates one empty result container per fold. No classifier is trained
        in the code below, so the returned containers are empty.

        Parameters:
            inputdata: dataset with inputs (indexed as inputdata[rows, :])

            targetdata: each class -> an integer

            trn_params: train parameters (dict); when None, the defaults
                        listed below are used

            trn_params['n_folds'] = number of cross validation folds (default 2)
            trn_params['norm'] = normalization: 'none', 'mapstd',
                                 'mapstd_rob' or 'mapminmax' (default 'none')

            The keys below are accepted but not read by the code in this
            function:
            trn_params['n_inits'] = number of initializations
            trn_params['n_pcds'] = number of PCDs to be extracted
            trn_params['learning_rate'] = learning rate
            trn_params['learning_decay'] = learning rate decay
            trn_params['momentum'] = momentum
            trn_params['nesterov'] = nesterov momentum
            trn_params['train_verbose'] = train verbose
            trn_params['n_epochs'] = number of epochs
            trn_params['batch_size'] = batch size

        Returns:
            [pcds, classifiers, trn_desc]: three dicts keyed by fold index
            (0 .. n_folds-1), each value being an empty dict.

        Raises:
            ValueError: if trn_params['norm'] is not one of the supported names.
    '''

    if trn_params is None:
        trn_params = {
            'n_folds': 2,
            'n_inits': 2,
            'n_pcds': 2,
            'norm': 'none',
            'learning_rate': 0.01,
            'learning_decay': 1e-6,
            'momentum': 0.3,
            'nesterov': True,
            'train_verbose': False,
            'n_epochs': 300,
            'batch_size': 8,
        }

    # Single-argument print calls produce the same text on Python 2 and 3.
    print('PCD Cooperative Extractor')
    print('trn_params:  %s' % (trn_params,))

    # Fail fast on an unsupported normalization name: previously this left
    # `scaler` unbound and crashed with UnboundLocalError inside the fold loop.
    norm = trn_params['norm']
    supported_norms = ('none', 'mapstd', 'mapstd_rob', 'mapminmax')
    if norm not in supported_norms:
        raise ValueError(
            "unknown trn_params['norm'] %r; expected one of %s"
            % (norm, ', '.join(supported_norms))
        )

    # trained classifiers
    classifiers = {}
    trn_desc = {}
    pcds = {}

    CVO = list(cross_validation.StratifiedKFold(targetdata, trn_params['n_folds']))

    # from each class an integer -> target max sparse
    # (not consumed yet by the code in this function)
    targetdata_sparse = np_utils.to_categorical(targetdata)

    for ifold, (train_id, test_id) in enumerate(CVO):
        # normalize data based in train set only, so that statistics of the
        # test split do not leak into the scaler
        if norm == 'mapstd':
            scaler = preprocessing.StandardScaler()
        elif norm == 'mapstd_rob':
            scaler = preprocessing.RobustScaler()
        elif norm == 'mapminmax':
            scaler = preprocessing.MinMaxScaler()
        else:  # norm == 'none'
            scaler = None

        if scaler is not None:
            scaler.fit(inputdata[train_id, :])
            norm_inputdata = scaler.transform(inputdata)
        else:
            norm_inputdata = inputdata

        # per-fold result containers (left empty by this function)
        classifiers[ifold] = {}
        trn_desc[ifold] = {}
        pcds[ifold] = {}

    return [pcds, classifiers, trn_desc]

In [4]:
# Run the cooperative PCD extractor on the Iris data with explicit settings
trn_params = {
    'n_folds': 2,
    'n_inits': 2,
    'n_pcds': 3,
    'norm': 'mapstd',
    'learning_rate': 0.01,
    'learning_decay': 1e-4,
    'momentum': 0.9,
    'nesterov': True,
    'train_verbose': False,
    'n_epochs': 1000,
    'batch_size': 3,
}

# The extractor returns a 3-element list; unpack it into the three result dicts.
pcds, classifiers, trn_desc = pcdc_extractor(data, trgt, trn_params)

PCD Cooperative Extractor
trn_params:  {'nesterov': True, 'learning_rate': 0.01, 'n_inits': 2, 'batch_size': 3, 'n_epochs': 1000, 'train_verbose': False, 'learning_decay': 0.0001, 'momentum': 0.9, 'n_folds': 2, 'norm': 'mapstd', 'n_pcds': 3}
