In [None]:
# Projeto Marinha do Brasil

# Autor: Natanael Junior (natmourajr@gmail.com)
# Laboratorio de Processamento de Sinais - UFRJ

In [1]:
import os
import pickle
import numpy as np
import time

from sklearn.decomposition import PCA
from sklearn.externals import joblib

init_time = time.time()

m_time = time.time()
print 'Time to import all libraries: '+str(m_time-init_time)+' seconds'

outputpath = os.environ['OUTPUTDATAPATH']
main_analysis_path = os.environ['SONAR_WORKSPACE']
log_analysis_path = os.environ['PACKAGE_OUTPUT']
result_analysis_path = os.environ['PACKAGE_OUTPUT']+'/kPCA'
pict_results_path = os.environ['PACKAGE_OUTPUT']+'/kPCA/picts'
files_results_path = os.environ['PACKAGE_OUTPUT']+'/kPCA/output_files'

# Read data
# Check if LofarData has created...
m_time = time.time()


subfolder = '4classes'
n_pts_fft = 1024
decimation_rate = 3

if(not os.path.exists(outputpath+'/'+'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
            subfolder,n_pts_fft,decimation_rate))):
    print outputpath+'/'+'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
        subfolder,n_pts_fft,decimation_rate)+' doesnt exist...please create it'
    exit()
    
#Read lofar data
[data,class_labels] = joblib.load(outputpath+'/'+
                                  'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
            subfolder,n_pts_fft,decimation_rate))
m_time = time.time()-m_time
print 'Time to read data file: '+str(m_time)+' seconds'

Time to import all libraries: 4.72068786621e-05 seconds
Time to read data file: 6.58081889153 seconds


In [2]:
# Get data in correct format
from keras.utils import np_utils

# create a full data vector
all_data = {};
all_trgt = {};

for iclass, class_label in enumerate(class_labels):
    for irun in range(len(data[iclass])):
        if len(all_data) == 0:
            all_data = data[iclass][irun]['Signal']
            all_trgt = (iclass)*np.ones(data[iclass][irun]['Signal'].shape[1])
        else:
            all_data = np.append(all_data,data[iclass][irun]['Signal'],axis=1)
            all_trgt = np.append(all_trgt,(iclass)*np.ones(data[iclass][irun]
                                                           ['Signal'].shape[1]),axis=0)

all_data = all_data.transpose()

# turn targets in sparse mode
trgt_sparse = np_utils.to_categorical(all_trgt)

Using Theano backend.
  Y = np.zeros((len(y), nb_classes))
  Y[i, y[i]] = 1.


In [15]:
all_data = all_data[:,0:4]
all_data.shape

(25844, 4)

In [21]:
# Train Process
from Functions import LogFunctions as log

# Create a entry in log file
m_log = log.LogInformation()
date = m_log.CreateLogEntry(package_name="PreProcessing",analysis_name='kPCA')

# Create a train information file
n_folds = 2
n_inits = 2
norm = 'mapstd'
kernel = 'rbf' #“linear” | “poly” | “rbf” | “sigmoid” | “cosine” | “precomputed” 

train_info = {}
train_info['n_folds'] = n_folds
train_info['norm'] = norm
train_info['n_inits'] = n_inits
train_info['kernel'] = kernel

train_info_name = result_analysis_path+'/train_info_files'+'/'+date+'_train_info.jbl'
kpca_file_name = result_analysis_path+'/output_files'+'/'+date+'_kpcas_files.jbl'

from sklearn import cross_validation
from sklearn import preprocessing
from sklearn.decomposition import KernelPCA

CVO = cross_validation.StratifiedKFold(all_trgt, train_info['n_folds'])
CVO = list(CVO)
train_info['CVO'] = CVO

joblib.dump([train_info],train_info_name,compress=9)

kpcas = {}

for ifold in range(train_info['n_folds']):
    train_id, test_id = CVO[ifold]
    
    # normalize data based in train set
    if train_info['norm'] == 'mapstd':
        scaler = preprocessing.StandardScaler().fit(all_data[train_id,:])
    elif train_info['norm'] == 'mapstd_rob':
        scaler = preprocessing.RobustScaler().fit(all_data[train_id,:])
    elif train_info['norm'] == 'mapminmax':
        scaler = preprocessing.MinMaxScaler().fit(all_data[train_id,:])
        
    norm_all_data = scaler.transform(all_data)
    kpca = KernelPCA(kernel=train_info['kernel'], fit_inverse_transform=True, gamma=10)
    kpca.fit(norm_all_data[train_id])
    kpcas[ifold] = kpca
    
joblib.dump([kpcas],kpca_file_name,compress=9)

KeyboardInterrupt: 

In [20]:
norm_all_data.shape

(25844, 4)

In [7]:
# Read log files
from Functions import LogFunctions as log
mlog = log.LogInformation()
log_entries = mlog.RecoverLogEntries(package_name="PreProcessing")
print log_entries

{0: {'date': '2016_08_22_19_20_34', 'package': 'PCALinear'}, 1: {'date': '2016_08_22_19_37_35', 'package': 'PCALinear'}, 2: {'date': '2016_08_24_14_30_39', 'package': 'PCDIndependent'}, 3: {'date': '2016_08_25_19_57_27', 'package': 'kPCA'}, 4: {'date': '2016_08_25_19_59_26', 'package': 'kPCA'}, 5: {'date': '2016_08_25_19_59_50', 'package': 'kPCA'}}
