In [1]:
# Projeto Marinha do Brasil

# Autor: Natanael Junior (natmourajr@gmail.com)
# Laboratorio de Processamento de Sinais - UFRJ

In [6]:
import os
import pickle
import numpy as np
import time

from sklearn.decomposition import PCA
from sklearn.externals import joblib

init_time = time.time()

m_time = time.time()
print 'Time to import all libraries: '+str(m_time-init_time)+' seconds'

outputpath = os.environ['OUTPUTDATAPATH']
main_analysis_path = os.environ['SONAR_WORKSPACE']
log_analysis_path = os.environ['PACKAGE_OUTPUT']
result_analysis_path = os.environ['PACKAGE_OUTPUT']+'/PCDDeflaction'
pict_results_path = os.environ['PACKAGE_OUTPUT']+'/PCDDeflaction/picts'
files_results_path = os.environ['PACKAGE_OUTPUT']+'/PCDDeflaction/output_files'

# Read data
# Check if LofarData has created...
m_time = time.time()


subfolder = '4classes'
n_pts_fft = 1024
decimation_rate = 3

if(not os.path.exists(outputpath+'/'+'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
            subfolder,n_pts_fft,decimation_rate))):
    print outputpath+'/'+'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
        subfolder,n_pts_fft,decimation_rate)+' doesnt exist...please create it'
    exit()
    
#Read lofar data
[data,class_labels] = joblib.load(outputpath+'/'+
                                  'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
            subfolder,n_pts_fft,decimation_rate))
m_time = time.time()-m_time
print 'Time to read data file: '+str(m_time)+' seconds'

Time to import all libraries: 5.50746917725e-05 seconds
Time to read data file: 2.58621692657 seconds


In [7]:
# Get data in correct format
from keras.utils import np_utils

# create a full data vector
all_data = {};
all_trgt = {};

for iclass, class_label in enumerate(class_labels):
    for irun in range(len(data[iclass])):
        if len(all_data) == 0:
            all_data = data[iclass][irun]['Signal']
            all_trgt = (iclass)*np.ones(data[iclass][irun]['Signal'].shape[1])
        else:
            all_data = np.append(all_data,data[iclass][irun]['Signal'],axis=1)
            all_trgt = np.append(all_trgt,(iclass)*np.ones(data[iclass][irun]
                                                           ['Signal'].shape[1]),axis=0)

all_data = all_data.transpose()

# turn targets in sparse mode
trgt_sparse = np_utils.to_categorical(all_trgt)

## Train process
### The train will modify one file and create three different files

### Log File:

This file will store basic information of all Package's trains and it will guide the analyses file to recognize which train information file should load. In each train this file should be appended with a new line contend the basic information to find the train information file (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis

### Train Information File

This file will store full information of the train performed (all parameters) in its name (each train information file will have a different name). And it will guide which train classifier file or which train result file should be open for analysis (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis

### Train Classifier File

This file will store the classifier after train, the folds information and the train evolution (when this exists) (NATIVE FORMAT) or (PYTHON FORMAT) - This file should not be access by all programs

### Train Result File

This file will store the classifier result for all data and classification target (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis

In [44]:
# Train Process
from Functions import LogFunctions as log

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import keras.callbacks as callbacks
from keras.utils import np_utils

# Create a entry in log file
m_log = log.LogInformation()
date = m_log.CreateLogEntry(package_name="PreProcessing",analysis_name='PCDDeflaction')

# Create a train information file
n_folds = 2
n_inits = 1
n_pcds = 10
norm = 'mapstd'

train_info = {}
train_info['n_folds'] = n_folds
train_info['n_inits'] = n_inits
train_info['n_pcds'] = n_pcds
train_info['norm'] = norm

train_info_name = result_analysis_path+'/train_info_files'+'/'+date+'_train_info.jbl'
classifiers_name = result_analysis_path+'/classifiers_files'+'/'+date+'_classifiers'
pdf_file_name = result_analysis_path+'/output_files'+'/'+date+'_pcds'

from sklearn import cross_validation
from sklearn import preprocessing

CVO = cross_validation.StratifiedKFold(all_trgt, train_info['n_folds'])
CVO = list(CVO)
train_info['CVO'] = CVO

joblib.dump([train_info],train_info_name,compress=9)

# train classifiers
classifiers = {}
trn_desc = {}
pcds = {}

# try to estimate time to be done...
total_trains = train_info['n_folds']*train_info['n_inits']
nn_trained = 0

for ifold in range(train_info['n_folds']):
    train_id, test_id = CVO[ifold]
    
    # normalize data based in train set
    if train_info['norm'] == 'mapstd':
        scaler = preprocessing.StandardScaler().fit(all_data[train_id,:])
    elif train_info['norm'] == 'mapstd_rob':
        scaler = preprocessing.RobustScaler().fit(all_data[train_id,:])
    elif train_info['norm'] == 'mapminmax':
        scaler = preprocessing.MinMaxScaler().fit(all_data[train_id,:])
        
    norm_all_data = scaler.transform(all_data)
       
    print 'Train Process for %i Fold of %i Folds'%(ifold,train_info['n_folds'] )
    pcds[ifold] = {}
    for ipcd in range(train_info['n_pcds']):
        best_init = 0
        best_loss = 999
        if ipcd == 0:
            # first pcd: regular NN
            for i_init in range(train_info['n_inits']):
                #print 'Init: %i of %i'%(i_init,train_info['n_inits'])
                model = Sequential()
                model.add(Dense(all_data.shape[1],
                                input_dim=all_data.shape[1], 
                                init='uniform'),)
                model.add(Activation('linear'))
                model.add(Dense(1, init='uniform'))
                model.add(Activation('tanh'))
                model.add(Dense(50, init='uniform'))
                model.add(Activation('tanh'))
                model.add(Dense(trgt_sparse.shape[1], init='uniform')) 
                model.add(Activation('tanh'))
                
                sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
                model.compile(loss='mean_squared_error', optimizer=sgd
                      ,metrics=['accuracy'])
                
                earlyStopping = callbacks.EarlyStopping(monitor='val_loss', patience=25, 
                                            verbose=0, mode='auto')
                # Train model
                init_trn_desc = model.fit(norm_all_data[train_id], trgt_sparse[train_id], 
                                nb_epoch=50, 
                                batch_size=8,
                                callbacks=[earlyStopping], 
                                verbose=0,
                                validation_data=(all_data[test_id],trgt_sparse[test_id]),
                                shuffle=True)
                if np.min(init_trn_desc.history['val_loss']) < best_loss:
                    best_init = i_init
                    best_loss = np.min(init_trn_desc.history['val_loss'])
                    classifiers[ifold] = model
                    trn_desc[ifold] = init_trn_desc
                    pcd_test = model.layers[0].get_weights()
                    pcds[ifold][ipcd] = pcd_test[0]
                
        else:
            break
            # from second to end: frezze previous and train only last one
            for i_init in range(train_info['n_inits']):
                #print 'Init: %i of %i'%(i_init,train_info['n_inits'])
                
classifiers_file = open(classifiers_name+'.pickle', "wb")
pickle.dump([classifiers,trn_desc],classifiers_file)
classifiers_file.close()

pdfs_file = open(pdf_file_name+'.pickle', "wb")
pickle.dump([pcds],pdfs_file)
pdfs_file.close()

Train Process for 0 Fold of 2 Folds
Train Process for 1 Fold of 2 Folds


In [43]:
trn_desc[0].history['loss']

[0.13671943350932139,
 0.11903581799181444,
 0.11816056331616881,
 0.11798252544506592,
 0.11767119092284531,
 0.11756109150651162,
 0.11679834748156499,
 0.11457175052499967,
 0.11311531101319085,
 0.11245439042578953,
 0.1121803049749749,
 0.11204924891110997,
 0.11182465142829989,
 0.1116804906263891,
 0.1116175043290913,
 0.11141844525403924,
 0.11130314698661682,
 0.11112289180104536,
 0.11099181282957146,
 0.1110051277867493,
 0.11088262431259523,
 0.11076527807644466,
 0.1107576062734001,
 0.11045730272958858,
 0.11058870252560248,
 0.11053599257253657,
 0.11043499493936149,
 0.11047695068893672,
 0.11039437994109695,
 0.11012151630109664,
 0.11016656437042688,
 0.11016744411692349,
 0.11020887540401796,
 0.11007888760523943,
 0.11003310899299583,
 0.11008824100456979,
 0.11001645948366801,
 0.10999903945739116]