# Projeto Marinha do Brasil
### Autor: Vinícius dos Santos Mello (viniciusdsmello@poli.ufrj.br)
### Laboratorio de Processamento de Sinais - UFRJ

In [1]:
import os
import pickle
import numpy as np
import time

from sklearn.decomposition import PCA
from sklearn.externals import joblib

from Functions import ClassificationAnalysisMultiProcessing as class_anal

init_time = time.time()

m_time = time.time()
print 'Time to import all libraries: '+str(m_time-init_time)+' seconds'

analysis_name = 'NeuralNetwork'
data_path = os.getenv('OUTPUTDATAPATH')
results_path = os.getenv('PACKAGE_NAME')

pict_results_path = results_path+'/'+analysis_name+'/picts'
files_results_path = results_path+'/'+analysis_name+'/output_files'

# Read data
# Check if LofarData has created...
m_time = time.time()

database = '4classes'
n_pts_fft = 1024
decimation_rate = 3
spectrum_bins_left = 400
development_flag = True
development_events = 400

if not os.path.exists('%s/%s/lofar_data_file_fft_%i_decimation_%i_spectrum_left_%i.jbl'%
                      (data_path,database,n_pts_fft,decimation_rate,spectrum_bins_left)):
    print 'No Files in %s/%s\n'%(data_path,database)
else:
    #Read lofar data
    [data,trgt,class_labels] = joblib.load('%s/%s/lofar_data_file_fft_%i_decimation_%i_spectrum_left_%i.jbl'%
                                           (data_path,database,n_pts_fft,decimation_rate,spectrum_bins_left))


    m_time = time.time()-m_time
    print 'Time to read data file: '+str(m_time)+' seconds'

    # correct format
    all_data = data
    all_trgt = trgt
    
    # Process data
    # unbalanced data to balanced data with random data creation of small classes

    # Same number of events in each class
    qtd_events_biggest_class = 0
    biggest_class_label = ''

    for iclass, class_label in enumerate(class_labels):
        if sum(all_trgt==iclass) > qtd_events_biggest_class:
            qtd_events_biggest_class = sum(all_trgt==iclass)
            biggest_class_label = class_label
        print "Qtd event of %s is %i"%(class_label,sum(all_trgt==iclass))
    print "\nBiggest class is %s with %i events"%(biggest_class_label,qtd_events_biggest_class)


    balanced_data = {}
    balanced_trgt = {}

    from Functions import DataHandler as dh
    m_datahandler = dh.DataHandlerFunctions()

    for iclass, class_label in enumerate(class_labels):
        if development_flag:
            class_events = all_data[all_trgt==iclass,:]
            if len(balanced_data) == 0:
                balanced_data = class_events[0:development_events,:]
                balanced_trgt = (iclass)*np.ones(development_events)
            else:
                balanced_data = np.append(balanced_data,
                                          class_events[0:development_events,:], 
                                          axis=0)
                balanced_trgt = np.append(balanced_trgt,(iclass)*np.ones(development_events))
        else:
            if len(balanced_data) == 0:
                class_events = all_data[all_trgt==iclass,:]
                balanced_data = m_datahandler.CreateEventsForClass(
                    class_events,qtd_events_biggest_class-(len(class_events)))
                balanced_trgt = (iclass)*np.ones(qtd_events_biggest_class)
            else:
                class_events = all_data[all_trgt==iclass,:]
                created_events = (m_datahandler.CreateEventsForClass(all_data[all_trgt==iclass,:],
                                                                     qtd_events_biggest_class-
                                                                     (len(class_events))))
                balanced_data = np.append(balanced_data,created_events,axis=0)
                balanced_trgt = np.append(balanced_trgt,
                                          (iclass)*np.ones(created_events.shape[0]),axis=0)
        
    all_data = balanced_data
    all_trgt = balanced_trgt

    # turn targets in sparse mode
    from keras.utils import np_utils
    trgt_sparse = np_utils.to_categorical(all_trgt.astype(int))

ImportError: cannot import name ClassificationAnalysisMultiProcessing

# Train process
## The train will modify one file and create three different files

### Log File:
This file will store basic information of all Package's trains and it will guide the analyses file to recognize which train information file should load. In each train this file should be appended with a new line contend the basic information to find the train information file (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis

### Train Information File
This file will store full information of the train performed (all parameters) in its name (each train information file will have a different name). And it will guide which train classifier file or which train result file should be open for analysis (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis

### Train Result File
This file will store the classifier result for all data and classification target (TXT FORMAT) or (PYTHON FORMAT) - This file should be access by all programs (MatLab and Python) for Analysis


In [3]:
%time

from sklearn import cross_validation
from Functions import LogFunctions as log

# Create a entry in log file
m_log = log.LogInformation()
date = m_log.CreateLogEntry("Classification",'NeuralNetwork')

# Create a train information file
n_folds = 4
n_inits = 4
norm = 'mapstd'

train_info = {}
train_info['n_folds'] = n_folds
train_info['n_inits'] = n_inits
train_info['norm'] = norm

# divide data in train and test for novelty detection
print 'Dividing data in train and test'
CVO = cross_validation.StratifiedKFold(all_trgt, n_folds)
CVO = list(CVO)
train_info['CVO'] = CVO

train_info['preprocessing_extraction_done'] = False
train_info['preprocessing_analysis_done'] = False
train_info['train_done'] = False
train_info['results_done'] = False
train_info['dev'] = development_flag

train_info_name = files_results_path+'/'+date+'_train_info.jbl'
joblib.dump([train_info],train_info_name,compress=9)

CPU times: user 1 µs, sys: 1e+03 ns, total: 2 µs
Wall time: 2.86 µs
Dividing data in train and test


['/home/vinicius.mello/Workspace/SonarAnalysis/Results/Classification/NeuralNetwork/output_files/2017_08_14_19_41_47_train_info.jbl']

In [2]:
# Read log files
from Functions import LogFunctions as log
mlog = log.LogInformation()
log_entries = mlog.RecoverLogEntries(package_name="Classification")
lastTrain = log_entries[np.max(log_entries.keys())]
print 'Last Train\nDate: {0}\nPackage: {1}\n'.format(lastTrain['date'],lastTrain['package'])
for ilog in log_entries.values()[::-1]:
    if ilog['package'] == analysis_name:
        print ilog

Last Train
Date: 2017_08_14_19_41_47
Package: NeuralNetwork

{'date': '2017_08_14_19_41_47', 'package': 'NeuralNetwork'}
{'date': '2017_08_14_16_02_17', 'package': 'NeuralNetwork'}
{'date': '2017_08_14_15_23_46', 'package': 'NeuralNetwork'}
{'date': '2017_08_07_11_05_17', 'package': 'NeuralNetwork'}
{'date': '2017_08_06_08_27_48', 'package': 'NeuralNetwork'}
{'date': '2017_08_05_23_34_21', 'package': 'NeuralNetwork'}
{'date': '2017_08_03_07_40_59', 'package': 'NeuralNetwork'}
{'date': '2017_08_02_22_38_31', 'package': 'NeuralNetwork'}
{'date': '2017_08_02_21_39_09', 'package': 'NeuralNetwork'}
{'date': '2017_08_01_22_41_15', 'package': 'NeuralNetwork'}
{'date': '2017_08_01_16_06_20', 'package': 'NeuralNetwork'}
{'date': '2017_08_01_10_29_57', 'package': 'NeuralNetwork'}
{'date': '2017_07_31_23_13_18', 'package': 'NeuralNetwork'}
{'date': '2017_07_31_21_19_37', 'package': 'NeuralNetwork'}
{'date': '2017_07_29_20_02_39', 'package': 'NeuralNetwork'}
{'date': '2017_07_29_01_19_00', 'packag

In [3]:
a = class_anal.NeuralClassification(name         = analysis_name,
                                    preproc_path = files_results_path,
                                    train_path   = files_results_path,
                                    anal_path    = files_results_path)
date = lastTrain['date']
n_folds = 4
n_inits = 4
a.trn_info = class_anal.TrnInformation(date     = date, 
                                       n_folds  = n_folds,
                                       verbose  = False,
                                       train_verbose= False,
                                       n_inits  = n_inits,
                                       patience = 25,
                                       n_epochs = 500,
                                       batch_size = 512)
a.trn_info.SplitTrainSet(all_trgt)
a.trn_info.Print()

Class TrnInformation
	Date 2017_08_14_19_41_47
	Number of Folds 4
	Number of Initializations: 4
	Normalization: mapstd
	CVO is not None
	Verbose is False
	Train Verbose is False


In [None]:
def train(parameters, fold): #data, trgt, n_neurons=1, trn_info=None, fold=0):
        #print 'NeuralClassication train function'
        data = parameters[0]
        trgt = parameters[1]
        n_neurons = parameters[2]
        trn_info = parameters[3]

        if fold > trn_info.n_folds or fold < -1:
            print 'Invalid Fold...'
            return None
        
        [data_preproc, trgt_preproc] = preprocess(data,trgt,
                                                       trn_info=trn_info,fold=fold)
        # Check if the file exists
        file_name = '%s/%s_%s_train_fold_%i_neurons_%i_model.h5'%(preproc_path,
                                                                  trn_info.date,
                                                                  name,fold,n_neurons)
        
        if not os.path.exists(file_name):
            best_init = 0
            best_loss = 999
            best_model = None
            best_desc = {}
            
            train_id, test_id = trn_info.CVO[fold]
            
            for i_init in range(trn_info.n_inits):
                print 'Init: %i of %i'%(i_init+1,trn_info.n_inits)
                
                model = Sequential()
                model.add(Dense(n_neurons, input_dim=data.shape[1], init="uniform"))
                model.add(Activation('softplus'))
                model.add(Dense(trgt_preproc.shape[1], init="uniform"))
                model.add(Activation('softmax'))
        
                adam = Adam(lr = trn_info.learning_rate,
                            beta_2 = trn_info.beta_2,
                epsilon = trn_info.epsilon)
                
                model.compile(loss='mean_squared_error',
                    optimizer=adam,
                    metrics=['accuracy'])
            
                # Train model
                earlyStopping = callbacks.EarlyStopping(monitor='val_loss',
                                                        patience=trn_info.patience,
                                                        verbose=trn_info.train_verbose,
                                                        mode='auto')
                init_trn_desc = model.fit(data_preproc[train_id], trgt_preproc[train_id],
                                          nb_epoch=trn_info.n_epochs,
                                          batch_size=trn_info.batch_size,
                                          callbacks=[earlyStopping],
                                          verbose=trn_info.verbose,
                                          validation_data=(data_preproc[test_id],
                                                           trgt_preproc[test_id]),
                                          shuffle=True)
        	
                if np.min(init_trn_desc.history['val_loss']) < best_loss:
                    best_init = i_init
                    best_loss = np.min(init_trn_desc.history['val_loss'])
                    best_model = model
                    best_desc['epochs'] = init_trn_desc.epoch
                    best_desc['acc'] = init_trn_desc.history['acc']
                    best_desc['loss'] = init_trn_desc.history['loss']
                    best_desc['val_loss'] = init_trn_desc.history['val_loss']
                    best_desc['val_acc'] = init_trn_desc.history['val_acc']
        
            # Save the model
            file_name = '%s/%s_%s_train_fold_%i_neurons_%i_model.h5'%(train_path,
                                                                      trn_info.date,
                                                                      name,fold,n_neurons)
            best_model.save(file_name)
        
            # Save the descriptor
            file_name = '%s/%s_%s_train_fold_%i_neurons_%i_trn_desc.jbl'%(train_path,
                                                                         trn_info.date,
                                                                         name,fold,n_neurons)
            joblib.dump([best_desc],file_name,compress=9)
        else:
            # Load the model
            file_name = '%s/%s_%s_train_fold_%i_neurons_%i_model.h5'%(train_path,
                                                                      trn_info.date,
                                                                      name,fold,n_neurons)
            best_model = load_model(file_name)
        
            # Load the descriptor
            file_name = '%s/%s_%s_train_fold_%i_neurons_%i_trn_desc.jbl'%(train_path,
                                                                     trn_info.date,
                                                                     name,fold,n_neurons)
            [best_desc] = joblib.load(file_name)
        
        return [best_model, best_desc]

In [26]:
from multiprocessing import Pool
from functools import partial
from Functions import ClassificationAnalysisMultiProcessing as class_anal


n_neurons = 20

parameters = np.zeros(4, dtype=object)
# Constant parameters
parameters[0] = all_data
parameters[1] = trgt_sparse
parameters[2] = n_neurons
parameters[3] = a.trn_info

def func(ifold, parameters, date = lastTrain['date'], a = class_anal.NeuralClassification(
                                                                            name = analysis_name,
                                                                            preproc_path = files_results_path,
                                                                            train_path   = files_results_path,
                                                                            anal_path    = files_results_path)):
    n_folds = 4
    n_inits = 4
        
    a.trn_info = class_anal.TrnInformation(date     = date, 
                                           n_folds  = n_folds,
                                           verbose  = False,
                                           train_verbose= False,
                                           n_inits  = n_inits,
                                           patience = 25,
                                           n_epochs = 500,
                                           batch_size = 512)
    a.trn_info.SplitTrainSet(all_trgt)
    #a.trn_info.Print()
    return ifold, a.train(parameters = parameters, fold = ifold)
    
#### end func() ####

num_processes = 4
call = partial(func, parameters, date)
p = Pool(processes = num_processes)
n_folds = 4
folds = range(n_folds)
p.map(call, folds)


AttributeError: 'str' object has no attribute 'n_folds'

In [23]:
#%matplotlib inline  
#a.analysis_top_sweep(all_data,trgt_sparse,trn_info=a.trn_info, min_neurons=10, max_neurons=50)

In [None]:
#[best_model, best_desc] = a.train(all_data,trgt_sparse,trn_infoexit=a.trn_info, n_neurons=25, fold=0)
#loss_vector = best_desc['val_loss']
#acc_vector  = best_desc['val_acc']
#print loss_vector
#print acc_vector

In [None]:
%matplotlib inline 
# Analysis for independet topology with respect to number of neurons
n_neurons = 30
for ifold in range(n_folds):
    a.analysis_train_plot(data = all_data,trgt = trgt_sparse,trn_info = a.trn_info, n_neurons= n_neurons, fold = ifold)

In [None]:
%matplotlib inline  
print "Results for %i neurons"%n_neurons
for ifold in range(n_folds):
    a.analysis_conf_mat(data = all_data,
                        trgt = trgt_sparse,
                        trn_info = a.trn_info,
                        class_labels = class_labels.values(),
                        n_neurons = n_neurons,
                        fold = ifold)

In [None]:
%matplotlib inline  
print "Results for %i neurons"%n_neurons
for ifold in range(n_folds):
    a.analysis_output_hist(data = all_data,trgt = trgt_sparse,trn_info = a.trn_info, n_neurons= n_neurons, fold = ifold)

# Conclusion
