In [1]:
# Projeto Marinha do Brasil

# Autor: Natanael Junior (natmourajr@gmail.com)
# Laboratorio de Processamento de Sinais - UFRJ

In [1]:
import os
import pickle
import numpy as np
import time

init_time = time.time()

from sklearn import preprocessing
from sklearn import cross_validation
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.externals import joblib

m_time = time.time()
print('Time to import all libraries: '+str(m_time-init_time)+' seconds')

# All locations come from environment variables; a missing variable raises
# KeyError on the corresponding line.
outputpath = os.environ['OUTPUTDATAPATH']
main_analysis_path = os.environ['SONAR_WORKSPACE']
log_analysis_path = os.environ['PACKAGE_OUTPUT']
result_analysis_path = os.environ['PACKAGE_OUTPUT']+'/PCASingleClassSVM'

# Read data
# The timer below covers both the existence check and the load.
m_time = time.time()

# These three values select the data file: they are embedded in its name.
subfolder = '4classes'
n_pts_fft = 1024
decimation_rate = 3

# Build the file path once so the check, the error message and the load
# cannot drift apart.
lofar_data_file = outputpath+'/'+'LofarData_%s_%i_fft_pts_%i_decimation_rate.jbl'%(
    subfolder,n_pts_fft,decimation_rate)

# Check that the Lofar data file has been created. Raising stops the cell
# right here with the message below, instead of calling exit() from inside
# a notebook.
if not os.path.exists(lofar_data_file):
    raise IOError(lofar_data_file+' doesnt exist...please create it')

# Read lofar data: the file holds a two-element list [data, class_labels].
[data,class_labels] = joblib.load(lofar_data_file)
m_time = time.time()-m_time
print('Time to read data file: '+str(m_time)+' seconds')



Time to import all libraries: 1.21765494347 seconds
Time to read data file: 1.61923003197 seconds


# Train process
## The training process appends to one file and creates three new files

### Log File:
This file stores basic information about every training run of the package and tells the analysis code which train information file to load. Each training run appends one new line containing the basic information needed to find its train information file (TXT FORMAT) or (PYTHON FORMAT). This file should be accessible to all analysis programs (MATLAB and Python).

### Train Information File
This file stores the full information about the training run performed (all parameters) and is identified by its name (each train information file has a different name). It indicates which train classifier file or train result file should be opened for analysis (TXT FORMAT) or (PYTHON FORMAT). This file should be accessible to all analysis programs (MATLAB and Python).

### Train Classifier File
This file stores the trained classifier, the fold information and the training evolution (when it exists) (NATIVE FORMAT) or (PYTHON FORMAT). This file is not meant to be accessed by all programs.

### Train Result File
This file stores the classifier output for all data together with the classification target (TXT FORMAT) or (PYTHON FORMAT). This file should be accessible to all analysis programs (MATLAB and Python).


In [7]:
# Train Process
from Functions import LogFunctions as log

# Register this run in the package log. The value returned by CreateLogEntry
# is used below as the prefix of every file name produced by the run.
m_log = log.LogInformation()
date = m_log.CreateLogEntry("NoveltyDetection",'PCASingleClassSVM')

# Training parameters
n_folds = 2
n_pcas = 50
norm = 'mapstd'
nu_values = np.array([0.001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
gamma_value = 0.1

# Train information: the same parameters gathered in a single dictionary
train_info = {
    'n_folds': n_folds,
    'n_pcas': n_pcas,
    'norm': norm,
    'nu_values': nu_values,
    'gamma_value': gamma_value,
}

# Output locations, one sub-folder per kind of file
train_info_name = result_analysis_path + '/train_info_files' + '/' + date + '_train_info.jbl'
class_info_name = result_analysis_path + '/classifiers_files' + '/' + date + '_classifiers.jbl'
result_file_base_name = result_analysis_path + '/result_files' + '/' + date

# Note: train_info is not written to disk at this point.

# create a full data vector
# The pieces of every run are collected first and joined once at the end:
# this avoids re-copying the whole array on each run and does not need an
# empty placeholder to detect the first iteration.
signal_blocks = []
target_blocks = []

for iclass, class_label in enumerate(class_labels):
    for irun in range(len(data[iclass])):
        run_signal = data[iclass][irun]['Signal']
        signal_blocks.append(run_signal)
        # one target entry per column of the run's signal, holding the class index
        target_blocks.append((iclass)*np.ones(run_signal.shape[1]))

if len(signal_blocks) == 0:
    raise ValueError('no runs found in data: cannot build the full data vector')

# runs are joined along axis 1 (columns), targets along their only axis
all_data = np.concatenate(signal_blocks,axis=1)
all_trgt = np.concatenate(target_blocks,axis=0)

# after the transpose, row i of all_data corresponds to all_trgt[i]
all_data = all_data.transpose()

# Train the Classifier

# Classifiers
# Layout: classifiers[novelty_class][ifold][nu_value] is a dictionary that maps
# 'NoveltyDetector' and every known class label to a fitted OneClassSVM.
classifiers = {}
for novelty_class, novelty_label in enumerate(class_labels):
    classifiers[novelty_class] = {}
    print('Train Novelty Classifier for %s'%(novelty_label))

    # Gather signal and target of every run of the known classes, i.e. all
    # classes except the one currently treated as novelty. Pieces are joined
    # once after the loops instead of growing an array run by run.
    known_signals = []
    known_targets = []
    for known_class, known_label in enumerate(class_labels):
        if known_class == novelty_class:
            continue
        for irun in range(len(data[known_class])):
            run_signal = data[known_class][irun]['Signal']
            known_signals.append(run_signal)
            known_targets.append((known_class)*np.ones(run_signal.shape[1]))

    if len(known_signals) == 0:
        raise ValueError('no known-class runs available when %s is the novelty'%(novelty_label))

    # after the transpose, row i of known_data corresponds to known_trgt[i]
    known_data = np.concatenate(known_signals,axis=1).transpose()
    known_trgt = np.concatenate(known_targets,axis=0)

    # The folds are stored in train_info so they are saved with the run.
    CVO = list(cross_validation.StratifiedKFold(known_trgt, n_folds))
    train_info['CVO_%s'%(novelty_label)] = CVO

    # for: folds
    for ifold in range(len(CVO)):
        classifiers[novelty_class][ifold] = {}
        # '%s %s' reproduces the separator the Python 2 print statement
        # inserted between its two arguments
        print('%s %s'%('Starting Training Processing for fold: ', ifold))

        # split data in trn set, tst set
        train_id, test_id = CVO[ifold]

        # normalize data based in train set
        if train_info['norm'] == 'mapstd':
            scaler = preprocessing.StandardScaler().fit(known_data[train_id,:])
        elif train_info['norm'] == 'mapstd_rob':
            scaler = preprocessing.RobustScaler().fit(known_data[train_id,:])
        elif train_info['norm'] == 'mapminmax':
            scaler = preprocessing.MinMaxScaler().fit(known_data[train_id,:])
        else:
            # Without this branch an unknown name would leave `scaler`
            # undefined, or silently reuse the scaler of a previous fold.
            raise ValueError('unknown normalization: %s'%(train_info['norm']))

        norm_known_data = scaler.transform(known_data)
        norm_all_data = scaler.transform(all_data)

        # PCA is fitted on the train rows only and then applied to everything
        pca = PCA(n_components=train_info['n_pcas'])
        norm_known_data = pca.fit(norm_known_data[train_id,:]).transform(norm_known_data)
        norm_all_data = pca.transform(norm_all_data)

        for nu_value in train_info['nu_values']:
            nu_classifiers = {}
            classifiers[novelty_class][ifold][nu_value] = nu_classifiers

            # single class (known vs novelty): fitted on the train rows of
            # all known classes together
            novelty_detector = svm.OneClassSVM(nu=nu_value, kernel="rbf", gamma=gamma_value)
            novelty_detector.fit(norm_known_data[train_id,:])
            nu_classifiers['NoveltyDetector'] = novelty_detector

            # known classes loop (known vs other known): one detector per
            # known class, fitted on the train rows of that class only
            output_columns = []
            for known_class, known_label in enumerate(class_labels):
                if known_class == novelty_class:
                    continue
                class_idx = np.nonzero(known_trgt == known_class)[0]
                idx = np.intersect1d(class_idx,train_id)
                class_detector = svm.OneClassSVM(nu=nu_value, kernel="rbf", gamma=gamma_value)
                class_detector.fit(norm_known_data[idx,:])
                nu_classifiers[known_label] = class_detector
                output_columns.append(class_detector.predict(norm_all_data))

            # Result file columns: one per known-class detector (in
            # class_labels order), then the novelty detector, then the target.
            output_columns.append(novelty_detector.predict(norm_all_data))
            output_columns.append(all_trgt)
            all_outputs = np.column_stack(output_columns)
            np.savetxt('%s_%s_novelty_%i_fold_%f_nu.txt'%
                                (result_file_base_name, novelty_label,ifold,nu_value),all_outputs)


# Create a train classifier file
joblib.dump([classifiers],class_info_name,compress=9)

# train_info is written last because the folds of every novelty class were
# added to it during training.
joblib.dump([train_info],train_info_name,compress=9)

    

Train Novelty Classifier for ClassA
Starting Training Processing for fold:  0
Starting Training Processing for fold:  1
Train Novelty Classifier for ClassB
Starting Training Processing for fold:  0
Starting Training Processing for fold:  1
Train Novelty Classifier for ClassC
Starting Training Processing for fold:  0
Starting Training Processing for fold:  1
Train Novelty Classifier for ClassD
Starting Training Processing for fold:  0
Starting Training Processing for fold:  1


['/home/natmourajr/Workspace/Doutorado/SonarAnalysis/Results/NoveltyDetection/PCASingleClassSVM/train_info_files/2016_12_01_19_09_08_train_info.jbl']