In [15]:
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model, neighbors,datasets
from helpers import *

In [16]:
timestep=0.128040973111396;
fc=1/timestep;

In [50]:
yes_dxy_contents = sio.loadmat('NIRSdxy_yes_signal.mat')
no_dxy_contents = sio.loadmat('NIRSdxy_no_signal.mat')

yes_oxy_contents = sio.loadmat('NIRSoxy_yes_signal.mat')
no_oxy_contents = sio.loadmat('NIRSoxy_no_signal.mat')


In [51]:
def feature_extraction(signal,feature_dictionary):
    feature=[]
    if(feature_dictionary["fft_max_frequencies"]==1):
        f=np.linspace(0, fc, num=signal.size)
        spectrum= np.abs(np.fft.fft(signal));
        fft_max_frequencies=f[np.argsort(spectrum)[-3:]]
        feature=np.concatenate((feature,fft_max_frequencies), axis=0);
    

    if(feature_dictionary["mean"]==1):
        mean=np.mean(signal);
        feature=np.concatenate((feature,[mean]), axis=0)
    
    if(feature_dictionary["variance"]==1):
        variance=np.var(signal)
        feature=np.concatenate((feature,[variance]), axis=0)
    #crest factor 
    if(feature_dictionary["crest_factor"]==1):
        crest_factor=np.sum(np.power(signal,2))/signal.size
        feature=np.concatenate((feature,[crest_factor]), axis=0)
    return np.asarray(feature)
#.reshape([feature.size,1])
        
    

In [52]:
def get_train_matrix(channels, feature_dictionary,label):
    list_train=[]
    list_labels=[]
    dim=channels.shape
    for i in range (0,dim[1]):
        single_measurement=channels[0,i]
        dim1=single_measurement.shape
        for j  in range (0,dim1[1]):
            features=feature_extraction(single_measurement[:,j],feature_dictionary)
            list_train.append([features]);
        labels=get_labels(dim1[1],label);
        list_labels.append([labels]);
        
            
            
    train_TX=np.concatenate(list_train)
    labels=np.concatenate(list_labels,axis=1)
    
    return train_TX,labels.T.reshape(labels.size)

def get_labels(number, string):
    if(string=="No"):
        return np.zeros(number)    
    if(string=="Yes"):
        return np.ones(number)
    

In [44]:
def add_ones(tx):
    """
	Add column of ones to the dataset tx
    """
    return np.concatenate((tx, np.ones([tx.shape[0],1])), axis=1)

def standardize(x):
    """Standardize the data set x."""
    # Compute the mean for each column
    mean_x = np.mean(x, axis=0)
    x = x - mean_x
    # Compute the standard deviation for each column
    std_x = np.std(x, axis=0)
    x = x / std_x
    return np.array(x)



def build_poly(x, degree):
    """ Returns the polynomial basis functions for input data x, for j=2 up to j=degree."""
    new_cols=np.array([x**p for p in range(2,degree+1)]).T;
    return new_cols

def add_powers(tx, degree):
    for col in range(0,tx.shape[1]): 
            tx = np.concatenate((tx, build_poly(tx[:,col], degree)), axis=1)
    return tx


def build_k_indices(y, k_fold, seed):
    """Build k indices for k-fold."""
    num_row = y.shape[0]
    interval = int(num_row / k_fold)
    np.random.seed(seed)
    indices = np.random.permutation(num_row)
    k_indices = [indices[k * interval: (k + 1) * interval]
                 for k in range(k_fold)]
    return np.array(k_indices)

def cross_validation_logistic_regularized(Y,X, degrees, lambdas, k_fold, seed, max_iters):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(Y, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(lambdas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(lambdas), k_fold])

    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        Y_cross_val_test = Y[k_index]
        X_cross_val_test = X[k_index,:]
        
        
        # Create the training set for this fold number
        mask = np.ones(len(Y), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        Y_cross_val_train = Y[mask] # select only True elements (ie train elements)
        X_cross_val_train = X[mask,:]
       
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,k], accuracies_test_by_fold[:,:,k] = cross_validation_one_fold_logistic_regularized\
            (Y_cross_val_train, Y_cross_val_test, X_cross_val_train, X_cross_val_test, \
                                 degrees, lambdas,max_iters)
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=2)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=2)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test,max_id_lambda = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_lambda=lambdas[max_id_lambda]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test,max_id_lambda]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test,max_id_lambda]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg , 'lambda=',best_lambda)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_lambda, best_accuracy_test, corresponding_accuracy_train                        


def cross_validation_one_fold_logistic_regularized(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, lambdas, max_iters):
    
    accuracies_train_by_deg = np.zeros([len(degrees),len(lambdas)])
    accuracies_test_by_deg = np.zeros([len(degrees),len(lambdas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train,deg )
        tx_cross_val_train = add_ones(tx_cross_val_train)
        
        tx_cross_val_test = add_powers(tx_cross_val_test,deg)
        tx_cross_val_test = add_ones(tx_cross_val_test)
        
        
        for lambda_id, single_lambda in enumerate(lambdas):
                
                print('>> Lambda', single_lambda, '<<')
                # Compute the best weights on the training set
                logreg = linear_model.LogisticRegression(C=1/single_lambda, class_weight="balanced",max_iter=max_iters)
                logreg.fit(tx_cross_val_train,y_cross_val_train )

                # Compute the predictions
                y_predicted_cross_val_train = logreg.predict(tx_cross_val_train)
                y_predicted_cross_val_test = logreg.predict(tx_cross_val_test)



                # Compute the accuracies for each degree
                accuracies_train_by_deg[deg_id,lambda_id] = \
                    np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
                accuracies_test_by_deg[deg_id,lambda_id] = \
                    np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)


                print(accuracies_test_by_deg[deg_id,lambda_id])
        
        
    return accuracies_train_by_deg, accuracies_test_by_deg






# DEOXY SIGNALS

In [49]:
#buildig the train matrix and labels

channels_no=no_dxy_contents["no_signal"]
channels_yes=yes_dxy_contents["yes_signal"]

# select which feature select
feature_dictionary = {
        "fft_max_frequencies" : 1, 
         "mean" : 1, 
         "variance" : 1,
         "crest_factor" : 1
         }



train_TX_yes,labels_yes=get_train_matrix(channels_yes, feature_dictionary,"Yes");
train_TX_no,labels_no=get_train_matrix(channels_no, feature_dictionary,"No");

train_TX=np.concatenate((train_TX_yes,train_TX_no),axis=0)
labels=np.concatenate((labels_yes,labels_no),axis=0)

NameError: name 'no_dxy_contents' is not defined

In [48]:
degrees = range(2,4)
lambdas = np.logspace(-8,-2,6)
k_fold=2
seed = 2
max_iters = 5000
print(train_TX.shape)

best_deg,best_lambda, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_logistic_regularized(labels,train_TX, degrees, lambdas, k_fold, seed, max_iters)


(900, 6)
--- Fold 0 ---
++ Degree 2 ++
>> Lambda 1e-08 <<
0.5
>> Lambda 1.58489319246e-07 <<
0.5
>> Lambda 2.51188643151e-06 <<
0.5
>> Lambda 3.98107170553e-05 <<
0.5
>> Lambda 0.00063095734448 <<
0.5
>> Lambda 0.01 <<
0.5
++ Degree 3 ++
>> Lambda 1e-08 <<
0.515555555556
>> Lambda 1.58489319246e-07 <<
0.515555555556
>> Lambda 2.51188643151e-06 <<
0.515555555556
>> Lambda 3.98107170553e-05 <<
0.515555555556
>> Lambda 0.00063095734448 <<
0.515555555556
>> Lambda 0.01 <<
0.515555555556
--- Fold 1 ---
++ Degree 2 ++
>> Lambda 1e-08 <<
0.513333333333
>> Lambda 1.58489319246e-07 <<
0.513333333333
>> Lambda 2.51188643151e-06 <<
0.513333333333
>> Lambda 3.98107170553e-05 <<
0.513333333333
>> Lambda 0.00063095734448 <<
0.513333333333
>> Lambda 0.01 <<
0.515555555556
++ Degree 3 ++
>> Lambda 1e-08 <<
0.517777777778
>> Lambda 1.58489319246e-07 <<
0.517777777778
>> Lambda 2.51188643151e-06 <<
0.517777777778
>> Lambda 3.98107170553e-05 <<
0.517777777778
>> Lambda 0.00063095734448 <<
0.517777777778


In [55]:
#logistic regression
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(train_TX, labels)
predicted_labels= logreg.predict(train_TX)
logreg_accuracy=get_accuracy(predicted_labels, labels)
print(logreg_accuracy)


#KNN 
clf = neighbors.KNeighborsClassifier(200)
clf.fit(train_TX, labels)
predicted_labels= clf.predict(train_TX)
KNN_accuracy=get_accuracy(predicted_labels, labels)
print(KNN_accuracy)


0.47333333333333333
0.4411111111111111
