In [1]:
import math

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import make_blobs
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.neighbors import LocalOutlierFactor
from scipy.io import loadmat

In [2]:
def fix_mat(data):
    """Recursively turn a structured (MATLAB-struct-like) array into a dict.

    Parameters
    ----------
    data : numpy.ndarray
        Array to convert. When its dtype has named fields, the value of each
        field is read from ``data[0][field][0]``; presumably this matches the
        (1, 1) layout ``scipy.io.loadmat`` gives to MATLAB structs -- confirm
        against the files actually loaded.

    Returns
    -------
    dict or numpy.ndarray
        For a structured array: a dict keyed by field name, where nested
        structured values are converted recursively and every other value is
        passed through ``np.squeeze``. An array without named fields is
        returned unchanged (it is NOT squeezed).
    """
    field_names = data.dtype.names
    if not field_names:
        # Plain numeric array: nothing to unpack.
        return data

    converted = {}
    for field in field_names:
        value = data[0][field][0]
        if value.dtype.names:
            # Nested struct -> nested dict.
            converted[field] = fix_mat(value)
        else:
            # Leaf array: drop singleton dimensions.
            converted[field] = np.squeeze(value)
    return converted

In [44]:
# .mat file to analyse.
# Alternative file (uses the 'fix_samples' / 'cf_samples' variables instead of
# 'fix_814' / 'cf_814'):
#   '/home/paolo/cvsa_ws/src/analysis_cvsa/detectShift/set_features.mat'
path = '/home/paolo/cvsa_ws/src/analysis_cvsa/detectShift/set_814.mat'
dataset = loadmat(path)

# Sample matrices: fix_data is later split into train/test sets, cf_data is
# later scored by the fitted detectors.
fix_data, cf_data = (fix_mat(dataset[key]) for key in ('fix_814', 'cf_814'))

# Per-trial metadata (cf_info provides 'startEvent' / 'endEvent' used for
# plotting) and the 'typ' variable.
cf_info, fix_info, typ = (
    fix_mat(dataset[key]) for key in ('cf_info', 'fix_info', 'typ')
)



In [45]:
# Ordered (non-shuffled) hold-out split: the first `percentual_train` fraction
# of the rows of fix_data is used for training, the remaining rows for testing.
percentual_train = 0.75
n_train = int(fix_data.shape[0] * percentual_train)  # computed once, used for both slices
X_train = fix_data[:n_train, :]
X_test = fix_data[n_train:, :]

# Every training sample is labelled as an inlier (+1). The labels are kept
# 1-D, shape (n_samples,), which is the target shape scikit-learn expects;
# a column vector (n_samples, 1) only works because it gets flattened later.
y_train = np.ones(X_train.shape[0])

In [46]:
# Define a custom scoring function for One-Class SVM
def one_class_svm_score(y_true, y_pred):
    """
    Score One-Class SVM predictions with plain accuracy.

    One-Class SVM labels inliers as 1 and outliers as -1. The caller is
    expected to pass ``y_true`` filled with 1 (all samples assumed to be
    inliers), in which case the returned value is the fraction of samples
    that were predicted as inliers.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Labels predicted by the model.

    Returns
    -------
    The value returned by ``accuracy_score(y_true, y_pred)``.
    """
    inlier_accuracy = accuracy_score(y_true, y_pred)
    return inlier_accuracy

# Wrap the custom accuracy function so GridSearchCV can use it as a scorer.
scorer = make_scorer(one_class_svm_score)

# Hyper-parameter candidates explored by the grid search.
# NOTE(review): y_train is all ones, so the score is the share of held-out
# samples predicted as inliers -- this likely favours the smallest nu; confirm
# that this is the intended selection criterion.
param_grid = {
    # nu: upper bound on the fraction of training errors
    # (and lower bound on the fraction of support vectors)
    'nu': [0.1, 0.2, 0.4, 0.5],
    # gamma: RBF kernel coefficient
    'gamma': ['scale', 'auto'],
}

# 5-fold cross-validated grid search over an RBF One-Class SVM.
clf = GridSearchCV(
    estimator=OneClassSVM(kernel='rbf'),
    param_grid=param_grid,
    scoring=scorer,
    cv=5,
    verbose=3,
)
clf.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validation score.
print("Best Parameters:", clf.best_params_)
print("Best Cross-Validation Score:", clf.best_score_)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV 1/5] END ...............gamma=scale, nu=0.1;, score=0.833 total time=   0.1s
[CV 2/5] END ...............gamma=scale, nu=0.1;, score=0.969 total time=   0.0s
[CV 3/5] END ...............gamma=scale, nu=0.1;, score=0.714 total time=   0.0s
[CV 4/5] END ...............gamma=scale, nu=0.1;, score=0.945 total time=   0.0s
[CV 5/5] END ...............gamma=scale, nu=0.1;, score=0.781 total time=   0.0s
[CV 1/5] END ...............gamma=scale, nu=0.2;, score=0.779 total time=   0.1s
[CV 2/5] END ...............gamma=scale, nu=0.2;, score=0.904 total time=   0.1s
[CV 3/5] END ...............gamma=scale, nu=0.2;, score=0.628 total time=   0.1s
[CV 4/5] END ...............gamma=scale, nu=0.2;, score=0.794 total time=   0.1s
[CV 5/5] END ...............gamma=scale, nu=0.2;, score=0.664 total time=   0.1s
[CV 1/5] END ...............gamma=scale, nu=0.4;, score=0.641 total time=   0.1s
[CV 2/5] END ...............gamma=scale, nu=0.4;,

In [47]:
# Refit an RBF One-Class SVM on the whole training split using every
# hyper-parameter selected by the grid search. set_params applies whatever
# keys best_params_ contains, so this cell stays in sync with param_grid
# instead of hard-coding the 'nu' / 'gamma' names.
model_svm = OneClassSVM(kernel='rbf').set_params(**clf.best_params_)
# One-Class SVM is unsupervised: fit() ignores y, so only X is passed.
model_svm.fit(X_train)

# Held-out evaluation: every test sample is assumed to be an inlier (+1), so
# the accuracy is the fraction of test samples predicted as inliers.
y_pred = model_svm.predict(X_test)
y_true = np.ones(X_test.shape[0])  # 1-D, same shape as y_pred
print("Test Accuracy:", accuracy_score(y_true, y_pred))

Test Accuracy: 0.884375


In [48]:
%matplotlib qt

cf_pred = model_svm.predict(cf_data)

last_trials = 30
fig, axes = plt.subplots(nrows=5, ncols=6, figsize=(15, 10))
axes = axes.flatten() 
sampleRate = 16
time_plot = 0.5

start_index = cf_info['startEvent'].shape[0] - last_trials

for i in range(start_index, cf_info['startEvent'].shape[0]): 
    c_plot = i - start_index
    c_start = cf_info['startEvent'][i]
    c_end = cf_info['endEvent'][i] + 1
    c_pred = cf_pred[c_start:c_end]
    
    axes[c_plot].scatter(range(0, c_pred.shape[0], 1), c_pred, s=1)
    xticks_ = np.arange(0, c_end-c_start, sampleRate*time_plot)
    xticks_ = np.append(xticks_, c_end-c_start)
    axes[c_plot].set_xticks(xticks_)
    axes[c_plot].set_xticklabels(xticks_/sampleRate)
    axes[c_plot].set_ylim(-1.1, 1.1)
    axes[c_plot].title.set_text('Trial ' + str(i+1))


        
plt.show()

## Local Outlier Factor (LOF)

In [49]:
# Local Outlier Factor in novelty mode: novelty=True is what allows calling
# predict() on data that was not part of the fit. The detector is fitted on
# the same training split as the One-Class SVM.
# n_neighbors=250 is a hand-picked value -- NOTE(review): confirm it suits the
# size of X_train.
lof = LocalOutlierFactor(n_neighbors=250, novelty=True)
lof.fit(X_train)

In [50]:
%matplotlib qt

cf_pred = lof.predict(cf_data)

last_trials = 30
fig, axes = plt.subplots(nrows=5, ncols=6, figsize=(15, 10))
axes = axes.flatten() 
sampleRate = 16
time_plot = 0.5

start_index = cf_info['startEvent'].shape[0] - last_trials

for i in range(start_index, cf_info['startEvent'].shape[0]): 
    c_plot = i - start_index
    c_start = cf_info['startEvent'][i]
    c_end = cf_info['endEvent'][i] + 1
    c_pred = cf_pred[c_start:c_end]
    
    axes[c_plot].scatter(range(0, c_pred.shape[0], 1), c_pred, s=1)
    xticks_ = np.arange(0, c_end-c_start, sampleRate*time_plot)
    xticks_ = np.append(xticks_, c_end-c_start)
    axes[c_plot].set_xticks(xticks_)
    axes[c_plot].set_xticklabels(xticks_/sampleRate)
    axes[c_plot].set_ylim(-1.1, 1.1)
    axes[c_plot].title.set_text('Trial ' + str(i+1))


        
plt.show()