In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sktime.datasets import load_UCR_UEA_dataset
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score

In [2]:
class OneClassJKNN:
    """One-class classifier based on a j-nearest / k-nearest neighbour distance ratio.

    ``fit`` stores, for every training sample, the mean distance to its ``k``
    nearest training neighbours. ``predict`` looks up the ``j`` nearest training
    samples of each query and accepts the query (label ``1``) when its mean
    distance to them is at most ``threshold`` times the mean of those
    neighbours' stored averages; otherwise the query is rejected (label ``-1``).

    Parameters
    ----------
    j : int
        Number of training neighbours looked up for each query sample (>= 1).
    k : int
        Number of neighbours used to characterise each training sample (>= 1).

    Raises
    ------
    ValueError
        If ``j`` or ``k`` is smaller than 1.
    """

    def __init__(self, j, k):
        # Fail early with a clear message instead of deep inside the neighbour search.
        if j < 1 or k < 1:
            raise ValueError(f"j and k must both be >= 1, got j={j}, k={k}")
        self.j = j
        self.k = k

        self.nbrs = NearestNeighbors(n_neighbors=self.k)

    def fit(self, train_data):
        """Index the training samples and store each one's mean k-NN distance.

        Parameters
        ----------
        train_data : array-like of shape (n_samples, n_features)
            Samples of the target class.

        Returns
        -------
        OneClassJKNN
            ``self``, so calls can be chained.
        """
        self.nbrs.fit(train_data)
        # Called without a query, scikit-learn returns the neighbours of every
        # indexed point and does not count a point as its own neighbour.
        distances, _ = self.nbrs.kneighbors()

        self.dist_avg = np.mean(distances, axis=1)

        return self

    def predict(self, test_samples, threshold):
        """Label every sample as belonging to the target class (1) or not (-1).

        Parameters
        ----------
        test_samples : array-like
            One sample per entry along the first axis; each sample is flattened
            to a single feature row before the neighbour lookup.
        threshold : float
            Multiplier applied to the neighbours' mean k-NN distance; larger
            values accept samples that lie farther from the training data.

        Returns
        -------
        numpy.ndarray
            Float array of length ``len(test_samples)`` holding ``1.0`` or ``-1.0``.
        """
        n_samples = len(test_samples)
        if n_samples == 0:
            # No queries: return an empty result rather than querying the index.
            return np.zeros(0)

        # A single batched query replaces one kneighbors() call per sample.
        queries = np.asarray(test_samples).reshape(n_samples, -1)
        distances, indices = self.nbrs.kneighbors(queries, self.j)

        # Mean distance from each query to its j nearest training samples.
        jnbrs_avg = distances.mean(axis=1)
        # Mean of the stored k-NN averages of those same j training samples.
        j_knbrs_avg = self.dist_avg[indices].sum(axis=1) / self.j

        return np.where(jnbrs_avg <= j_knbrs_avg * threshold, 1.0, -1.0)

In [3]:
# Dataset names handed to the loader, listed in the order the experiments run.
DATASETS = [
    "Yoga",
    "WormsTwoClass",
    "Wine",
    "Wafer",
    "TwoLeadECG",
    "Strawberry",
    "SemgHandGenderCh2",
    "BeetleFly",
    "BirdChicken",
    "Computers",
    "DistalPhalanxOutlineCorrect",
    "Earthquakes",
    "ECG200",
    "ECGFiveDays",
    "FordA",
    "FordB",
    "HandOutlines",
    "ItalyPowerDemand",
    "MiddlePhalanxOutlineCorrect",
    "Chinatown",
    "FreezerRegularTrain",
    "FreezerSmallTrain",
    "GunPointAgeSpan",
    "GunPointMaleVersusFemale",
    "GunPointOldVersusYoung",
    "PowerCons",
    "Coffee",
    "Ham",
    "Herring",
    "Lightning2",
    "MoteStrain",
    "PhalangesOutlinesCorrect",
    "ProximalPhalanxOutlineCorrect",
    "ShapeletSim",
    "SonyAIBORobotSurface1",
    "SonyAIBORobotSurface2",
    "ToeSegmentation1",
    "ToeSegmentation2",
    "HouseTwenty",
]

In [4]:
# One-class experiment: for every dataset, each class label in turn is treated as
# the target class. OneClassJKNN is fitted on that class's training series only
# and its +1/-1 predictions on the full test split are scored.

# Hyper-parameters passed to OneClassJKNN for every dataset/label pair.
J_NEIGHBORS = 2
K_NEIGHBORS = 5
THRESHOLD = 1.35


def _nested_to_2d(nested_frame):
    """Build a 2-D array from the first column of a loaded dataset frame.

    The first cell of every row is expanded with ``.tolist()``; any further
    columns are ignored.
    NOTE(review): presumably each cell is a pandas Series holding one
    equal-length series per row -- confirm against the loader's return type.
    """
    return np.array([row[0].tolist() for row in nested_frame.values])


print(f'Starting the experiments with {len(DATASETS)} datasets from UCR...')

for dataset in DATASETS:
    print(f'\tDownloading the {dataset} dataset...')
    X_train, y_train = load_UCR_UEA_dataset(name=dataset, split='train')
    # Cast the labels to int32 so they can be compared numerically below
    # (assumes every label parses as an integer -- TODO confirm for all datasets).
    y_train = np.array(y_train, dtype=np.int32)

    X_test, y_test = load_UCR_UEA_dataset(name=dataset, split='test')
    y_test = np.array(y_test, dtype=np.int32)
    print('\t\tDownload completed!')

    print('\tTransforming data to the correct format...')
    X_train = _nested_to_2d(X_train)
    X_test = _nested_to_2d(X_test)
    print('\t\tTransform completed!')

    print('\tStarting the classifier...')
    for label in np.unique(y_train):
        print(f'\t\tClassifying the label {label}...')

        # Fit on the target class only.
        X_train_ = X_train[y_train == label]

        # Ground truth for the one-class task: 1 for the target label, -1 otherwise.
        occ_labels = np.where(y_test == label, 1, -1)

        clf = OneClassJKNN(j=J_NEIGHBORS, k=K_NEIGHBORS).fit(X_train_)
        result_labels = clf.predict(X_test, THRESHOLD)

        print('\t\tReport:')
        print(f'\t\t\tAccuracy Score = {accuracy_score(occ_labels, result_labels)}')
        print(f'\t\t\tPrecision Score = {precision_score(occ_labels, result_labels)}')
        print(f'\t\t\tRecall Score = {recall_score(occ_labels, result_labels)}')
        print(f'\t\t\tF1 Score = {f1_score(occ_labels, result_labels)}')

    print('-'*100)

Starting the experiments with 39 datasets from UCR...
	Downloading the Yoga dataset...
		Download completed!
	Transforming data to the correct format...
		Transform completed!
	Starting the classifier...
		Classifying the label 1...
		Report:
			Accuracy Score = 0.562
			Precision Score = 0.5168443496801706
			Recall Score = 0.8700646087580761
			F1 Score = 0.6484751203852328
		Classifying the label 2...
		Report:
			Accuracy Score = 0.643
			Precision Score = 0.6133671742808798
			Recall Score = 0.9023024268823896
			F1 Score = 0.7302946361118108
----------------------------------------------------------------------------------------------------
	Downloading the WormsTwoClass dataset...
		Download completed!
	Transforming data to the correct format...
		Transform completed!
	Starting the classifier...
		Classifying the label 1...
		Report:
			Accuracy Score = 0.42857142857142855
			Precision Score = 0.4266666666666667
			Recall Score = 0.9696969696969697
			F1 Score = 0.59259259259259