In [1]:
import numpy as np
import pickle
import os
from src.ann_module import ANN
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score

### Data Imports and recollection

In [2]:
dir_path = r'./data/features/lab_single'
source_file_nr = len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))])


for number in range(source_file_nr):
    
    with open(os.path.join(dir_path, f'onset_scenario_{number + 1}'), 'rb') as file:
        signal_extracted_scenario = pickle.load(file)
    # create a single array with all the feature files
    if number == 0:
        signal_extracted = signal_extracted_scenario
    else:
        signal_extracted = np.append(signal_extracted, signal_extracted_scenario, axis=0)

In [3]:
dir_path_multi = r'./data/features/lab_multi'
test_file_names = ['onset_scenario_3', 'onset_scenario_9', 'onset_scenario_12']
test_cases_names = ['A', 'B', 'C']
test_cases = {}
pd_clusters = [[2], [3], [3, 4, 5]]


for i, test_f in enumerate(test_file_names):
    
    with open(os.path.join(dir_path_multi, test_f), 'rb') as file:
        signal_extracted_scenario = pickle.load(file)
        
        # Cluster labels from clustering are unordered and do not correspond to the acual class. The proper PD-int class is assigned by study of the PRPD
        cluster_labels_old = signal_extracted_scenario[:, 2].copy()
        cluster_labels_new = np.ones(len(cluster_labels_old))
        cluster_labels_new[np.isin(cluster_labels_old, pd_clusters[i])] = 0

        signal_extracted_scenario[:, 2] = cluster_labels_new
        test_cases[test_cases_names[i]] = signal_extracted_scenario

Separate data

In [4]:
np.random.shuffle(signal_extracted)

# Unpack the data
id_train = signal_extracted[:, 0]
sens_train = signal_extracted[:, 1]
cluster_train = signal_extracted[:, 2].copy()
signals_train = signal_extracted[:, 3:]

Fix the labels 
- for training the labels should start from 0 and they are currently skipping value 7
- for test the values are labeled as clustering output (ex 1-5 not the recorded defect class)

- Scenario 3 - PD = cluster 1
- Scenario 9 - PD = cluster 2
- Scenario 12 - PD = cluster 3, 4


In [5]:
cluster_train[cluster_train > 6] = cluster_train[cluster_train > 6] - 1
cluster_train = cluster_train - 1
sensors_sorted = np.sort(np.unique(sens_train))

### Binary

In [6]:
y_train = np.zeros(np.shape(cluster_train))
y_train[cluster_train > 5] = 1  # the last few labels are interference (1) the first few PD (0)

# scaler = StandardScaler()
# X_train = scaler.fit_transform(signals_train)

X_train = signals_train

In [7]:
best_models_bin = []
results_bin = []

# One model per sensor is trained
for sensor in sensors_sorted:

    print(f'Train sens: {sensor}')
    X_train_sens = X_train[sens_train == sensor]
    y_train_sens = y_train[sens_train == sensor]

    ann_classifier = ANN(np.shape(X_train)[1], 25, len(np.unique(y_train)))
    ann_classifier.cv_fit(X_train_sens, y_train_sens, patience = 10)

    best_models_bin.append(ann_classifier)
    results_bin.append(ann_classifier.val_score_)


Train sens: 0
Training model with cv = 5 splits
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300


In [8]:
for case in test_cases_names:

    data_case = test_cases[case]

    id_test = data_case[:, 0]
    sens_test = data_case[:, 1]
    cluster_test = data_case[:, 2]
    signals_test = data_case[:, 3:]
    

    # Scale the input and define binary label
    # scaler = StandardScaler()
    # X_test = scaler.fit_transform(signals_test)

    X_test = signals_test
    y_true = cluster_test

    # Create merged label by sensor (average target class for all events belonging to the same instance)
    unique_inst_id = np.sort(np.unique(id_test))
    y_true_merged = [round(np.average(y_true[id_test == id])) for id in unique_inst_id]

    id_list = []
    y_list = []

    # Iterate through sensors
    for sens in sensors_sorted:

        # Extract X and id for the tried sensor
        X_test_sens = X_test[sens_test == sens]
        id_test_sens = id_test[sens_test == sens]

        # Predict
        y_test_sens = best_models_bin[sens].predict(X_test_sens)

        id_list.extend(id_test_sens)
        y_list.extend(y_test_sens)


    id_array = np.array(id_list)
    y_array = np.array(y_list)
    y_test_merged = [round(np.average(y_array[id_array == id])+0.01) for id in unique_inst_id]

    print(f'Accuracy for case: {case}: {accuracy_score(y_true_merged, y_test_merged)}')
    # print(confusion_matrix(y_true_merged, y_test_merged))
    # print(recall_score(y_true_merged, y_test_merged))

Accuracy for case: A: 0.899812734082397
Accuracy for case: B: 0.961218836565097
Accuracy for case: C: 0.7989080982711556


### Multiclass

In [14]:
y_train = cluster_train  # the last few labels are interference (1) the first few PD (0)

# scaler = StandardScaler()
# X_train = scaler.fit_transform(signals_train)
X_train = signals_train

In [18]:
best_models_mc = []
results_mc = []

# One model per sensor is trained
for sensor in sensors_sorted:

    print(f'Train sens: {sensor}')
    X_train_sens = X_train[sens_train == sensor]
    y_train_sens = y_train[sens_train == sensor]

    ann_classifier = ANN(np.shape(X_train)[1], 60, len(np.unique(y_train)))
    ann_classifier.cv_fit(X_train_sens, y_train_sens, patience = 10)

    best_models_mc.append(ann_classifier)
    results_mc.append(ann_classifier.val_score_)

Train sens: 0
Training model with cv = 5 splits
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300


In [19]:
for case in test_cases_names:

    data_case = test_cases[case]

    id_test = data_case[:, 0]
    sens_test = data_case[:, 1]
    cluster_test = data_case[:, 2]
    signals_test = data_case[:, 3:]
    

    # Scale the input and define binary label
    # scaler = StandardScaler()
    # X_test = scaler.fit_transform(signals_test)
    X_test = signals_test


    y_true = cluster_test

    # Create merged label by sensor (average target class for all events belonging to the same instance)
    unique_inst_id = np.sort(np.unique(id_test))
    y_true_merged = [round(np.average(y_true[id_test == id])) for id in unique_inst_id]

    id_list = []
    y_list = []

    # Iterate through sensors
    for sens in sensors_sorted:

        # Extract X and id for the tried sensor
        X_test_sens = X_test[sens_test == sens]
        id_test_sens = id_test[sens_test == sens]

        # Predict
        y_test_sens = best_models_mc[sens].predict(X_test_sens)

        id_list.extend(id_test_sens)
        y_list.extend(y_test_sens)


    id_array = np.array(id_list)
    y_array = np.array(y_list)
    y_array_merged = [round(np.average(y_array[id_array == id])+0.01) for id in unique_inst_id]
    y_test_merged = np.zeros(len(y_array_merged))
    y_test_merged[np.array(y_array_merged) > 5] = 1



    print(f'Accuracy for case: {case}: {accuracy_score(y_true_merged, y_test_merged)}')
    # print(confusion_matrix(y_true_merged, y_test_merged))
    # print(recall_score(y_true_merged, y_test_merged))

Accuracy for case: A: 0.8146067415730337
Accuracy for case: B: 0.9039704524469068
Accuracy for case: C: 0.7497725204731575


In [20]:
np.average(results_mc)

0.9461251944303513