# Semi-Supervised Training
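An attempt to train a classifier on the online EEG data in a semi-supervised manner, i.e. using a small set of labeled samples together with a larger set of unlabeled samples.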

https://scikit-learn.org/stable/modules/label_propagation.html

In [19]:
# Imports
import os, sys
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from numpy import mean

# Make the project's local modules importable from this notebook.
# '../code' is resolved against the current working directory, so the notebook
# has to be started from its own folder for the local imports below to work.
module_path = os.path.abspath('../code')
print(module_path)
# Only append once so that re-running this cell does not add duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

from machine_learning_data_generation import loadPickeldDataset, createXyFromFrequencyDf, loadOnlineEEGdata
from consts import TARGET_FATIGUE, TARGET_NORMAL, TARGET_UNLABELED

D:\Masterthesis\thesis_eeg\code


In [2]:
# Location of the pickled online EEG dataset. Defaults to the original local
# path; set the EEG_DATA_DIR environment variable to run the notebook on a
# different machine without editing this cell.
# NOTE(review): the loader's name suggests it unpickles files - only point it
# at data from a trusted source.
EEG_DATA_DIR = os.environ.get("EEG_DATA_DIR", "D:/Masterthesis/EEG_Data/eeg_data_online")
eegDataset = loadPickeldDataset(EEG_DATA_DIR)

Load Subject 1 Data...
Load Subject 10 Data...
Load Subject 11 Data...
Load Subject 12 Data...
Load Subject 2 Data...
Load Subject 3 Data...
Load Subject 4 Data...
Load Subject 5 Data...
Load Subject 6 Data...
Load Subject 7 Data...
Load Subject 8 Data...
Load Subject 9 Data...


In [25]:
# Positions inside eegDataset[subject][target]; FREQ_DF is the entry that
# createSemiSupervisedDataFromOnlineEegData reads the frequency features from.
# NOTE(review): EPOCH_SERIES is not used in the cells shown here - presumably
# the index of the epoch series stored next to the frequency frame; confirm.
EPOCH_SERIES, FREQ_DF = 0, 1

# Numeric class label for every target. -1 is the marker scikit-learn's
# semi-supervised estimators expect for unlabeled samples.
TARGET_LABEL_DICT = {
    TARGET_NORMAL: 0,
    TARGET_UNLABELED: -1,
    TARGET_FATIGUE: 1,
}


def _concatenateOrNone(parts):
    '''Join a list of arrays along the first axis; return None for an empty list.'''
    if not parts:
        return None
    return np.concatenate(parts)


def createSemiSupervisedDataFromOnlineEegData(eegDataset, target_label_dict, normalData = 0.3) -> "tuple[tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]":
    '''Create a labeled "normal" set and an unlabeled set for semi-supervised learning.

    For every subject the frequency features of the normal recording are split:
    the leading rows are kept as labeled normal data, the remaining rows go
    into the unlabeled set. All frequency features of the fatigue recording go
    into the unlabeled set as well.

    Parameters
    ----------
    eegDataset : mapping
        Indexed as eegDataset[subject][target][FREQ_DF], where target is
        TARGET_NORMAL or TARGET_FATIGUE.
    target_label_dict : mapping
        Maps TARGET_NORMAL and TARGET_UNLABELED to the numeric labels that are
        handed to createXyFromFrequencyDf.
    normalData : float, default 0.3
        Fraction (0.0 - 1.0) of each subject's normal rows that stays labeled.

    Returns
    -------
    ((X_normal, y_normal), (X_unlabeled, y_unlabeled))
        The per-subject results concatenated along the first axis, in subject
        iteration order. All four entries are None if eegDataset is empty.

    Raises
    ------
    ValueError
        If normalData is outside [0, 1], or if the per-subject arrays cannot
        be concatenated (e.g. differing feature dimensions). Such a mismatch
        is reported instead of silently discarding the data collected so far.
    '''
    if not 0.0 <= normalData <= 1.0:
        raise ValueError("normalData must be between 0.0 and 1.0, got {}".format(normalData))

    # Collect the per-subject pieces and concatenate once at the end. This
    # avoids concatenating onto a None start value and catching the resulting
    # ValueError, which would also hide genuine shape mismatches.
    normalXParts, normalyParts = [], []
    unlabeledXParts, unlabeledyParts = [], []

    for subject in eegDataset:
        normalFreqDf = eegDataset[subject][TARGET_NORMAL][FREQ_DF]

        # Number of leading rows that stay labeled as normal; the rest of the
        # normal recording is moved to the unlabeled set.
        rowCount = len(normalFreqDf)
        labeledRowCount = rowCount - int(rowCount * (1.0 - normalData))

        labeledDf = normalFreqDf[0:labeledRowCount]
        heldOutDf = normalFreqDf[labeledRowCount:]

        # Labeled normal data
        tempX, tempy = createXyFromFrequencyDf(freqDf = labeledDf,
                                               target = target_label_dict[TARGET_NORMAL])
        normalXParts.append(tempX)
        normalyParts.append(tempy)

        # Normal data that is deliberately treated as unlabeled
        tempX, tempy = createXyFromFrequencyDf(freqDf = heldOutDf,
                                               target = target_label_dict[TARGET_UNLABELED])
        unlabeledXParts.append(tempX)
        unlabeledyParts.append(tempy)

        # Fatigue data, all of it treated as unlabeled
        tempX, tempy = createXyFromFrequencyDf(freqDf = eegDataset[subject][TARGET_FATIGUE][FREQ_DF],
                                               target = target_label_dict[TARGET_UNLABELED])
        unlabeledXParts.append(tempX)
        unlabeledyParts.append(tempy)

    print("Finished creating normal/unlabeled Freq Data")
    return ((_concatenateOrNone(normalXParts), _concatenateOrNone(normalyParts)),
            (_concatenateOrNone(unlabeledXParts), _concatenateOrNone(unlabeledyParts)))

In [36]:
# Build the labeled-normal and the unlabeled partition from the loaded dataset.
freq_normal, freq_unlabeled = createSemiSupervisedDataFromOnlineEegData(eegDataset, TARGET_LABEL_DICT)

# Each partition is an (X, y) pair - unpack it into separate variables.
X_freq_normal, y_freq_normal = freq_normal
X_freq_unlabeled, y_freq_unlabeled = freq_unlabeled

# Sanity check: X and y of a partition must have the same number of samples.
print("Normal Data Shape: X: {} --- y: {}".format(X_freq_normal.shape,
                                                  y_freq_normal.shape))
print("Unlabeled Data Shape: X: {} --- y: {}".format(X_freq_unlabeled.shape,
                                                     y_freq_unlabeled.shape))


Finished creating normal/unlabeled Freq Data
Normal Data Shape: X: (216, 1, 1200) --- y: (216,)
Unlabeled Data Shape: X: (1224, 1, 1200) --- y: (1224,)


### Now we have machine-learning-ready data

In [37]:
from sklearn import svm
from sklearn.semi_supervised import LabelSpreading