## DETECTION AND CORRECTION OF EYE BLINK ARTIFACT IN SINGLE CHANNEL ELECTROENCEPHALOGRAM (EEG) SIGNAL USING A SIMPLE k-means CLUSTERING ALGORITHM ##

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

### config ###

In [9]:
# Input CSV; the functions below pick one column of it by integer position.
PATH = './data/filtered_data/filtered_s01_ex01_s01.csv'
# Prefix for every intermediate/result CSV. It is joined to file names by
# plain string concatenation, so the trailing slash is required.
OUTPUT_PATH = './data/output/'
# Alternative locations (presumably for a hosted notebook run -- confirm):
# PATH = '../input/noised-data/filtered_s01_ex01_s01.csv'
# OUTPUT_PATH='./'
SIZE = 24000  # number of leading samples taken from the selected column
ROWS = 120  # window length = number of rows of the sliding-window matrix
COLS = SIZE - ROWS + 1  # number of windows = number of columns of that matrix

### graph ###

In [10]:
def all_graphs(y1, y2, y3):
    """Draw three signals in vertically stacked panels and show the figure.

    Args:
        y1: Samples for the top panel (titled as the contaminated signal).
        y2: Samples for the middle panel (titled as the EEG component).
        y3: Samples for the bottom panel (titled as the eye blink component).

    Every panel is drawn in blue with its y-axis fixed to [-40, 40].
    """
    panels = (
        (y1, 'Eye blink contaminated EEG signal'),
        (y2, 'Extracted EEG component'),
        (y3, 'Extracted eye blink component'),
    )
    fig, axs = plt.subplots(len(panels), figsize=(24, 8))

    for axis, (samples, caption) in zip(axs, panels):
        axis.set_title(caption)
        axis.plot(samples, color='blue', label=caption)
        axis.set_ylim([-40, 40])

    # Widen the vertical gap between the stacked panels.
    fig.subplots_adjust(hspace=0.8)
    plt.show()

### tool ###

In [11]:
def create_matrix_file(COL):
    """Build the sliding-window matrix and per-window features for one column.

    Reads ``PATH``, takes the first ``SIZE`` samples of the column at integer
    position ``COL`` and writes two CSV files under ``OUTPUT_PATH``:

    * ``mat.csv`` -- a ``ROWS`` x ``COLS`` matrix whose column ``i`` holds
      samples ``i .. i + ROWS - 1``. Columns are labelled by position; the
      rest of this file only ever indexes the matrix positionally.
    * ``amc.csv`` -- one row per window: its variance ("activity") followed
      by the two values returned by ``hjorth`` for that window.

    Args:
        COL: Integer position of the column to read from the input CSV.

    Raises:
        ValueError: If the selected column holds fewer than ``SIZE`` samples,
            because the trailing windows could not be filled.
    """
    samples = pd.read_csv(PATH).iloc[0:SIZE, COL].to_numpy()
    if samples.shape[0] < SIZE:
        raise ValueError(
            f"column {COL} of {PATH} has {samples.shape[0]} samples, "
            f"but SIZE={SIZE} are required"
        )

    # Build all windows with a single broadcasted index instead of growing a
    # DataFrame one column at a time (which recopies the frame on every step).
    # Shape is (COLS, ROWS): row i is the contiguous window starting at i.
    window_starts = np.arange(COLS)[:, None]
    offsets = np.arange(ROWS)[None, :]
    windows = samples[window_starts + offsets]

    # On disk the windows are stored as columns, hence the transpose.
    pd.DataFrame(windows.T).to_csv(OUTPUT_PATH + "mat.csv", index=False)

    amc = []
    for window in windows:
        mobility, complexity = hjorth(window)
        amc.append([np.var(window), mobility, complexity])
    pd.DataFrame(amc).to_csv(OUTPUT_PATH + "amc.csv", index=False)


def hjorth(X, D=None):
    """Return the Hjorth mobility and complexity of a 1-D signal.

    Args:
        X: One-dimensional sequence of samples (list, ndarray or pandas
            Series). It is read positionally, so a Series with an arbitrary
            index is fine.
        D: Optional precomputed first-order difference of ``X``. Any sequence
            is accepted and it is never modified. When omitted it is computed
            with ``np.diff``.

    Returns:
        A ``(mobility, complexity)`` tuple where, with ``n = len(X)`` and
        ``D'`` being ``D`` with ``X[0]`` prepended:

        * ``M2 = sum(D' ** 2) / n``
        * ``TP = sum(X ** 2)`` (a plain sum, not a mean)
        * ``M4 = sum(diff(D') ** 2) / n``
        * ``mobility = sqrt(M2 / TP)``
        * ``complexity = sqrt(M4 * TP / M2 / M2)``

    Raises:
        IndexError: If ``X`` is empty.
    """
    signal = np.asarray(X, dtype=float)
    if D is None:
        first_diff = np.diff(signal)
    else:
        # Work on a fresh array so the caller's sequence is left untouched.
        first_diff = np.asarray(D, dtype=float)

    # Pad the first difference with the first sample so that it has one
    # entry per sample of the signal.
    padded = np.concatenate(([signal[0]], first_diff))

    n = len(signal)

    M2 = np.sum(padded ** 2) / n
    TP = np.sum(signal ** 2)
    M4 = np.sum(np.diff(padded) ** 2) / n

    return np.sqrt(M2 / TP), np.sqrt(M4 * TP / M2 / M2)


def get_tms_tmr(matrix, result):
    """Split the columns of ``matrix`` into two frames by cluster label.

    The label occurring most often in ``result`` is taken as the majority
    cluster. ``matrix`` itself is not modified.

    Args:
        matrix: DataFrame whose column ``i`` belongs to label ``result[i]``.
        result: One non-negative integer label per column.

    Returns:
        ``(tms, tmr)``: two copies of ``matrix``. ``tms`` keeps the
        majority-label columns and has every other column set to 0; ``tmr``
        keeps the other columns and has the majority-label columns set to 0.
    """
    majority_label = np.argmax(np.bincount(result))
    majority_part = matrix.copy()
    remainder_part = matrix.copy()
    for position in range(np.size(result)):
        # Each column survives in exactly one copy and is zeroed in the other.
        if result[position] != majority_label:
            majority_part.iloc[:, position] = 0
        else:
            remainder_part.iloc[:, position] = 0
    return majority_part, remainder_part


def get_diagonal_averaging(mat):
    """Average every anti-diagonal of ``mat`` into one output sample.

    Output sample ``k`` is the mean of all entries ``mat[i, j]`` with
    ``i + j == k``. The dimensions are taken from ``mat`` itself, so the
    function works for any 2-D shape rather than only the one implied by the
    module-level constants.

    Args:
        mat: Two-dimensional DataFrame or array of shape (rows, cols).

    Returns:
        1-D float ndarray of length ``rows + cols - 1`` (empty when ``mat``
        has no rows or no columns).

    Raises:
        ValueError: If ``mat`` is not two-dimensional.
    """
    values = np.asarray(mat, dtype=float)
    if values.ndim != 2:
        raise ValueError("mat must be two-dimensional")

    n_rows, n_cols = values.shape
    if n_rows == 0 or n_cols == 0:
        return np.zeros(0)

    length = n_rows + n_cols - 1
    totals = np.zeros(length)
    counts = np.zeros(length)
    # Row i contributes its entries to outputs i .. i + n_cols - 1. Adding the
    # rows in ascending order keeps the per-sample summation order the same as
    # walking each anti-diagonal from its top row downwards.
    for i in range(n_rows):
        totals[i:i + n_cols] += values[i]
        counts[i:i + n_cols] += 1
    return totals / counts

def show_results(COL):
    """Plot the input column next to the two signals saved on disk.

    Reads the first ``SIZE`` samples of column ``COL`` from ``PATH`` together
    with ``filtered.csv`` and ``noise.csv`` from ``OUTPUT_PATH``, then hands
    all three to ``all_graphs``.

    Args:
        COL: Integer position of the column in the input CSV.
    """
    original = pd.read_csv(PATH).iloc[0:SIZE, COL]
    extracted_eeg = pd.read_csv(OUTPUT_PATH + 'filtered.csv')
    extracted_blink = pd.read_csv(OUTPUT_PATH + 'noise.csv')
    all_graphs(
        np.array(original),
        np.array(extracted_eeg),
        np.array(extracted_blink),
    )


### technique ###

In [12]:
def k_mean_clustering(data):
    """Assign each row of ``data`` to one of two k-means clusters.

    Args:
        data: Feature table with one row per item to cluster.

    Returns:
        The cluster label of every row, as returned by ``KMeans.fit_predict``.
        The seed is fixed (``random_state=0``).
    """
    # fit_predict() fits the model itself, so a separate fit() call beforehand
    # would only repeat the same work.
    return KMeans(n_clusters=2, random_state=0).fit_predict(data)

### Experiment ###

In [13]:
def single_channel(COL):
    """Split the stored window matrix into two signals and save them.

    Loads ``mat.csv`` and ``amc.csv`` from ``OUTPUT_PATH``, clusters the rows
    of the feature table, splits the matrix columns by cluster and
    diagonal-averages each part. The majority-cluster part is written to
    ``filtered.csv`` and the remaining part to ``noise.csv``.

    Args:
        COL: Not used by this function (presumably kept for symmetry with the
            other per-channel entry points).
    """
    windows = pd.read_csv(OUTPUT_PATH + 'mat.csv')
    features = pd.read_csv(OUTPUT_PATH + 'amc.csv')

    labels = k_mean_clustering(features)
    majority_part, remainder_part = get_tms_tmr(windows, labels)
    eeg_estimate = get_diagonal_averaging(majority_part)
    blink_estimate = get_diagonal_averaging(remainder_part)

    # output
    pd.DataFrame(eeg_estimate).to_csv(OUTPUT_PATH + "filtered.csv", index=False)
    pd.DataFrame(blink_estimate).to_csv(OUTPUT_PATH + "noise.csv", index=False)

### For Channel 1 Data ###

In [14]:
# The three steps hand data to each other through CSV files under
# OUTPUT_PATH, so they must run in this order: build mat.csv/amc.csv, derive
# filtered.csv/noise.csv from them, then plot against input column 1.
create_matrix_file(1)
single_channel(1)
show_results(1)

### For Channel 2 Data ###

In [15]:
# Same pipeline for input column 2. Each step reads the CSV files written by
# the previous one (and overwrites those of any earlier run), so keep the order.
create_matrix_file(2)
single_channel(2)
show_results(2)

### For Channel 3 Data ###

In [16]:
# Same pipeline for input column 3. Each step reads the CSV files written by
# the previous one (and overwrites those of any earlier run), so keep the order.
create_matrix_file(3)
single_channel(3)
show_results(3)

### For Channel 4 Data ###

In [17]:
# Same pipeline for input column 4. Each step reads the CSV files written by
# the previous one (and overwrites those of any earlier run), so keep the order.
create_matrix_file(4)
single_channel(4)
show_results(4)