In [2]:
from pathlib import Path, PureWindowsPath

import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
from mne import io
from sklearn.ensemble import IsolationForest

  from numpy.core.umath_tests import inner1d


In [3]:
# Set 'file_path':
# Build the location as a Path first, then hand it on as a plain string.
eeglab_set_path = Path(r'ANTT_104/ANTT_104_SR_bb_epoch.set')
file_path = str(eeglab_set_path)

In [4]:
%%capture
# Load 'file_path':
try:
    epochs = mne.io.read_epochs_eeglab(file_path)
except:
    epochs = mne.io.read_raw_eeglab(file_path)

In [5]:
%%capture 
# Convert to and clean DataFrame:
df = epochs.to_data_frame()
columns, df = sorted(list(df.columns)), df.reset_index()

try: 
    df = df.drop(['condition'], axis=1)
except:
    pass

cleaned_columns = ['time']
if 'epoch' in list(df.columns):
    cleaned_columns += ['epoch']
cleaned_columns += columns

df = df[cleaned_columns]
df_ = df.copy()

In [8]:
# Preview the cleaned frame; show only the first rows instead of dumping the whole DataFrame.
df_.head(10)

signal,time,epoch,C3,C4,CZ,F3,F4,F7,F8,FZ,...,Fp2,O1,O2,P3,P4,PZ,T3,T4,T5,T6
0,0,0,18.251484,8.878200,13.774623,20.785551,10.754375,17.883970,0.314645,13.377320,...,-1.321949,11.987516,5.540322,24.166014,19.589321,17.815702,12.756618,30.533876,36.696552,15.608931
1,2,0,19.413998,10.551571,14.403508,21.628254,12.476474,20.827175,2.359864,15.560883,...,-3.272072,11.222661,6.755274,24.486435,20.475994,16.376286,13.033691,34.110550,36.726627,18.329454
2,5,0,19.821663,11.870633,14.418718,21.664150,14.014328,23.378773,4.141986,16.829287,...,-4.371666,10.408059,7.632828,24.295633,21.047903,14.898409,12.395204,36.771248,36.528152,20.288399
3,8,0,18.991369,12.652283,13.442754,20.528532,15.265518,25.149801,5.478600,16.688211,...,-4.117080,9.531569,8.143073,23.236612,21.139702,13.319111,10.335381,37.951443,35.864132,21.136862
4,10,0,16.908972,12.923745,11.498272,18.488470,16.202923,25.921816,6.304400,15.300371,...,-2.579046,8.640326,8.575800,21.252016,20.751997,11.577202,7.064504,37.610100,34.594765,21.063717
5,12,0,14.078374,12.802263,9.005999,16.361603,16.782419,25.839060,6.640090,13.391527,...,-0.417853,7.833030,9.244510,18.643202,19.920885,9.664316,3.506332,36.171459,32.830830,20.557289
6,15,0,11.339025,12.346977,6.602964,15.101310,16.873850,25.424156,6.541913,11.857291,...,1.391637,7.227495,10.141708,15.974682,18.590885,7.670417,0.918454,34.290600,30.990147,20.017326
7,18,0,9.496382,11.523294,4.861506,15.242969,16.312838,25.323471,6.080172,11.284827,...,2.032457,6.918358,10.856853,13.836056,16.635773,5.787090,0.284176,32.568512,29.655689,19.518223
8,20,0,8.954931,10.308557,4.061986,16.556246,15.063479,25.937601,5.357986,11.686769,...,1.258330,6.941908,10.838818,12.586424,14.024016,4.256277,1.819123,31.344296,29.294687,18.873804
9,22,0,9.571368,8.835130,4.139492,18.161879,13.370912,27.201981,4.539310,12.611631,...,-0.512016,7.261451,9.795579,12.234160,10.977844,3.283539,4.887772,30.650286,30.016972,17.905113


In [6]:
# Select values from columns for IForest:
# Every column except the 'time' / 'epoch' bookkeeping columns is used as a
# feature. The names are read from 'df_' itself, the frame that is indexed
# below, so the selection cannot drift from the data it is applied to.
non_feature_columns = ('time', 'epoch')
value_columns = [col for col in df_.columns if col not in non_feature_columns]

df_values = df_[value_columns]

In [7]:
# Preview the feature matrix; show only the first rows instead of dumping the whole DataFrame.
df_values.head(10)

signal,C3,C4,CZ,F3,F4,F7,F8,FZ,Fp1,Fp2,O1,O2,P3,P4,PZ,T3,T4,T5,T6
0,18.251484,8.878200,13.774623,20.785551,10.754375,17.883970,0.314645,13.377320,8.283985,-1.321949,11.987516,5.540322,24.166014,19.589321,17.815702,12.756618,30.533876,36.696552,15.608931
1,19.413998,10.551571,14.403508,21.628254,12.476474,20.827175,2.359864,15.560883,5.907608,-3.272072,11.222661,6.755274,24.486435,20.475994,16.376286,13.033691,34.110550,36.726627,18.329454
2,19.821663,11.870633,14.418718,21.664150,14.014328,23.378773,4.141986,16.829287,4.017718,-4.371666,10.408059,7.632828,24.295633,21.047903,14.898409,12.395204,36.771248,36.528152,20.288399
3,18.991369,12.652283,13.442754,20.528532,15.265518,25.149801,5.478600,16.688211,2.892823,-4.117080,9.531569,8.143073,23.236612,21.139702,13.319111,10.335381,37.951443,35.864132,21.136862
4,16.908972,12.923745,11.498272,18.488470,16.202923,25.921816,6.304400,15.300371,2.558281,-2.579046,8.640326,8.575800,21.252016,20.751997,11.577202,7.064504,37.610100,34.594765,21.063717
5,14.078374,12.802263,9.005999,16.361603,16.782419,25.839060,6.640090,13.391527,2.808484,-0.417853,7.833030,9.244510,18.643202,19.920885,9.664316,3.506332,36.171459,32.830830,20.557289
6,11.339025,12.346977,6.602964,15.101310,16.873850,25.424156,6.541913,11.857291,3.310123,1.391637,7.227495,10.141708,15.974682,18.590885,7.670417,0.918454,34.290600,30.990147,20.017326
7,9.496382,11.523294,4.861506,15.242969,16.312838,25.323471,6.080172,11.284827,3.746432,2.032457,6.918358,10.856853,13.836056,16.635773,5.787090,0.284176,32.568512,29.655689,19.518223
8,8.954931,10.308557,4.061986,16.556246,15.063479,25.937601,5.357986,11.686769,3.963636,1.258330,6.941908,10.838818,12.586424,14.024016,4.256277,1.819123,31.344296,29.294687,18.873804
9,9.571368,8.835130,4.139492,18.161879,13.370912,27.201981,4.539310,12.611631,4.061266,-0.512016,7.261451,9.795579,12.234160,10.977844,3.283539,4.887772,30.650286,30.016972,17.905113


In [None]:
# Run IForest:
X = df_values
# 'contamination' is the proportion of samples the model should flag as
# outliers; random_state pins the forest so reruns give the same result.
clfIF = IsolationForest(random_state=42, contamination=0.00001, n_jobs=-1)
clfIF.fit(X)

# The model is scored on the same data it was fitted on, so the "train" and
# "test" predictions are identical. Predict once and copy instead of running
# the forest twice.
pred_train = clfIF.predict(X)
pred_test = pred_train.copy()
count_train = np.unique(ar=pred_train, return_counts=True)
count_test = np.unique(ar=pred_test, return_counts=True)

# predict() labels outliers as -1 and inliers as 1; collect outlier positions.
index_train = np.flatnonzero(pred_train == -1).tolist()
index_test = np.flatnonzero(pred_test == -1).tolist()

# The indices are row positions, so select with iloc rather than by label.
df_IF = df_.iloc[index_test]

# Count directly from the predictions. Reading fixed slots of the np.unique
# output breaks when no sample is flagged: only label 1 is present, so slot 0
# would be the inlier count and slot 1 would not exist.
num_anomalies = len(index_train), len(index_test)
# Total number of scored samples (inliers and outliers together).
total_pts = len(pred_train), len(pred_test)

In [None]:
# Report the (train, test) tallies computed by the IForest cell, one per line.
summary_lines = (
    ("Number of anomalies: ", num_anomalies),
    ("Total points:", total_pts),
)
for label, value in summary_lines:
    print(label, value)