# Imports

In [None]:
import scipy.io
from pyedflib import highlevel
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from itertools import groupby
import csv
import pickle
from scipy.signal import butter, sosfilt, sosfiltfilt, sosfreqz
from scipy.signal import freqz, iirnotch, filtfilt
from sklearn.preprocessing import MinMaxScaler
from sklearn.base import TransformerMixin, BaseEstimator
import random
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Variables

In [None]:
# Samples per second used when converting durations to row counts
# (presumably the EEG sampling frequency in Hz -- confirm against the recordings).
sample_rate = 256

# Window duration in seconds.
sec = 10

# Step, in seconds, between the starts of consecutive windows: it is multiplied
# by sample_rate and used as the range step in create_data_input. With
# sec = 10 and a 5 s step, neighbouring windows share roughly half their rows.
overlap = 5

# Window length expressed in rows (samples).
len_window = sec * sample_rate

# Data Loading

In [None]:
# Load the modelling table for patient 5, using the first CSV column as the index.
# (The directory name suggests the values were MinMax-scaled upstream -- verify.)
df = pd.read_csv('../data_modeling/MinMaxScaler/data_modeling_patient_5.csv', index_col=[0])
# Show the first rows as the cell output.
df.head()

# Feature Engineering

## Flatten and concatenate the data

In [None]:
def flatten_window(window_df):
    """Collapse one window of rows into a single-row feature frame.

    Parameters
    ----------
    window_df : pandas.DataFrame
        Rows of one window. Must contain an ``"is_seizure_target"`` column;
        every other column is treated as a signal.

    Returns
    -------
    pandas.DataFrame
        One row. The integer-named columns hold the signal values laid out
        column by column (all rows of the first signal column, then all rows
        of the second, ...). The final ``"Target"`` column holds the window
        label.
    """
    # Read the label from the same named column that is dropped below, so the
    # result does not depend on the target happening to be the last column.
    labels = window_df["is_seizure_target"]

    if len(np.unique(labels)) == 1:
        # Homogeneous window: keep its single label value.
        target = labels.iloc[0]
    else:
        # Mixed (or empty) window: labelled as 1.
        target = 1

    # Transposing first makes the row-major reshape emit one signal column
    # after another instead of interleaving them sample by sample.
    signals = window_df.drop(columns="is_seizure_target").transpose()
    flatten = pd.DataFrame(np.array(signals).reshape(1, signals.size))
    flatten["Target"] = target
    return flatten

In [None]:
def create_data_input(df):
    """Slice ``df`` into overlapping windows and return features and labels.

    Window starts advance by ``overlap * sample_rate`` rows. Each window is
    passed through ``flatten_window`` and the resulting one-row frames are
    stacked into a single table.

    Returns
    -------
    (X, y)
        ``X`` is every column of the stacked table except the last;
        ``y`` is the last column (the window label).
    """
    step = overlap * sample_rate
    # NOTE(review): each slice covers len_window + 1 rows, one more than
    # len_window -- confirm whether the extra row is intended.
    flattened = [
        flatten_window(df.iloc[start:start + len_window + 1])
        for start in range(0, len(df) - len_window, step)
    ]

    # Stacking the one-row frames gives an array of shape (windows, 1, columns).
    stacked = np.array(flattened)
    n_windows = stacked.shape[0]
    n_columns = stacked.shape[2]

    table = pd.DataFrame(stacked.reshape(n_windows, n_columns))
    return table.iloc[:, :-1], table.iloc[:, -1]

In [None]:
# Turn the recording into one flattened row per window, then hold out 20% of
# the windows for testing.
X, y = create_data_input(df)

# stratify=y keeps the label proportions the same in both splits, so the test
# set cannot end up without one of the classes by chance. random_state fixes
# the shuffle so results are reproducible (same seed as the SMOTE step).
# NOTE(review): consecutive windows overlap, so a random split can place
# windows that share samples on both sides of the split -- consider a
# time-ordered split if that leakage matters.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=7
)

## Balancing the data

In [None]:
# Resample the minority class. You can change the strategy to 'auto' if you are not sure.
def oversampling(X, y):
    """Balance ``(X, y)`` with SMOTE and return the resampled pair.

    Uses ``sampling_strategy='minority'`` and a fixed ``random_state`` of 7.
    """
    smote = SMOTE(sampling_strategy='minority', random_state=7)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled
    
# Oversample the training split only; X_test / y_test are left as they are.
X_train, y_train = oversampling(X_train, y_train)

# Modeling

## Implement models

In [None]:
# Build each classifier and train it on the training split in one step;
# scikit-learn's fit() returns the estimator itself, so the names below are
# bound to the fitted models.
log_model = LogisticRegression(max_iter=2000).fit(X_train, y_train)
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# `svc` and `svc_model` both refer to the same fitted estimator.
svc_model = svc = SVC(kernel='linear', C=10).fit(X_train, y_train)

## Model evaluation

### Accuracy

In [None]:
# Accuracy of each fitted model on the held-out test split
log_score, knn_score, svc_score = (
    accuracy_score(y_test, fitted.predict(X_test))
    for fitted in (log_model, knn_model, svc_model)
)

# One line per model, in the order logistic regression, k-NN, SVC.
print(
    f"Accuracy {log_model}: {log_score}",
    f"Accuracy {knn_model}: {knn_score}",
    f"Accuracy {svc_model}: {svc_score}",
    sep="\n",
)

### Confusion matrix

In [None]:
# Confusion matrix - Model 1 (logistic regression) on the test split

y_pred = log_model.predict(X_test)
y_true = y_test

# Pair every true label with its prediction, then count each combination:
# rows are the actual labels, columns the predicted ones.
results_df = pd.DataFrame({"actual": y_true, "predicted": y_pred})
confusion_matrix_log = pd.crosstab(results_df["actual"], results_df["predicted"])

confusion_matrix_log

In [None]:
# Confusion matrix - Model 2 (k-nearest neighbours) on the test split

y_pred = knn_model.predict(X_test)
y_true = y_test

# Pair every true label with its prediction, then count each combination:
# rows are the actual labels, columns the predicted ones.
results_df = pd.DataFrame({"actual": y_true, "predicted": y_pred})
confusion_matrix_knn = pd.crosstab(results_df["actual"], results_df["predicted"])

confusion_matrix_knn

In [None]:
# Confusion matrix - Model 3 (linear SVC) on the test split

y_pred = svc_model.predict(X_test)
y_true = y_test

# Pair every true label with its prediction, then count each combination:
# rows are the actual labels, columns the predicted ones.
results_df = pd.DataFrame({"actual": y_true, "predicted": y_pred})
confusion_matrix_svc = pd.crosstab(results_df["actual"], results_df["predicted"])

confusion_matrix_svc

### Cross-validation

In [None]:
# Cross-validation: 5-fold recall and F1 for each model on the training data.
# NOTE(review): X_train / y_train were already oversampled before this cell,
# so the folds are drawn from resampled data -- confirm that is intended.

cv_results_log = cross_validate(log_model, X_train, y_train, cv=5, scoring=['recall', 'f1'])
cv_results_knn = cross_validate(knn_model, X_train, y_train, cv=5, scoring=['recall', 'f1'])
cv_results_svc = cross_validate(svc_model, X_train, y_train, cv=5, scoring=['recall', 'f1'])

# Mean score over the folds, rounded to 3 decimals: first the three recall
# lines, then the three F1 lines, one line per model.
print(
    *(
        f"{label} {model}: {round(results[key].mean(),3)}"
        for label, key in (("Recall", "test_recall"), ("F1 score", "test_f1"))
        for model, results in (
            (log_model, cv_results_log),
            (knn_model, cv_results_knn),
            (svc_model, cv_results_svc),
        )
    ),
    sep="\n",
)

# Test on new data

In [None]:
# Load the modelling table for patient 2, using the first CSV column as the index.
# (The directory name suggests the values were MinMax-scaled upstream -- verify.)
df_new = pd.read_csv('../data_modeling/MinMaxScaler/data_modeling_patient_2.csv', index_col=[0])
# Show the first rows as the cell output.
df_new.head()

In [None]:
# Window and flatten the patient 2 table with the same helper that produced X and y.
X_new, y_new = create_data_input(df_new)

In [None]:
# Confusion matrix - Model 3 (linear SVC) on the new patient's windows.
# This rebinds y_true, y_pred, results_df and confusion_matrix_svc.

y_pred = svc_model.predict(X_new)
y_true = y_new

# Pair every true label with its prediction, then count each combination:
# rows are the actual labels, columns the predicted ones.
results_df = pd.DataFrame({"actual": y_true, "predicted": y_pred})
confusion_matrix_svc = pd.crosstab(results_df["actual"], results_df["predicted"])

confusion_matrix_svc

In [None]:
# Ratio computed from two hard-coded counts.
# NOTE(review): 218 and 533 are presumably copied by hand from the confusion
# matrix displayed above (true negatives over all actual negatives) -- confirm,
# and consider reading them from confusion_matrix_svc so they cannot go stale
# when the data or model changes.
true_negative = 218/(218+533)
# Show the value as the cell output.
true_negative