## **ECE 4782 Machine Learning Model - EEG/ECG Data**

### **Import libraries and preprocessed data**

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# Read csv
patient_data = pd.read_csv('BMED4783_eegFeatures_noPreProc.csv')

# Dropping Empty Rows
patient_data = patient_data.drop(range(23,600))

# Drop useless columns
patient_data = patient_data.drop(['Age','Male', 'Female', 'Patient Number', 'Other', 'ROSC', 'OHCA', 'VFIB', 'TTM', 'Hospital', 'Num Trials'], axis=1)

# Get CPC Scores
cpc_data = patient_data.get(['CPC'])
for i in range(len(cpc_data)):
    continue

# Drop CPC from training data set
training_data = patient_data.drop(['CPC'],axis=1)

len(cpc_data)

23

### **Variance Testing**

**Function to find features with highest variance**

In [3]:
def find_highest_variance_features(data):
    variances = data.var(axis=0)
    sorted_variances_indices = np.argsort(variances)[::-1]
    return sorted_variances_indices

In [4]:
highest_variance_indices = find_highest_variance_features(training_data)
highest_variance_indices.head()

Theta Slope    6
Delta Slope    2
Beta Slope     5
Alpha Slope    7
Theta STD      3
dtype: int64

### **Split data into training/testing sets (80/20 Split)**

In [5]:
X_train, X_test, y_train, y_test = train_test_split(training_data, cpc_data, test_size=0.3, random_state=42)

### **Random Forest**

In [6]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train.values.ravel())
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)

importances = rf_model.feature_importances_
feature_names = list(X_train.columns)
feature_importances = dict(zip(training_data.columns, importances))
sorted_features = sorted(feature_importances.items(), key=lambda x: x[1], reverse=True)

# Printing the features with highest importances
for feature, importance in sorted_features:
    print(f"{feature}: {importance}")

print(rf_predictions)
print(y_test)

Random Forest Accuracy: 0.7142857142857143
Beta Slope: 0.10727359187492447
Beta STD: 0.10154407179376729
Alpha Slope: 0.10095253466992712
Beta Avg Pow: 0.09866926649342965
Avg Alpha Pow: 0.08861598761633907
Delta Slope: 0.0884085222067854
Theta STD: 0.07623820710994791
Alpha STD: 0.07569701919659427
Delta STD: 0.07148513799811063
Theta Slope: 0.06831483793166401
Delta Avg Pow: 0.0630945581776902
Theta Avg Pow: 0.05970626493081999
[1. 5. 1. 1. 5. 1. 1.]
    CPC
15  2.0
9   1.0
0   1.0
8   1.0
17  5.0
12  1.0
1   1.0


### **K-Means**

In [7]:
kmeans_model = KMeans(n_clusters=5, random_state=42)
kmeans_model.fit(X_train)
kmeans_predictions = kmeans_model.predict(X_test)

# print(X_test)
print(y_test)
kmeans_predictions

    CPC
15  2.0
9   1.0
0   1.0
8   1.0
17  5.0
12  1.0
1   1.0


  super()._check_params_vs_input(X, default_n_init=10)


array([3, 0, 3, 3, 0, 4, 0], dtype=int32)

### **Support Vector Machines**

In [8]:
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train.values.ravel())
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.5714285714285714
