## **ECE 4782 Machine Learning Model - EEG/ECG Data**

### **Import libraries and preprocessed data**

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Read csv
patient_data = pd.read_csv('FINALBMED4783_eegFeatures.csv')

# Dropping patients with extremely brain activity features
# patient_data = patient_data.drop([9,12,14,16,19])

# Dropping Empty Rows
# patient_data = patient_data.drop(range(23,83))

# Drop useless columns
patient_data = patient_data.drop(['Age', 'Male', 'Female', 'Patient Number', 'Other', 'ROSC', 'OHCA', 'VFIB', 'TTM', 'Hospital', 'Num Trials', 'Unnamed: 24'], axis=1)
cols = patient_data.columns
# Drop ECG columns
#patient_data = patient_data.drop(['QRS Amp Avg', 'QRS TI Avg', 'HR Avg', 'HRV Avg', 'QRS Amp STD',
#       'QRS TI STD', 'HR STD', 'HRV STD', 'HR R^2 Trials'], axis=1)

patient_data = patient_data.dropna(subset=['QRS Amp Avg'])
patient_data = patient_data.drop([30, 40, 41, 55])

# Get CPC Scores
y = patient_data.get(['CPC'])

# Drop CPC from training data set
X = patient_data[['PCA 1', 'PCA 2']]
# pd.set_option('display.max_columns', None)
X

Unnamed: 0,PCA 1,PCA 2
0,58263.23334,-25467.424931
1,-60947.319267,-9603.502406
3,116949.197303,29651.9329
4,186878.747185,13711.317518
5,81104.293057,11467.088823
6,79413.640499,-39037.194966
7,-36221.933875,10690.213289
8,-8722.658946,-11702.632533
15,32397.705046,14004.154191
16,-48594.083558,7528.712999


### **Variance Testing**

**Function to find features with highest variance**

In [17]:
def find_highest_variance_features(data):
    variances = data.var(axis=0)
    sorted_variances_indices = np.argsort(variances)[::-1]
    return sorted_variances_indices

In [18]:
highest_variance_indices = find_highest_variance_features(X)
highest_variance_indices.head()

PCA 2    0
PCA 1    1
dtype: int64

### **Split data into training/testing sets (70/30 Split)**

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train

Unnamed: 0,PCA 1,PCA 2
20,-79981.905001,-4490.690002
57,38012.901811,85028.290539
16,-48594.083558,7528.712999
0,58263.23334,-25467.424931
5,81104.293057,11467.088823
24,-81367.214957,-3815.904
26,-9040.803423,678.418333
6,79413.640499,-39037.194966
21,49816.792686,9930.459498
19,91748.764013,-9275.866858


### **Random Forest**

In [20]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train.values.ravel())
rf_predictions = rf_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, rf_predictions)

# Calculate precision
precision = precision_score(y_test, rf_predictions, average='weighted')

# Calculate recall
recall = recall_score(y_test, rf_predictions, average='weighted')

# Print the results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

importances = rf_model.feature_importances_
feature_names = list(X_train.columns)
feature_importances = dict(zip(X.columns, importances))
sorted_features = sorted(feature_importances.items(), key=lambda x: x[1], reverse=True)

# Printing the features with highest importances
for feature, importance in sorted_features:
    print(f"{feature}: {importance}")


Accuracy: 0.2857142857142857
Precision: 0.2857142857142857
Recall: 0.2857142857142857
PCA 2: 0.5298533793694485
PCA 1: 0.47014662063055146


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### **K-Means**

In [21]:
kmeans_model = KMeans(n_clusters=5, random_state=42)
kmeans_model.fit(X_train)
kmeans_predictions = kmeans_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, kmeans_predictions)

# Calculate precision
precision = precision_score(y_test, kmeans_predictions, average='weighted')

# Calculate recall
recall = recall_score(y_test, kmeans_predictions, average='weighted')

# Print the results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

print(np.transpose(y_test.values) == kmeans_predictions)


  super()._check_params_vs_input(X, default_n_init=10)


Accuracy: 0.14285714285714285
Precision: 0.42857142857142855
Recall: 0.14285714285714285
[[False False False False  True False False]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### **Support Vector Machines**

In [22]:
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train.values.ravel())
svm_predictions = svm_model.predict(X_test)

In [23]:
# Calculate accuracy
accuracy = accuracy_score(y_test, svm_predictions)

# Calculate precision
precision = precision_score(y_test, svm_predictions, average='weighted')

# Calculate recall
recall = recall_score(y_test, svm_predictions, average='weighted')

# Print the results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.2857142857142857
Precision: 0.5714285714285714
Recall: 0.2857142857142857


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### **Plotting SVC**