## **ECE 4782 Machine Learning Model - EEG/ECG Data**

### **Import libraries and preprocessed data**

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# Read csv
patient_data = pd.read_csv('BMED4783_eegFeatures 2.csv')

# Dropping patients with extremely brain activity features
patient_data = patient_data.drop([9,12,14,16,19])

# Dropping Empty Rows
patient_data = patient_data.drop(range(23,600))

# Drop useless columns
patient_data = patient_data.drop(['Age', 'Male', 'Female', 'Patient Number', 'Other', 'ROSC', 'OHCA', 'VFIB', 'TTM', 'Hospital', 'Num Trials'], axis=1)

# Get CPC Scores
cpc_data = patient_data.get(['CPC'])

# Drop CPC from training data set
training_data = patient_data.drop(['CPC'],axis=1)

training_data

Unnamed: 0,Avg Alpha Pow,Beta Avg Pow,Delta Avg Pow,Theta Avg Pow,Alpha STD,Beta STD,Delta STD,Theta STD,Alpha Slope,Beta Slope,Delta Slope,Theta Slope
0,1406.392232,256.063073,67021.99774,4730.332687,1561.364896,317.231671,130417.305893,7981.394176,-8.952353,-2.088764,790.783022,-2.635894
1,1245.536265,257.853748,9282.849428,3526.639784,2789.432724,506.13245,23148.95386,8268.988807,-15.500927,-2.114308,-124.29785,-26.84835
2,10563.209372,2540.088445,5568.156275,4197.673798,20211.504597,3940.854312,9676.409683,8439.812387,3421.112828,303.537305,1490.721133,1541.566741
3,19788.372056,1336.391006,139253.72503,32149.145138,28651.556556,1509.500481,140014.416205,52278.414461,-1114.432451,-61.852117,-1985.659302,-2103.306749
4,9928.275903,1475.070013,179571.063271,14091.045443,10770.694169,1709.7777,208829.334696,11557.915628,-261.45477,-31.951763,5029.561012,-318.777837
5,1664.729265,225.686126,112898.432393,16196.326871,1682.855843,234.244241,123878.218778,17497.787613,15.813093,3.038198,1526.276314,-67.10395
6,2863.352769,585.693739,64282.56225,7018.71562,5108.329329,1261.968297,157202.599864,12301.290305,-118.856617,-3.561455,1040.871476,-126.935874
7,13163.456605,1694.475024,33153.560817,22596.228407,16904.058511,1399.156389,30375.694492,28054.34183,-711.099685,-38.808878,-947.926222,-1101.604495
8,4017.370786,2148.085361,71361.72073,16348.73318,6420.846286,3608.493562,113299.44495,28379.049227,32.354825,23.383768,877.163101,159.532219
10,18.572151,5.274173,140.524022,22.133481,123.193715,34.984903,932.130908,146.816906,-2.476287,-0.703223,-18.736536,-2.951131


### **Variance Testing**

**Function to find features with highest variance**

In [33]:
def find_highest_variance_features(data):
    variances = data.var(axis=0)
    sorted_variances_indices = np.argsort(variances)[::-1]
    return sorted_variances_indices

In [34]:
highest_variance_indices = find_highest_variance_features(training_data)
highest_variance_indices.head()

Theta Slope    6
Delta Slope    2
Beta Slope     7
Alpha Slope    3
Theta STD      4
dtype: int64

### **Split data into training/testing sets (70/30 Split)**

In [35]:
X_train, X_test, y_train, y_test = train_test_split(training_data, cpc_data, test_size=0.3, random_state=42)

### **Random Forest**

In [36]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train.values.ravel())
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)

importances = rf_model.feature_importances_
feature_names = list(X_train.columns)
feature_importances = dict(zip(training_data.columns, importances))
sorted_features = sorted(feature_importances.items(), key=lambda x: x[1], reverse=True)

# Printing the features with highest importances
for feature, importance in sorted_features:
    print(f"{feature}: {importance}")


Random Forest Accuracy: 0.8333333333333334
Beta Avg Pow: 0.13560394593632172
Beta STD: 0.13083628871824157
Alpha STD: 0.1113520245664468
Theta STD: 0.09612562703521216
Alpha Slope: 0.09370917561248066
Avg Alpha Pow: 0.0928219083603249
Beta Slope: 0.08128966261937798
Theta Slope: 0.07635970868114121
Theta Avg Pow: 0.06777104803712342
Delta STD: 0.04973656831634998
Delta Avg Pow: 0.03551652233036878
Delta Slope: 0.02887751978661069


### **K-Means**

In [37]:
kmeans_model = KMeans(n_clusters=5, random_state=42)
kmeans_model.fit(X_train)
kmeans_predictions = kmeans_model.predict(X_test)

print(X_test)
print(y_test)
kmeans_predictions

    Avg Alpha Pow  Beta Avg Pow  Delta Avg Pow  Theta Avg Pow     Alpha STD  \
0     1406.392232    256.063073   67021.997740    4730.332687   1561.364896   
1     1245.536265    257.853748    9282.849428    3526.639784   2789.432724   
8     4017.370786   2148.085361   71361.720730   16348.733180   6420.846286   
5     1664.729265    225.686126  112898.432393   16196.326871   1682.855843   
3    19788.372056   1336.391006  139253.725030   32149.145138  28651.556556   
17      79.861972     67.849167     273.172210     174.553918    240.830094   

       Beta STD      Delta STD     Theta STD  Alpha Slope  Beta Slope  \
0    317.231671  130417.305893   7981.394176    -8.952353   -2.088764   
1    506.132450   23148.953860   8268.988807   -15.500927   -2.114308   
8   3608.493562  113299.444950  28379.049227    32.354825   23.383768   
5    234.244241  123878.218778  17497.787613    15.813093    3.038198   
3   1509.500481  140014.416205  52278.414461 -1114.432451  -61.852117   
17   210

  super()._check_params_vs_input(X, default_n_init=10)


array([1, 0, 4, 4, 4, 0], dtype=int32)

### **Support Vector Machines**

In [38]:
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train.values.ravel())
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.8333333333333334
