In [3]:
import pandas as pd
import numpy as np


In [4]:
# Load the raw dataset from the local CSV file.
data = pd.read_csv('Epileptic Seizure Recognition.csv')
# Keep a local copy of the raw frame (pandas writes the row index as an
# extra leading column by default).
data.to_csv('epilepsy.csv')

# Drop the string identifier column; the saved file still contains the
# X* columns together with the label column `y`.
df_new = data.drop(['Unnamed'], axis=1)
df_new.to_csv('../data/epilepsy.csv', index=False)

# Labels and feature matrix as NumPy arrays.
y = data['y'].to_numpy()
# The label column must NOT be part of the feature matrix: leaving `y` in
# `x` leaks the target into every downstream step (the correlation of `y`
# with itself is trivially 1.0, and PCA would be fit on the label).
x = df_new.drop(columns=['y']).to_numpy()

# Display the first few rows of the dataset
print(data.head())



      Unnamed   X1   X2   X3   X4   X5   X6   X7   X8   X9  ...  X170  X171  \
0  X21.V1.791  135  190  229  223  192  125   55   -9  -33  ...   -17   -15   
1  X15.V1.924  386  382  356  331  320  315  307  272  244  ...   164   150   
2     X8.V1.1  -32  -39  -47  -37  -32  -36  -57  -73  -85  ...    57    64   
3   X16.V1.60 -105 -101  -96  -92  -89  -95 -102 -100  -87  ...   -82   -81   
4   X20.V1.54   -9  -65  -98 -102  -78  -48  -16    0  -21  ...     4     2   

   X172  X173  X174  X175  X176  X177  X178  y  
0   -31   -77  -103  -127  -116   -83   -51  4  
1   146   152   157   156   154   143   129  1  
2    48    19   -12   -30   -35   -35   -36  5  
3   -80   -77   -85   -77   -72   -69   -65  5  
4   -12   -32   -41   -65   -83   -89   -73  5  

[5 rows x 180 columns]


In [6]:
# Structure of the Dataset:
# Rows (Samples):
# Number of Features: 178 features.
# Number of Classes: 5 classes (class 1 is seizure activity; classes 2-5 are non-seizure recordings).
# There are 11,500 samples (rows) in the dataset.
# Each row represents a 1-second long segment of EEG recording that contains 178 data points, corresponding to different time-series measurements of the electrical activity in the brain.
# Columns (Features):

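# The loaded CSV has 180 columns in total (see the `[5 rows x 180 columns]` output of the loading cell).
# The first column (`Unnamed`) holds a string ID per row (e.g. `X21.V1.791`); the next 178 columns (X1 to X178) are the features, representing the EEG data points for each second of recording.
# The last column (`y`) is the label column, indicating the class or condition of the brain activity.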
# Class Labels (5 classes):

# 1: Recording during seizure activity.
# 2: Recording from the tumor region but without seizure.
# 3: Recording from the healthy region of the brain.
# 4: Recording when the patient's eyes are closed (non-seizure).
# 5: Recording when the patient's eyes are open (non-seizure).
# Data Points:

# Each row contains 178 numerical attributes, which are the EEG signal amplitudes measured over time.
# The dataset is structured such that each sample can be classified into one of the five categories, with class 1 being the most important for detecting seizures.

In [7]:
def correlations(data_array, target_array):
    """Correlate every column of a 2-D array with a target vector.

    Parameters
    ----------
    data_array : numpy.ndarray
        Two-dimensional array; each column is correlated separately.
    target_array : array-like
        One-dimensional vector with one entry per row of ``data_array``.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with ``data_array.shape[1]`` entries; entry
        ``i`` is the Pearson correlation coefficient between column ``i``
        and ``target_array``.
    """
    n_columns = data_array.shape[1]
    # np.corrcoef returns a 2x2 matrix for two vectors; the off-diagonal
    # element [0, 1] is the coefficient between the column and the target.
    per_column = [
        np.corrcoef(data_array[:, idx], target_array)[0, 1]
        for idx in range(n_columns)
    ]
    return np.array(per_column)

In [12]:
# Pearson correlation of each column of `x` with the label vector `y`.
corrs = correlations(data_array=x, target_array=y)

In [14]:
# Show every per-column coefficient, then the largest (signed) one.
print(corrs)
print(corrs.max())

[ 0.02237466  0.01915436  0.01346906  0.00476269 -0.00659672 -0.01825588
 -0.02642169 -0.02747807 -0.02945193 -0.03445892 -0.03805648 -0.03537895
 -0.02594589 -0.01822883 -0.01323408 -0.01111903 -0.01024499 -0.00943358
 -0.01008148 -0.01140073 -0.01351987 -0.01362608 -0.01330809 -0.01701149
 -0.02405514 -0.03146622 -0.03088207 -0.02660089 -0.02051289 -0.01801652
 -0.01708753 -0.01865538 -0.02109845 -0.02401721 -0.02450336 -0.01940586
 -0.01142848 -0.00468699 -0.00213847 -0.00579046 -0.01557608 -0.028724
 -0.03831979 -0.04049922 -0.03356828 -0.0214829  -0.00968038 -0.00014164
  0.00418121  0.00719068  0.00618202  0.00309919 -0.00487026 -0.01320945
 -0.0195134  -0.02060135 -0.01550487 -0.00400597  0.00958379  0.01939117
  0.02172349  0.01728953  0.00696933 -0.00428006 -0.01177114 -0.01877258
 -0.02275054 -0.02594813 -0.02557272 -0.02483341 -0.02336819 -0.02372923
 -0.02020758 -0.01400318 -0.00578104 -0.00087183 -0.00122838 -0.00384151
 -0.00362195  0.00155687  0.0101576   0.01704364  0.0

In [5]:
from sklearn.decomposition import PCA

# Keep the first 20 principal components. `random_state` is pinned so the
# decomposition is reproducible across kernel restarts whenever the default
# svd_solver='auto' resolves to a randomized solver; the seed value itself
# is arbitrary.
pca = PCA(n_components=20, random_state=42)


In [6]:
# Fit the PCA on `x` (fit returns the estimator itself) and report the
# fraction of total variance captured by each retained component.
fitted_pca = pca.fit(x)
print(fitted_pca.explained_variance_ratio_)

[0.05587618 0.05256036 0.04952756 0.04824032 0.04444949 0.04144632
 0.04065098 0.03923183 0.03621733 0.03446312 0.03296624 0.03239918
 0.02966738 0.02917025 0.02569377 0.02383212 0.02161517 0.0206643
 0.0201929  0.01940261]
