In [None]:
import os
import sys
import math
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tsfresh.feature_extraction import feature_calculators as fc
from scipy.signal import welch
from scipy.stats import linregress
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later dropped the old names, so use whichever one this installation
# ships. If neither exists the step is skipped: sns.set() below applies the
# whitegrid look on its own.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set(font_scale=1.5, style='whitegrid')
# One line color per label; the plotting cells index this with `label - 1`.
colors = ['red', 'blue', 'green', 'orange', 'purple']
# Values of the 'y' column that the per-label cells iterate over.
LABELS = [1, 2, 3, 4, 5]
# Sample columns used as x-axis tick positions in the chunk overlay plots.
ticks = ['X1', 'X45', 'X90', 'X135', 'X178']
import csv
from collections import defaultdict

In [None]:
# Path of the CSV holding the EEG chunks, relative to the notebook.
filename = './data.csv'

# The CSV's first column has no header (pandas names it 'Unnamed: 0'); it is
# used as the row index. Later cells rely on these row labels having the form
# '<chunk>.<subject>'.
df = pd.read_csv(
    filename,
    index_col='Unnamed: 0',
)

## Time-series for randomly selected subjects of each label

In [None]:
# Organize the time_series per subject.
#
# Row labels look like '<chunk>.<subject>' (for example 'X1.<subject>'), so
# dropping the first dot-separated field leaves the subject identifier.
SUBJECTS_PER_LABEL = 5  # upper bound on subjects plotted for each label
SAMPLE_SEED = 0         # fixed seed so Restart & Run All reproduces the figures

subjects = {'.'.join(sub.split('.')[1:]) for sub in df.index.tolist()}

# Chunk prefixes 'X1' .. 'X23'; concatenated in this order they form one
# continuous recording per subject.
chunks = ['X' + str(i + 1) for i in range(23)]

# Dedicated generator: reproducible sampling without touching the global
# `random` state that other cells may rely on.
_subject_rng = random.Random(SAMPLE_SEED)

for label in LABELS:
    # A subject's label is read from its first chunk ('X1.<subject>').
    # Sorting matters: set iteration order for strings changes between kernel
    # sessions, which would defeat the fixed seed.
    label_subjects = sorted(
        s for s in subjects if int(df.loc['X1.'+s, 'y'].item()) == label
    )
    # random.sample raises ValueError when asked for more items than exist.
    n_shown = min(SUBJECTS_PER_LABEL, len(label_subjects))
    for subject in _subject_rng.sample(label_subjects, n_shown):
        fig = plt.figure(figsize=(12, 2))
        # Stitch the subject's chunks together, in chunk order.
        time_series = [
            value
            for chunk in chunks
            for value in df.loc[chunk + '.' + subject, 'X1':'X178'].values
        ]
        plt.plot(time_series, color=colors[label-1])
        plt.title('EEG Signals for Subject {0}: Label: {1}: {2}Seizure'
                  .format(subject, label, '' if label == 1 else 'non-'))
        plt.xlabel('Time (178ths of a second)')
        plt.ylabel('Cranial Activity (MicroVolts)')
        plt.show()

## Chunks per label

In [None]:
# Overlay every chunk of a label in a single figure. Each line is drawn almost
# transparent, so regions where many chunks overlap appear darker.

for label in range(1, 6):
    # After the transpose, rows are the sample columns X1..X178 and each
    # column is one chunk carrying this label.
    label_df = df.loc[df['y'] == label, 'X1':'X178'].T
    ax = plt.figure(figsize=(12, 6))
    line_color = colors[label-1]
    for col in label_df.columns.values:
        ax = plt.plot(label_df[col], alpha=0.05, color=line_color)
    plt.xticks(ticks)
    # Only label 1 is the seizure class.
    if label == 1:
        seizure_text = ' '
    else:
        seizure_text = ' non-'
    plt.title(
        'Density of EEG Signal for pattern {0}:{1}Seizure'.format(label, seizure_text)
    )
    plt.xlabel('Time (178ths of a second)')
    plt.ylabel('Cranial Activity (MicroVolts)')
    plt.show()

## Descriptive statistics per label
Line colors used in the plots below:
- 1 (Seizure): Red
- 2 (Non-Seizure): Blue
- 3 (Non-Seizure): Green
- 4 (Non-Seizure): Orange
- 5 (Non-Seizure): Purple

In [None]:
def _chunk_features(chunk_matrix):
    """Compute summary features for every chunk in a 2-D array.

    Parameters
    ----------
    chunk_matrix : 2-D array
        One row per chunk, samples along the second axis.

    Returns
    -------
    dict
        Maps a feature name to a list holding one value per chunk. Keys:
        'mean', 'std', 'min', 'median', 'max', 'welch' (mean power spectral
        density), 'num_crossings' (crossings of the chunk's own mean),
        'first_slope' and 'second_slope' (linear-regression slope of the
        first / second half of the chunk).
    """
    features = {
        name: []
        for name in ('mean', 'std', 'min', 'median', 'max', 'welch',
                     'num_crossings', 'first_slope', 'second_slope')
    }
    for chunk in chunk_matrix:
        stats = pd.Series(chunk).describe()
        features['mean'].append(stats['mean'])
        features['std'].append(stats['std'])
        features['min'].append(stats['min'])
        features['median'].append(stats['50%'])
        features['max'].append(stats['max'])
        # welch() returns (frequencies, psd). Averaging the whole tuple would
        # blend the frequency bins into the score, so average the PSD only.
        _, psd = welch(chunk, nperseg=len(chunk))
        features['welch'].append(np.mean(psd))
        features['num_crossings'].append(fc.number_crossing_m(chunk, stats['mean']))
        # Fit a line to each half separately; x is just the sample position
        # within that half.
        half = len(chunk) // 2
        first_half, second_half = chunk[:half], chunk[half:]
        features['first_slope'].append(
            linregress(x=np.arange(len(first_half)), y=first_half).slope)
        features['second_slope'].append(
            linregress(x=np.arange(len(second_half)), y=second_half).slope)
    return features


# One feature dict per label, in label order 1..5.
_features_by_label = [
    _chunk_features(df.loc[df['y'] == label, 'X1':'X178'].values)
    for label in range(1, 6)
]

# Per-feature views: element i holds the values for label i + 1.
means_by_label = [f['mean'] for f in _features_by_label]
stds_by_label = [f['std'] for f in _features_by_label]
mins_by_label = [f['min'] for f in _features_by_label]
medians_by_label = [f['median'] for f in _features_by_label]
maxes_by_label = [f['max'] for f in _features_by_label]
welchs_by_label = [f['welch'] for f in _features_by_label]
num_crossings_by_label = [f['num_crossings'] for f in _features_by_label]
first_slopes_by_label = [f['first_slope'] for f in _features_by_label]
second_slopes_by_label = [f['second_slope'] for f in _features_by_label]

# (values per label, title, x-axis label, use a log-scaled x axis?)
_label_plots = [
    (means_by_label, 'Average Value Distribution',
     'Cranial Activity (MicroVolts)', False),
    (stds_by_label, 'Std Dev Distribution',
     'Cranial Activity (MicroVolts)', False),
    (mins_by_label, 'Min Value Distribution',
     'Cranial Activity (MicroVolts)', False),
    (medians_by_label, 'Median Value Distribution',
     'Cranial Activity (MicroVolts)', False),
    (maxes_by_label, 'Max Value Distribution',
     'Cranial Activity (MicroVolts)', False),
    (welchs_by_label, "Average Welch's Method Score Distribution",
     'Power Spectral Density (log10)', True),
    (num_crossings_by_label, 'Distribution of Count of Adjacent Values Crossing the Mean',
     'Count', False),
    (first_slopes_by_label, 'First Half Slope Distribution',
     'Cranial Activity over Time (MicroVolts/178th Seconds)', False),
    (second_slopes_by_label, 'Second Half Slope Distribution',
     'Cranial Activity over Time (MicroVolts/178th Seconds)', False),
]

for values_by_label, title, xlabel, log_x in _label_plots:
    fig = plt.figure(figsize=(12, 6))
    for i, values in enumerate(values_by_label):
        sns.kdeplot(values, color=colors[i], label='Label: {}'.format(str(i+1)))
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    if log_x:
        plt.xscale('log')
    # The `label=` passed to kdeplot is only shown once a legend is drawn.
    plt.legend()
    plt.show()

## Chunks for seizure vs non-seizure

In [None]:
# Same overlay as the per-label density plots, but with labels 2-5 pooled into
# a single non-seizure group. Seizure chunks are drawn first, then the rest.
for is_seizure, line_color, title in (
    (True, 'red', 'Density of EEG Signal for pattern 1: Seizure'),
    (False, 'blue', 'Density of EEG Signal for patterns  2, 3, 4, 5: non-Seizure'),
):
    # Label 1 is the seizure class; every other label counts as non-seizure.
    if is_seizure:
        row_mask = df['y'] == 1
    else:
        row_mask = df['y'] != 1
    # Transposed so that each column is one chunk and rows are X1..X178.
    label_df = df.loc[row_mask, 'X1':'X178'].T
    ax = plt.figure(figsize=(12, 6))
    for col in label_df.columns.values:
        ax = plt.plot(label_df[col], alpha=0.05, color=line_color)
    plt.xticks(ticks)
    plt.title(title)
    plt.xlabel('Time (178ths of a second)')
    plt.ylabel('Cranial Activity (MicroVolts)')
    plt.show()

## Descriptive statistics for chunks by seizure (red) vs non-seizure (blue)

In [None]:
def _summarize_chunks(chunk_matrix):
    """Compute summary features for every chunk in a 2-D array.

    Parameters
    ----------
    chunk_matrix : 2-D array
        One row per chunk, samples along the second axis.

    Returns
    -------
    dict
        Maps a feature name to a list holding one value per chunk. Keys:
        'means', 'stds', 'mins', 'medians', 'maxes', 'welchs' (mean power
        spectral density), 'num_crossings' (crossings of the chunk's own
        mean), 'first_slopes' and 'second_slopes' (linear-regression slope of
        the first / second half of the chunk).
    """
    summary = {
        name: []
        for name in ('means', 'stds', 'mins', 'medians', 'maxes', 'welchs',
                     'num_crossings', 'first_slopes', 'second_slopes')
    }
    for chunk in chunk_matrix:
        stats = pd.Series(chunk).describe()
        summary['means'].append(stats['mean'])
        summary['stds'].append(stats['std'])
        summary['mins'].append(stats['min'])
        summary['medians'].append(stats['50%'])
        summary['maxes'].append(stats['max'])
        # welch() returns (frequencies, psd). Averaging the whole tuple would
        # blend the frequency bins into the score, so average the PSD only.
        _, psd = welch(chunk, nperseg=len(chunk))
        summary['welchs'].append(np.mean(psd))
        summary['num_crossings'].append(fc.number_crossing_m(chunk, stats['mean']))
        # Fit a line to each half separately; x is just the sample position
        # within that half.
        half = len(chunk) // 2
        first_half, second_half = chunk[:half], chunk[half:]
        summary['first_slopes'].append(
            linregress(x=np.arange(len(first_half)), y=first_half).slope)
        summary['second_slopes'].append(
            linregress(x=np.arange(len(second_half)), y=second_half).slope)
    return summary


# Label 1 is the seizure class; labels 2-5 are pooled as non-seizure.
_seizure = _summarize_chunks(df.loc[df['y'] == 1, 'X1':'X178'].values)
_non_seizure = _summarize_chunks(df.loc[df['y'] != 1, 'X1':'X178'].values)

means_seizure = _seizure['means']
stds_seizure = _seizure['stds']
mins_seizure = _seizure['mins']
medians_seizure = _seizure['medians']
maxes_seizure = _seizure['maxes']
welchs_seizure = _seizure['welchs']
num_crossings_seizure = _seizure['num_crossings']
first_slopes_seizure = _seizure['first_slopes']
second_slopes_seizure = _seizure['second_slopes']

means_non_seizure = _non_seizure['means']
stds_non_seizure = _non_seizure['stds']
mins_non_seizure = _non_seizure['mins']
medians_non_seizure = _non_seizure['medians']
maxes_non_seizure = _non_seizure['maxes']
welchs_non_seizure = _non_seizure['welchs']
num_crossings_non_seizure = _non_seizure['num_crossings']
first_slopes_non_seizure = _non_seizure['first_slopes']
second_slopes_non_seizure = _non_seizure['second_slopes']

# (feature key, title, x-axis label, use a log-scaled x axis?)
_seizure_plots = [
    ('means', 'Average Value Distribution: Seizure vs. non-Seizure',
     'Cranial Activity (MicroVolts)', False),
    ('stds', 'Std Dev Distribution: Seizure vs. non-Seizure',
     'Cranial Activity (MicroVolts)', False),
    ('mins', 'Min Value Distribution: Seizure vs. non-Seizure',
     'Cranial Activity (MicroVolts)', False),
    ('medians', 'Median Value Distribution: Seizure vs. non-Seizure',
     'Cranial Activity (MicroVolts)', False),
    ('maxes', 'Max Value Distribution: Seizure vs. non-Seizure',
     'Cranial Activity (MicroVolts)', False),
    ('welchs', "Average Welch's Method Score Distribution",
     'Power Spectral Density (log10)', True),
    ('num_crossings', 'Distribution of Count of Adjacent Values Crossing the Mean',
     'Count', False),
    ('first_slopes', 'First Half Slope Distribution',
     'Cranial Activity over Time (MicroVolts/178th Seconds)', False),
    ('second_slopes', 'Second Half Slope Distribution',
     'Cranial Activity over Time (MicroVolts/178th Seconds)', False),
]

for feature, title, xlabel, log_x in _seizure_plots:
    fig = plt.figure(figsize=(12, 6))
    sns.kdeplot(_seizure[feature], color='red', label='Seizure')
    sns.kdeplot(_non_seizure[feature], color='blue', label='Non-Seizure')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    if log_x:
        plt.xscale('log')
    # The `label=` passed to kdeplot is only shown once a legend is drawn.
    plt.legend()
    plt.show()