In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Seed NumPy and TensorFlow with one shared value so repeated runs are comparable
SEED = 42
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

print("Libraries Imported Successfully")

In [None]:
# Load the dataset
# Note: Kaggle usually mounts datasets under /kaggle/input/
import os

DATA_ROOT = '/kaggle/input'

# Collect every CSV below DATA_ROOT. The list is sorted afterwards so the file
# that gets loaded does not depend on directory-listing order.
csv_paths = []
for dirname, _, filenames in os.walk(DATA_ROOT):
    for filename in filenames:
        found_path = os.path.join(dirname, filename)
        print(f"Found file: {found_path}")
        if filename.endswith('.csv'):
            csv_paths.append(found_path)
csv_paths.sort()

if not csv_paths:
    # Stop here with an actionable error. Previously the failure was swallowed
    # and the cell crashed later with a NameError on the undefined `df`.
    raise FileNotFoundError(
        f"Could not automatically find a .csv file under {DATA_ROOT}. "
        "Please verify the dataset is added."
    )

if len(csv_paths) > 1:
    print(f"\nMultiple CSV files found; loading the last one in sorted order: {csv_paths[-1]}")

# If the wrong file is picked up, set `file_path` by hand instead.
file_path = csv_paths[-1]

# Read errors (corrupt file, wrong encoding, ...) are left to propagate so the
# real cause is visible.
df = pd.read_csv(file_path)
print(f"\nDataset loaded. Shape: {df.shape}")

# Display first few rows
print(df.head())

In [None]:
# Split the frame into a feature matrix and a label vector.
# Assumption (not verified here): the label is stored in the final column.
feature_frame = df.iloc[:, :-1]
label_column = df.iloc[:, -1]
X = feature_frame.values
y = label_column.values

# Report the raw label values before they are re-encoded
classes = np.unique(y)
print(f"Classes found: {classes}")

# Re-encode the labels as consecutive integers (0, 1, ...)
le = LabelEncoder()
y = le.fit_transform(y)

# --- PLOT FEATURES (Average Signal per Class) ---
# One curve per class: the column-wise mean over all rows of that class.
# NOTE(review): the legend assumes encoded label 0 is the non-epileptic class
# and 1 the epileptic class - confirm against `le.classes_`.
class_0_idx = np.flatnonzero(y == 0)
class_1_idx = np.flatnonzero(y == 1)

fig, ax = plt.subplots(figsize=(15, 6))
for row_idx, legend_text, line_color in (
    (class_0_idx, 'Non-Epileptic (Average)', 'blue'),
    (class_1_idx, 'Epileptic (Average)', 'red'),
):
    ax.plot(X[row_idx].mean(axis=0), label=legend_text, color=line_color, alpha=0.7)

ax.set_title("Feature Plot: Average EEG Signal Amplitude by Class")
ax.set_xlabel("Time / Feature Index")
ax.set_ylabel("Signal Amplitude")
ax.legend()
ax.grid(True)
plt.show()

# NOTE ON TOPOGRAPHIC MAPS:
# A topographic map (topoplot) needs to know which scalp electrode (e.g. Fp1, C3, Oz)
# each value belongs to, so that it can be placed at that electrode's position.
# This dataset is flattened (36k+ columns) and the CSV carries no explicit channel headers,
# so an accurate topographic map cannot be generated. The plot above serves as the
# feature visualization instead.

In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, 
    BatchNormalization, 
    MaxPooling1D, 
    GlobalAveragePooling1D, 
    Dense, 
    Dropout
)
from tensorflow.keras.callbacks import EarlyStopping

# ------------------ TRAIN/TEST SPLIT (row indices) ------------------
# Split row indices first so the PCA below is fitted on training rows only.
# Fitting it on every row would let test data shape the projection.
# stratify / test_size / random_state are unchanged, so the partition should
# match the one obtained by splitting the transformed array directly.
sample_idx = np.arange(len(y))
train_idx, test_idx = train_test_split(
    sample_idx, test_size=0.2, stratify=y, random_state=42
)

# ------------------ PCA ------------------
# Reduce the wide feature matrix to at most N_COMPONENTS columns for faster training.
# PCA cannot return more components than min(n_samples, n_features) of the data
# it is fitted on, so the requested number is clamped to avoid a ValueError.
N_COMPONENTS = 6000
n_components = min(N_COMPONENTS, len(train_idx), X.shape[1])
pca = PCA(n_components=n_components, random_state=42)
pca.fit(X[train_idx])

# Project every row with the training-only fit; the full arrays are kept under
# their original names.
X_reduced = pca.transform(X)

# Reshape for Conv1D: (samples, timesteps, 1)
X_cnn = X_reduced.reshape(X_reduced.shape[0], X_reduced.shape[1], 1)
print(f"Shape after PCA & reshape: {X_cnn.shape}")

X_train, X_test = X_cnn[train_idx], X_cnn[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Compute class weights for imbalance.
# Keys come from the actual class labels rather than their position, so the
# mapping stays right even if the labels are not exactly 0..k-1.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train)
class_weight_dict = {int(label): float(weight) for label, weight in zip(train_classes, class_weights)}
print(f"Class Weights: {class_weight_dict}")

# ------------------ BUILD CNN MODEL ------------------
def build_cnn(input_shape):
    """Build and compile a small 1-D convolutional binary classifier.

    Args:
        input_shape: Shape of one sample without the batch axis; it is passed
            as ``input_shape`` to the first ``Conv1D`` layer.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        using the Adam optimizer, binary cross-entropy loss and accuracy metric.
    """
    layer_stack = [
        # Convolution block 1: strided convolution followed by 4x pooling
        Conv1D(16, kernel_size=8, strides=2, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(4),
        # Convolution block 2
        Conv1D(32, kernel_size=4, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(4),
        # Average over the remaining time axis: one value per filter
        GlobalAveragePooling1D(),
        # Classification head
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layer_stack)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Build the network for a single sample's shape (the leading sample axis is dropped)
model = build_cnn(input_shape=X_train.shape[1:])
model.summary()

In [None]:
# ------------------ TRAIN MODEL ------------------
# Early stopping needs data to monitor, and that data must not be the test set.
# With restore_best_weights=True the final weights are chosen on the monitored
# data, so monitoring X_test would make the test metrics optimistic.
# A stratified 20% slice of the training split is held out instead. The
# `val_*` entries in `history` therefore describe this hold-out, not the test set.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

# Stop after 5 epochs without a val_loss improvement and roll back to the best epoch
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)

history = model.fit(
    X_fit, y_fit,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    class_weight=class_weight_dict,
    callbacks=[early_stopping],
    verbose=1
)

In [None]:
# Side-by-side learning curves: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

# (axis, history key, training label, validation label, title, y-axis label)
panels = (
    (loss_ax, 'loss', 'Training Loss', 'Validation Loss',
     'Model Loss (Check for Overfitting)', 'Loss'),
    (acc_ax, 'accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Model Accuracy', 'Accuracy'),
)
for axis, metric_key, train_label, val_label, heading, y_label in panels:
    axis.plot(history.history[metric_key], label=train_label, color='blue')
    axis.plot(history.history['val_' + metric_key], label=val_label, color='orange')
    axis.set_title(heading)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.legend()
    axis.grid(True)

# Save before showing so the written file contains the rendered figure
fig.savefig('training_history_plot.png', dpi=300)
plt.show()

# Textual check for overfitting: flag the run when the final validation loss
# sits more than 0.05 above the best validation loss seen during training.
val_loss = history.history['val_loss']
overfit_suspected = val_loss[-1] > min(val_loss) + 0.05
status_message = (
    "\nWARNING: Potential Overfitting detected (Validation loss increased)."
    if overfit_suspected
    else "\nModel training looks stable (Validation loss is low)."
)
print(status_message)

In [None]:
# Predictions: sigmoid outputs are turned into hard labels at a 0.5 threshold
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Metrics
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Summary table; the label strings carry their own padding so values line up
separator = "-" * 30
print(separator)
for metric_label, metric_value in (
    ("Accuracy:  ", acc),
    ("Precision: ", prec),
    ("Recall:    ", rec),
    ("F1 Score:  ", f1),
):
    print(f"{metric_label}{metric_value:.4f}")
print(separator)

# Full Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# Confusion Matrix Plot (rows: actual label, columns: predicted label)
cm = confusion_matrix(y_test, y_pred)
class_names = ['Non-Epileptic', 'Epileptic']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Actual Label')
ax.set_xlabel('Predicted Label')
fig.savefig('confusion_matrix.png', dpi=300)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import welch, spectrogram

# --- CONFIGURATION ---
# Sampling rate in Hz used to scale every frequency and time axis below.
# NOTE(review): 173.61 Hz is the rate usually quoted for the Bonn EEG recordings,
# whereas CHB-MIT recordings are normally 256 Hz - confirm which one this CSV
# comes from. A wrong value only rescales the axes.
fs = 173.61

# 1. Pick the first row of each class as a representative example
idx_seizure = np.flatnonzero(y == 1)[0]   # First seizure sample
idx_normal = np.flatnonzero(y == 0)[0]    # First normal sample

# Each row of X is treated as one 1-D signal
sig_seizure, sig_normal = X[idx_seizure], X[idx_normal]

# --- PLOT 1: POWER SPECTRAL DENSITY (PSD) ---
# Welch estimate with 1024-sample segments for both examples
f_seizure, psd_seizure = welch(sig_seizure, fs=fs, nperseg=1024)
f_normal, psd_normal = welch(sig_normal, fs=fs, nperseg=1024)

fig, ax = plt.subplots(figsize=(12, 6))
ax.semilogy(f_normal, psd_normal, label='Non-Epileptic (Normal)', color='blue', alpha=0.7)
ax.semilogy(f_seizure, psd_seizure, label='Epileptic Seizure', color='red', alpha=0.7)
ax.set_title('Power Spectral Density (PSD) Comparison')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Power (Log Scale)')
ax.legend()
ax.grid(True, which="both", ls="-", alpha=0.5)
ax.set_xlim(0, 50)  # Restrict the view to 0-50 Hz
plt.show()

# --- PLOT 2: BRAIN BAND POWERS (Bar Chart) ---
# Band edges in Hz; Gamma is capped at 50 Hz here
bands = dict(
    Delta=(0.5, 4),
    Theta=(4, 8),
    Alpha=(8, 13),
    Beta=(13, 30),
    Gamma=(30, 50),
)

def get_band_power(freqs, psd, band_range):
    """Integrate a power spectral density over one frequency band.

    Args:
        freqs: 1-D sequence of frequencies.
        psd: 1-D sequence of PSD values, one per entry of ``freqs``.
        band_range: Sequence whose first two items are the lower and upper
            band edges; both edges are inclusive.

    Returns:
        The trapezoidal integral of ``psd`` over the frequencies inside the
        band. This is 0.0 when fewer than two frequency bins fall in the band.
    """
    # Accept plain lists as well as arrays
    freqs = np.asarray(freqs)
    psd = np.asarray(psd)

    in_band = np.logical_and(freqs >= band_range[0], freqs <= band_range[1])

    # NumPy 2.0 renamed `trapz` to `trapezoid` and deprecated the old name.
    # Prefer the new function and fall back only on older NumPy versions.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(psd[in_band], freqs[in_band])

# Calculate powers: one integrated PSD value per band, in the order of `bands`
powers_seizure = [get_band_power(f_seizure, psd_seizure, band) for band in bands.values()]
powers_normal = [get_band_power(f_normal, psd_normal, band) for band in bands.values()]

# Plot Bar Chart: two bars per band, offset by half a bar width either side
x = np.arange(len(bands))
width = 0.35

plt.figure(figsize=(10, 6))
plt.bar(x - width/2, powers_normal, width, label='Normal', color='blue', alpha=0.6)
plt.bar(x + width/2, powers_seizure, width, label='Seizure', color='red', alpha=0.6)
plt.xticks(x, list(bands))  # explicit list of band names as tick labels
# The bars are integrated PSD values and are not normalised by total power,
# so the axis is labelled as absolute rather than relative power.
plt.ylabel('Absolute Band Power')
plt.title('Brain Band Power Distribution (Energy per Frequency Band)')
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig('Band_Power_Distribution.png', dpi=300)
plt.show()

# --- PLOT 3: SPECTROGRAM (Time-Frequency Map) ---
# Time-frequency view of the seizure example, offered in place of a topographic map
f, t, Sxx = spectrogram(sig_seizure, fs=fs, nperseg=256, noverlap=128)

# Floor the power before taking the logarithm. A bin with exactly zero power
# would otherwise become -inf (with a RuntimeWarning) and corrupt the colour map.
Sxx_db = 10 * np.log10(np.maximum(Sxx, np.finfo(float).tiny))

plt.figure(figsize=(12, 5))
plt.pcolormesh(t, f, Sxx_db, shading='gouraud', cmap='inferno')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.title('Spectrogram of Epileptic Seizure (Time vs Frequency Intensity)')
plt.colorbar(label='Intensity (dB)')
plt.ylim(0, 50)  # Zoom in on 0-50 Hz
# The file name keeps its existing spelling so anything reading it keeps working
plt.savefig('Spectogram.png', dpi=300)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# 1. Pull the per-epoch accuracy series out of the Keras history
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
epochs_range = range(1, len(train_acc) + 1)  # epochs are numbered from 1 on the x-axis

# 2. Draw both curves on one axis with a shared marker/line style
fig, ax = plt.subplots(figsize=(10, 6))
curve_style = dict(linewidth=2, marker='o', markersize=4)
ax.plot(epochs_range, train_acc, label='Training Accuracy', color='blue', **curve_style)
ax.plot(epochs_range, val_acc, label='Validation (Test) Accuracy', color='orange', **curve_style)

# 3. Title, axis labels, legend and grid
ax.set_title('Training vs Validation Accuracy', fontsize=16, fontweight='bold')
ax.set_xlabel('Epochs', fontsize=12)
ax.set_ylabel('Accuracy', fontsize=12)
ax.legend(loc='lower right', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)

# 4. Write the figure to the current working directory
#    (presumably /kaggle/working/ when run on Kaggle)
fig.savefig('train_vs_test_accuracy.png', dpi=300, bbox_inches='tight')
print("Plot saved as 'train_vs_test_accuracy.png'")

# 5. Show the Plot
plt.show()