In [1]:
import pandas as pd

def load_and_concatenate_csv(file_list):
    """
    Loads multiple CSV files and concatenates them into a single DataFrame.
    
    Parameters:
    file_list (list of str): List of CSV file paths.
    
    Returns:
    pd.DataFrame: Concatenated DataFrame with all data from the files.
    """
    df_list = []
    
    for file in file_list:
        try:
            df = pd.read_csv(file)
            df_list.append(df)
        except Exception as e:
            print(f"Error loading {file}: {e}")
    
    if df_list:
        return pd.concat(df_list, ignore_index=True)
    else:
        return pd.DataFrame()  # Return an empty DataFrame if no files were loaded

# Example usage:
files = [
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eybrows quick + garbage_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eyebrows and hold_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\reading outloud-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\smiling-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\squinting-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\blinks_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye gaze left right 1_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye gaze left right 2_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye movements up down-labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\noise-labeled-af.csv"
]

combined_df = load_and_concatenate_csv(files)


  from pandas.core import (


In [2]:
combined_df = combined_df.drop(columns=['timestamp'])

In [3]:
# Ensure 'label' column is a string type to avoid comparison issues
combined_df['label'] = combined_df['label'].astype(str)

# Apply the relabeling function only to the 'label' column
combined_df['label'] = combined_df['label'].apply(lambda x: 1 if x.lower() == 'blink' else 0)


In [4]:
# Count total rows (samples)
total_samples = len(combined_df)

# Count the number of blinks
num_blinks = combined_df['label'].sum()

# Calculate percentage of blinks
blink_percentage = (num_blinks / total_samples) * 100 if total_samples > 0 else 0

print(f"Percentage of blinks in the dataset: {blink_percentage:.2f}%")


Percentage of blinks in the dataset: 3.63%


In [5]:
import numpy as np

# Find the indices where blinks occur
blink_indices = np.where(combined_df['label'].values == 1)[0]

# Identify consecutive sequences
if len(blink_indices) > 0:
    blink_durations = []
    start = blink_indices[0]
    
    for i in range(1, len(blink_indices)):
        if blink_indices[i] != blink_indices[i - 1] + 1:  # If not consecutive
            blink_durations.append(blink_indices[i - 1] - start + 1)
            start = blink_indices[i]  # Start a new blink event

    # Add the last detected blink duration
    blink_durations.append(blink_indices[-1] - start + 1)

    # Compute average blink length and standard deviation
    avg_blink_length = np.mean(blink_durations)
    std_blink_length = np.std(blink_durations)

    print(f"Average blink duration: {avg_blink_length:.2f} samples")
    print(f"Standard deviation of blink duration: {std_blink_length:.2f} samples")
else:
    print("No blinks found in the dataset.")


Average blink duration: 152.89 samples
Standard deviation of blink duration: 25.66 samples


In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

# Parameters
num_channels = 16
window_size = 150
step_size = 30
image_height = 100
min_val, max_val = -100, 100
blink_threshold = 0.8  
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15  

# Extract EMG data and labels
emg_data = combined_df.iloc[:, :-1].values  
labels = combined_df['label'].values  

# Create overlapping windows
def create_windows(emg_data, labels, window_size, step_size, blink_threshold):
    """Generates overlapping windows from EMG data."""
    X, y = [], []
    num_samples = len(emg_data)

    for start in range(0, num_samples - window_size, step_size):
        window_data = emg_data[start:start + window_size, :]  
        window_labels = labels[start:start + window_size]  

        blink_ratio = np.mean(window_labels == 1)
        window_label = 1 if blink_ratio >= blink_threshold else 0

        X.append(window_data)
        y.append(window_label)

    return np.array(X), np.array(y)

X_windows, y_windows = create_windows(emg_data, labels, window_size, step_size, blink_threshold)

# Convert EMG signals to images
def normalize_and_convert_to_image(signal):
    """ Converts EMG window to an image of size (100, 150). """
    normalized_signal = (signal - min_val) / (max_val - min_val)
    normalized_signal = np.clip(normalized_signal, 0, 1)
    image = np.zeros((image_height, window_size))
    for t in range(window_size):
        pixel_y = int(normalized_signal[t] * (image_height - 1))
        image[pixel_y, t] = 1
    return image

# Convert each window to an image for each channel
X_images = np.zeros((len(X_windows), num_channels, image_height, window_size, 1), dtype=np.float32)
for i in range(len(X_windows)):
    for ch in range(num_channels):
        X_images[i, ch, :, :, 0] = normalize_and_convert_to_image(X_windows[i, :, ch])

# Train-Test-Validation split
X_train, X_temp, y_train, y_temp = train_test_split(X_images, y_windows, test_size=(val_ratio + test_ratio), stratify=y_windows, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=(test_ratio / (val_ratio + test_ratio)), stratify=y_temp, random_state=42)

# Memory-efficient oversampling (batch-wise)
def batch_oversample(X, y, batch_size=1000):
    """Performs oversampling in batches to avoid memory errors."""
    oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    X_resampled, y_resampled = [], []
    
    for start in range(0, len(X), batch_size):
        end = min(start + batch_size, len(X))
        X_batch = X[start:end].reshape(end - start, -1)  # Flatten batch
        y_batch = y[start:end]

        X_batch_res, y_batch_res = oversampler.fit_resample(X_batch, y_batch)
        X_resampled.append(X_batch_res.reshape(-1, num_channels, image_height, window_size, 1))
        y_resampled.append(y_batch_res)

    return np.concatenate(X_resampled, axis=0), np.concatenate(y_resampled, axis=0)

# Apply batch-wise oversampling
X_train_resampled, y_train_resampled = batch_oversample(X_train, y_train, batch_size=2000)

# Convert to list format for multi-channel CNN
X_train_list = [X_train_resampled[:, ch, :, :, :] for ch in range(num_channels)]
X_val_list = [X_val[:, ch, :, :, :] for ch in range(num_channels)]
X_test_list = [X_test[:, ch, :, :, :] for ch in range(num_channels)]

# Compute class weights
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train_resampled), y=y_train_resampled)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

print(f"Training data shape: {X_train_resampled.shape}, Labels shape: {y_train_resampled.shape}")
print(f"Class weights: {class_weight_dict}")



Training data shape: (14500, 16, 100, 150, 1), Labels shape: (14500,)
Class weights: {0: 1.0, 1: 1.0}


In [7]:

from collections import Counter

# Count labels in the training set
class_counts = Counter(y_train_resampled)
print(f"Class Distribution After Oversampling: {class_counts}")

# Compute class weights manually
from sklearn.utils.class_weight import compute_class_weight
unique_classes = np.unique(y_train_resampled)
class_weights = compute_class_weight(class_weight="balanced", classes=unique_classes, y=y_train_resampled)
class_weight_dict = {c: w for c, w in zip(unique_classes, class_weights)}

print(f"Computed Class Weights: {class_weight_dict}")


Class Distribution After Oversampling: Counter({0: 7250, 1: 7250})
Computed Class Weights: {0: 1.0, 1: 1.0}


In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Model Parameters
input_shape = (image_height, window_size, 1)  # Each input image shape (100x150, 1 channel)
num_classes = 2  # Binary classification (Blink or No Blink)

def build_multi_channel_cnn():
    """CNN Model for Multi-Channel EMG-Based Blink Detection"""
    inputs = [keras.Input(shape=input_shape) for _ in range(num_channels)]  # 16 separate inputs

    conv_blocks = []
    for inp in inputs:
        x = layers.Conv2D(16, (3, 3), activation="relu", padding="same")(inp)
        x = layers.MaxPooling2D((2, 2))(x)
        x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(x)
        x = layers.MaxPooling2D((2, 2))(x)
        x = layers.Flatten()(x)
        conv_blocks.append(x)

    # Merge outputs from all channels
    merged = layers.Concatenate()(conv_blocks)
    x = layers.Dense(128, activation="relu")(merged)
    x = layers.Dropout(0.3)(x)  # Reduce overfitting
    x = layers.Dense(64, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

# Create the model
multi_channel_cnn = build_multi_channel_cnn()
multi_channel_cnn.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 100, 150, 1  0           []                               
                                )]                                                                
                                                                                                  
 input_2 (InputLayer)           [(None, 100, 150, 1  0           []                               
                                )]                                                                
                                                                                                  
 input_3 (InputLayer)           [(None, 100, 150, 1  0           []                               
                                )]                                                            

                                                                                                  
 conv2d_22 (Conv2D)             (None, 100, 150, 16  160         ['input_12[0][0]']               
                                )                                                                 
                                                                                                  
 conv2d_24 (Conv2D)             (None, 100, 150, 16  160         ['input_13[0][0]']               
                                )                                                                 
                                                                                                  
 conv2d_26 (Conv2D)             (None, 100, 150, 16  160         ['input_14[0][0]']               
                                )                                                                 
                                                                                                  
 conv2d_28

 conv2d_25 (Conv2D)             (None, 50, 75, 32)   4640        ['max_pooling2d_24[0][0]']       
                                                                                                  
 conv2d_27 (Conv2D)             (None, 50, 75, 32)   4640        ['max_pooling2d_26[0][0]']       
                                                                                                  
 conv2d_29 (Conv2D)             (None, 50, 75, 32)   4640        ['max_pooling2d_28[0][0]']       
                                                                                                  
 conv2d_31 (Conv2D)             (None, 50, 75, 32)   4640        ['max_pooling2d_30[0][0]']       
                                                                                                  
 max_pooling2d_1 (MaxPooling2D)  (None, 25, 37, 32)  0           ['conv2d_1[0][0]']               
                                                                                                  
 max_pooli

 concatenate (Concatenate)      (None, 473600)       0           ['flatten[0][0]',                
                                                                  'flatten_1[0][0]',              
                                                                  'flatten_2[0][0]',              
                                                                  'flatten_3[0][0]',              
                                                                  'flatten_4[0][0]',              
                                                                  'flatten_5[0][0]',              
                                                                  'flatten_6[0][0]',              
                                                                  'flatten_7[0][0]',              
                                                                  'flatten_8[0][0]',              
                                                                  'flatten_9[0][0]',              
          

In [9]:
# Train the CNN with memory-efficient data loading
history = multi_channel_cnn.fit(
    X_train_list, y_train_resampled,
    epochs=2, batch_size=32,
    validation_data=(X_val_list, y_val),
    class_weight=class_weight_dict  # Handles any remaining imbalance
)


Epoch 1/2
Epoch 2/2
 57/454 [==>...........................] - ETA: 13:43 - loss: 0.0014 - accuracy: 0.9995

KeyboardInterrupt: 

In [10]:
# Evaluate on test set
test_loss, val_acc = multi_channel_cnn.evaluate(X_val_list, y_val)
print(f"Test Accuracy: {test_acc:.4f}")




NameError: name 'test_acc' is not defined

In [12]:
import numpy as np
import pandas as pd

# Example usage:
files = [
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eybrows quick + garbage_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eyebrows and hold_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\reading outloud-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\smiling-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\squinting-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\blinks_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye gaze left right 1_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye gaze left right 2_labeled_af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\eye movements up down-labeled-af.csv",
    r"C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\noise-labeled-af.csv"
]

# Function to count blink sequences in a dataset
def count_blink_sequences(labels):
    """Counts consecutive sequences of blinks (1s) in the label column."""
    blink_sequences = []
    count = 0

    for i in range(1, len(labels)):
        if labels[i] == 1 and labels[i - 1] == 1:
            count += 1  # Continuing a blink sequence
        elif labels[i] == 1:  # New blink sequence starts
            count = 1
        else:  # End of blink sequence
            if count > 0:
                blink_sequences.append(count)
            count = 0

    if count > 0:  # Capture last sequence if it ends at the last sample
        blink_sequences.append(count)

    return len(blink_sequences)  # Return number of blink sequences

# Dictionary to store blink sequence counts for each file
file_blink_counts = {}

# Process each file separately
file_data = {}

for file in files:
    df = pd.read_csv(file)
    
    # Ensure 'label' column is a string type to avoid comparison issues
    df['label'] = df['label'].astype(str)
    
    # Apply the relabeling function only to the 'label' column
    df['label'] = df['label'].apply(lambda x: 1 if x.lower() == 'blink' else 0)
    
    # Drop the timestamp column
    df = df.drop(columns=['timestamp'])
    
    num_blink_sequences = count_blink_sequences(df['label'].values)
    file_blink_counts[file] = num_blink_sequences
    file_data[file] = df  # Store the full dataset

# Print blink sequence distribution
print("Blink Sequences Per File:")
for file, count in file_blink_counts.items():
    print(f"{file}: {count} sequences")



Blink Sequences Per File:
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eybrows quick + garbage_labeled_af.csv: 3 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\raise eyebrows and hold_labeled_af.csv: 0 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\reading outloud-labeled-af.csv: 0 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\smiling-labeled-af.csv: 0 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\squinting-labeled-af.csv: 3 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרויקט Blinkaid\data from subject1 - Yonathan\tagged_data\after_format\blinks_labeled_af.csv: 57 sequences
C:\Users\user\OneDrive - post.bgu.ac.il\פרוי