<a href="https://colab.research.google.com/github/potuu/Implementation_of_A_Domain_Generative_Graph_Network_for_EEG_Based_Emotion_Recognition/blob/main/1DCNNforEEG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import os
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [31]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
def process_eeg_directory(directory_path):
    """
    Processes all subject and experiment files in a directory, groups data by `user_id`,
    adds a unique ID based on the file name, and combines all grouped data into a single DataFrame.

    Args:
    - directory_path (str): Path to the directory containing EEG data files.

    Returns:
    - combined_data (pd.DataFrame): A single DataFrame containing all grouped data.
    """
    grouped_data = []

    # List all files in the directory
    file_list = [f for f in os.listdir(directory_path) if f.endswith('.csv')]

    # Loop through each file
    for file_name in file_list:
        file_path = os.path.join(directory_path, file_name)

        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)

        # Check if `user_id` column exists
        if 'user_id' in df.columns:
            # Add a column to indicate the source file (unique ID for rows from this file)
            df['source_file'] = os.path.splitext(file_name)[0]  # File name without extension

            # Group by `user_id` and aggregate data into lists
            grouped = df.groupby('user_id').agg(list).reset_index()

            # Preserve the unique ID (source file) in the grouped data
            grouped['source_file'] = os.path.splitext(file_name)[0]

            # Append the grouped data to the list
            grouped_data.append(grouped)
        else:
            print(f"'user_id' column not found in file: {file_name}")

    # Combine all grouped data into a single DataFrame
    if grouped_data:
        combined_data = pd.concat(grouped_data, ignore_index=True)
        print("All files have been grouped by `user_id` and combined successfully.")
    else:
        combined_data = pd.DataFrame()
        print("No data was processed or combined.")

    return combined_data


# Example usage
directory_path = "/content/drive/MyDrive/eeg_database_kou/data_trail_csv"  # Replace with your actual directory path
combined_eeg_data = process_eeg_directory(directory_path)

# Save the combined data to a CSV file (optional)
combined_eeg_data.to_csv("grouped_combined_eeg_data.csv", index=False)

# Display a preview of the combined data
print("Combined EEG Data:")
print(combined_eeg_data.head())

All files have been grouped by `user_id` and combined successfully.
Combined EEG Data:
                                user_id  \
0  06f49575-a0ee-46fd-8d0d-4133886d62a5   
1  4737537c-0a5d-46e2-9c1f-9872520872c4   
2  4737537c-0a5d-46e2-9c1f-9872520872c4   
3  06f49575-a0ee-46fd-8d0d-4133886d62a5   
4  06f49575-a0ee-46fd-8d0d-4133886d62a5   

                                           Timestamp  \
0  [10:51:23 AM, 10:51:23 AM, 10:51:23 AM, 10:51:...   
1  [10:39:08 AM, 10:39:08 AM, 10:39:08 AM, 10:39:...   
2  [10:40:17 AM, 10:40:17 AM, 10:40:17 AM, 10:40:...   
3  [10:54:24 AM, 10:54:24 AM, 10:54:24 AM, 10:54:...   
4  [10:54:59 AM, 10:54:59 AM, 10:54:59 AM, 10:54:...   

                                             EEG.AF3  \
0  [3891.281982, 3895.384521, 3906.153809, 3900.0...   
1  [4017.435791, 4020.512939, 4015.384521, 4005.1...   
2  [4081.538574, 4083.589844, 4080.512939, 4074.3...   
3  [3981.025635, 3972.820557, 3965.641113, 3968.2...   
4  [3951.281982, 3953.846191, 3957.43

In [33]:
# Sort the DataFrame by `source_file` with proper numerical order
combined_eeg_data['source_file'] = combined_eeg_data['source_file'].str.split('_').map(lambda x: (int(x[0]), int(x[1])))
combined_eeg_data = combined_eeg_data.sort_values(by='source_file').reset_index(drop=True)

# Display sorted `source_file` column
print(combined_eeg_data['source_file'].head())

0    (1, 1)
1    (1, 2)
2    (1, 3)
3    (1, 4)
4    (1, 5)
Name: source_file, dtype: object


In [34]:
labels_data_path = "/content/drive/MyDrive/eeg_database_kou/label_processed_data.csv"
labels_data = pd.read_csv(labels_data_path)
split_columns = ["user_id", "trial", "start_date", "finish_date", "duration", "val", "aro", "dom"]
processed_labels_data = labels_data.iloc[:, 0].str.split(";", expand=True)
processed_labels_data.columns = split_columns

# Inspect the transformed data
print("Labels Preview:")
print(processed_labels_data.head())

# Check for missing values in label data
print("\nMissing Values in Label Data:")
print(processed_labels_data.isnull().sum())

Labels Preview:
                                user_id trial                  start_date  \
0  7a4efc4f-6def-4ba3-a56d-672aec0fe324     1  2023-10-11 12:45:06.863000   
1  7a4efc4f-6def-4ba3-a56d-672aec0fe324     2  2023-10-11 12:45:41.053000   
2  7a4efc4f-6def-4ba3-a56d-672aec0fe324     3  2023-10-11 12:46:15.225000   
3  7a4efc4f-6def-4ba3-a56d-672aec0fe324     4  2023-10-11 12:46:49.414000   
4  7a4efc4f-6def-4ba3-a56d-672aec0fe324     5  2023-10-11 12:47:33.596000   

                  finish_date duration val aro dom  
0  2023-10-11 12:45:35.888000       29   1   0   1  
1  2023-10-11 12:46:10.055000       29   0   1   1  
2  2023-10-11 12:46:44.240000       29   1   1   0  
3  2023-10-11 12:47:18.427000       29   0   1   1  
4  2023-10-11 12:48:02.605000       29   1   0   0  

Missing Values in Label Data:
user_id        0
trial          0
start_date     0
finish_date    0
duration       0
val            0
aro            0
dom            0
dtype: int64


In [35]:
import math

# Add the `source_file` column to processed_labels_data
processed_labels_data['source_file'] = processed_labels_data.apply(
    lambda row: (math.ceil((row.name + 1) / 20), int(row['trial'])),  # 20 trials per subject
    axis=1
)

# Inspect the updated DataFrame
print("Processed Labels with Source File:")
print(processed_labels_data.tail())


Processed Labels with Source File:
                                  user_id trial                  start_date  \
795  4737537c-0a5d-46e2-9c1f-9872520872c4    16  2023-12-14 10:37:49.814000   
796  4737537c-0a5d-46e2-9c1f-9872520872c4    17  2023-12-14 10:38:34.042000   
797  4737537c-0a5d-46e2-9c1f-9872520872c4    18  2023-12-14 10:39:08.306000   
798  4737537c-0a5d-46e2-9c1f-9872520872c4    19  2023-12-14 10:39:42.525000   
799  4737537c-0a5d-46e2-9c1f-9872520872c4    20  2023-12-14 10:40:16.724000   

                    finish_date duration val aro dom source_file  
795  2023-12-14 10:38:18.829000       29   0   1   1    (40, 16)  
796  2023-12-14 10:39:03.078000       29   1   1   1    (40, 17)  
797  2023-12-14 10:39:37.327000       29   0   1   1    (40, 18)  
798  2023-12-14 10:40:11.539000       29   1   1   1    (40, 19)  
799  2023-12-14 10:40:45.751000       29   0   1   1    (40, 20)  


In [36]:
from scipy.signal import butter, filtfilt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Bandpass Filter Function
def bandpass_filter(data, lowcut=1.0, highcut=50.0, fs=128.0, order=4):
    """
    Apply a bandpass filter to EEG data.

    Args:
    - data (ndarray): EEG data (timesteps x channels).
    - lowcut (float): Low cutoff frequency in Hz.
    - highcut (float): High cutoff frequency in Hz.
    - fs (float): Sampling frequency in Hz.
    - order (int): Filter order.

    Returns:
    - filtered_data (ndarray): Bandpass filtered EEG data.
    """
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    filtered_data = filtfilt(b, a, data, axis=0)
    return filtered_data

# Preprocess EEG Data without Labels
def preprocess_eeg_data(df, target_length=3712, lowcut=1.0, highcut=50.0, fs=128.0):
    """
    Preprocess EEG data by applying a bandpass filter and fixing the length.

    Args:
    - df (DataFrame): Raw EEG data DataFrame (columns are EEG channels).
    - target_length (int): Fixed length for time-series data.
    - lowcut (float): Low cutoff frequency for the filter.
    - highcut (float): High cutoff frequency for the filter.
    - fs (float): Sampling frequency.

    Returns:
    - data (ndarray): Preprocessed EEG data of shape (samples, timesteps, channels).
    """
    n_samples = len(df)
    n_channels = len(df.columns)  # Assuming all columns are EEG channels
    data = np.zeros((n_samples, target_length, n_channels))

    for i in range(n_samples):
        for j in range(n_channels):
            channel_data = df.iloc[i, j]
            if isinstance(channel_data, str):
                channel_data = eval(channel_data)  # Convert string to list

            # Apply bandpass filter
            channel_data = bandpass_filter(np.array(channel_data), lowcut, highcut, fs)

            # Adjust length
            if len(channel_data) < target_length:
                channel_data = np.pad(channel_data, (0, target_length - len(channel_data)), 'constant')
            else:
                channel_data = channel_data[:target_length]

            data[i, :, j] = channel_data

    return data

# Min-Max Scaling
def apply_min_max_scaling(data):
    """
    Apply Min-Max scaling to EEG data.

    Args:
    - data (ndarray): EEG data (samples x timesteps x channels).

    Returns:
    - scaled_data (ndarray): Scaled EEG data.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    n_samples, n_timesteps, n_channels = data.shape
    scaled_data = np.zeros_like(data)

    # Scale each channel independently
    for ch in range(n_channels):
        reshaped_data = data[:, :, ch].reshape(-1, 1)  # Flatten for scaling
        scaled_channel = scaler.fit_transform(reshaped_data).reshape(n_samples, n_timesteps)
        scaled_data[:, :, ch] = scaled_channel

    return scaled_data

In [37]:
# Ensure 'source_file' is in tuple format for both DataFrames
combined_eeg_data['source_file'] = combined_eeg_data['source_file'].apply(
    lambda x: eval(x) if isinstance(x, str) else x
)
processed_labels_data['source_file'] = processed_labels_data['source_file'].apply(
    lambda x: eval(x) if isinstance(x, str) else x
)

# Merge combined_eeg_data with processed_labels_data based on 'source_file'
merged_data = combined_eeg_data.merge(
    processed_labels_data[['source_file', 'aro', 'val']],
    on='source_file',
    how='left'  # Use 'left' join to keep all rows from combined_eeg_data
)

# Inspect the resulting DataFrame
print("Merged Data (with Labels):")
print(merged_data.head())

# Save the merged DataFrame to a new CSV file (optional)
#merged_data.to_csv("merged_eeg_with_labels.csv", index=False)

Merged Data (with Labels):
                                user_id  \
0  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
1  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
2  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
3  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
4  7a4efc4f-6def-4ba3-a56d-672aec0fe324   

                                           Timestamp  \
0  [12:45:07 PM, 12:45:07 PM, 12:45:07 PM, 12:45:...   
1  [12:45:41 PM, 12:45:41 PM, 12:45:41 PM, 12:45:...   
2  [12:46:15 PM, 12:46:15 PM, 12:46:15 PM, 12:46:...   
3  [12:46:49 PM, 12:46:49 PM, 12:46:49 PM, 12:46:...   
4  [12:47:34 PM, 12:47:34 PM, 12:47:34 PM, 12:47:...   

                                             EEG.AF3  \
0  [4018.461426, 4014.871826, 4011.794922, 4010.2...   
1  [3984.615479, 3990.256348, 3988.205078, 3980.5...   
2  [3985.641113, 3987.692383, 3985.641113, 3985.6...   
3  [4014.358887, 4011.794922, 4015.897461, 4015.3...   
4  [3727.692383, 3735.384521, 3740.512939, 3735.3...   

                                        

In [42]:
num_user_ids = len(merged_data)

print(f"Number of samples: {num_user_ids}")

Number of samples: 800


In [43]:
merged_data.shape

(800, 19)

In [44]:
# 384. satırdaki EEG verilerini yazdırmak
eeg_384 = merged_data[eeg_columns].iloc[383]  # Pandas DataFrame'deki satırlar 0'dan başlar, bu yüzden 384. satır 383. indekse karşılık gelir
print(eeg_384)


EEG.AF3    [4054.871826, 4058.974365, 4064.615479, 4063.5...
EEG.F7     [4228.205078, 4229.230957, 4227.692383, 4224.1...
EEG.F3     [3712.820557, 3715.384521, 3720.0, 3718.974365...
EEG.FC5    [4189.230957, 4189.230957, 4187.179688, 4189.2...
EEG.T7     [4064.615479, 4063.589844, 4062.05127, 4062.05...
EEG.P7     [4127.179688, 4128.205078, 4129.230957, 4128.2...
EEG.O1     [4149.230957, 4149.230957, 4153.846191, 4153.3...
EEG.O2     [4127.179688, 4124.102539, 4124.615234, 4129.2...
EEG.P8     [3911.281982, 3906.666748, 3907.692383, 3913.8...
EEG.T8     [4594.358887, 4591.282227, 4594.358887, 4598.9...
EEG.FC6    [4470.769043, 4470.256348, 4464.615234, 4464.6...
EEG.F4     [4556.410156, 4584.615234, 4598.974121, 4613.8...
EEG.F8     [4083.589844, 4084.615479, 4077.435791, 4073.3...
EEG.AF4    [4606.153809, 4607.692383, 4601.538574, 4597.4...
Name: 383, dtype: object


Veri Normalizasyonu

In [46]:
import pandas as pd
import numpy as np

# Örnek: merged_data veri çerçevesi
# merged_data = pd.read_csv("your_data.csv")  # Gerçek verinizle bu satırı kullanın

# EEG kanal isimleri
eeg_columns = [
    "EEG.AF3", "EEG.F7", "EEG.F3", "EEG.FC5", "EEG.T7", "EEG.P7",
    "EEG.O1", "EEG.O2", "EEG.P8", "EEG.T8", "EEG.FC6", "EEG.F4",
    "EEG.F8", "EEG.AF4"
]

# Normalizasyon fonksiyonu (min-max)
def normalize_eeg_data(data):
    min_val = np.min(data)
    max_val = np.max(data)
    return [(x - min_val) / (max_val - min_val) for x in data]

# merged_data üzerinde normalizasyon işlemi yapılacak
normalized_merged_data = merged_data.copy()  # Orijinal veri kopyalanır

# EEG sütunlarında normalizasyon yapılır
for column in eeg_columns:
    normalized_merged_data[column] = normalized_merged_data[column].apply(normalize_eeg_data)

# Sonuçları kontrol et
print(normalized_merged_data)


                                  user_id  \
0    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
1    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
2    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
3    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
4    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
..                                    ...   
795  4737537c-0a5d-46e2-9c1f-9872520872c4   
796  4737537c-0a5d-46e2-9c1f-9872520872c4   
797  4737537c-0a5d-46e2-9c1f-9872520872c4   
798  4737537c-0a5d-46e2-9c1f-9872520872c4   
799  4737537c-0a5d-46e2-9c1f-9872520872c4   

                                             Timestamp  \
0    [12:45:07 PM, 12:45:07 PM, 12:45:07 PM, 12:45:...   
1    [12:45:41 PM, 12:45:41 PM, 12:45:41 PM, 12:45:...   
2    [12:46:15 PM, 12:46:15 PM, 12:46:15 PM, 12:46:...   
3    [12:46:49 PM, 12:46:49 PM, 12:46:49 PM, 12:46:...   
4    [12:47:34 PM, 12:47:34 PM, 12:47:34 PM, 12:47:...   
..                                                 ...   
795  [10:37:50 AM, 10:37:50 AM, 10:37:50 AM, 10:37:..

In [47]:
# 275. satırı gösterme (0 tabanlı indeksleme olduğu için 274 yazmalıyız)
print(merged_data.iloc[274])

# Normalleştirilmiş veride 275. satırı gösterme
print(normalized_merged_data.iloc[274])


user_id                     b759387d-fc48-4fb8-94e6-a1a68da5a0ca
Timestamp      [03:50:35 PM, 03:50:35 PM, 03:50:35 PM, 03:50:...
EEG.AF3        [3971.794922, 3972.307617, 3964.615479, 3956.9...
EEG.F7         [4202.05127, 4199.487305, 4200.0, 4206.666504,...
EEG.F3         [3761.025635, 3763.076904, 3757.435791, 3755.3...
EEG.FC5        [4080.0, 4078.974365, 4076.410156, 4077.94873,...
EEG.T7         [4030.256348, 4030.769287, 4029.743652, 4026.6...
EEG.P7         [4120.512695, 4120.512695, 4118.461426, 4115.8...
EEG.O1         [4108.717773, 4110.256348, 4109.743652, 4107.1...
EEG.O2         [4056.410156, 4051.794922, 4046.153809, 4046.1...
EEG.P8         [3911.281982, 3909.743652, 3908.718018, 3908.7...
EEG.T8         [4554.871582, 4554.358887, 4546.666504, 4540.0...
EEG.FC6        [4533.846191, 4530.769043, 4521.538574, 4516.4...
EEG.F4         [4538.974121, 4540.512695, 4533.333496, 4526.6...
EEG.F8         [4163.077148, 4156.922852, 4147.692383, 4148.2...
EEG.AF4        [4615.3847

Band-Pass filter to Normalized Data

In [51]:
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt

# Band-pass filtresi tasarımı
def butter_bandpass(lowcut, highcut, fs, order=4):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut, highcut, fs, order=4):
    b, a = butter_bandpass(lowcut, highcut, fs, order)
    return filtfilt(b, a, data)

# Örnekleme frekansı (fs) ve band-pass filtre aralığı
fs = 500  # Örnekleme frekansı (Hz)
lowcut = 1  # Düşük geçiş frekansı (Hz)
highcut = 50  # Yüksek geçiş frekansı (Hz)

# Band-pass filtreyi her bir EEG kanalına uygulama
filtered_data = merged_data.copy()

for column in merged_data.columns[2:-3]:  # EEG kanal verileri
    # Veriyi numpy dizisine çevir ve her bir listede filtre uygula
    def apply_filter_to_list(lst):
        return bandpass_filter(np.array(lst), lowcut, highcut, fs)

    filtered_data[column] = merged_data[column].apply(apply_filter_to_list)



In [55]:
print(filtered_data)  # Filtrelenmiş verilerin ilk birkaç satırını yazdır

                                  user_id  \
0    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
1    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
2    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
3    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
4    7a4efc4f-6def-4ba3-a56d-672aec0fe324   
..                                    ...   
795  4737537c-0a5d-46e2-9c1f-9872520872c4   
796  4737537c-0a5d-46e2-9c1f-9872520872c4   
797  4737537c-0a5d-46e2-9c1f-9872520872c4   
798  4737537c-0a5d-46e2-9c1f-9872520872c4   
799  4737537c-0a5d-46e2-9c1f-9872520872c4   

                                             Timestamp  \
0    [12:45:07 PM, 12:45:07 PM, 12:45:07 PM, 12:45:...   
1    [12:45:41 PM, 12:45:41 PM, 12:45:41 PM, 12:45:...   
2    [12:46:15 PM, 12:46:15 PM, 12:46:15 PM, 12:46:...   
3    [12:46:49 PM, 12:46:49 PM, 12:46:49 PM, 12:46:...   
4    [12:47:34 PM, 12:47:34 PM, 12:47:34 PM, 12:47:...   
..                                                 ...   
795  [10:37:50 AM, 10:37:50 AM, 10:37:50 AM, 10:37:..

## FEATURE SELECTION
DMIM UYGULANMASI

In [61]:
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt

# Band-pass filtresi tasarımı
def butter_bandpass(lowcut, highcut, fs, order=4):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut, highcut, fs, order=4):
    b, a = butter_bandpass(lowcut, highcut, fs, order)
    return filtfilt(b, a, data)

# Örnekleme frekansı (fs) ve band-pass filtre aralığı
fs = 500  # Örnekleme frekansı (Hz)
lowcut = 1  # Düşük geçiş frekansı (Hz)
highcut = 50  # Yüksek geçiş frekansı (Hz)

# EEG verisini DataFrame'den alma
merged_data = filtered_data  # Verinizi yüklediğiniz yol

# EEG kanal isimleri (örneğin: 'EEG.AF3', 'EEG.F7', vb.)
eeg_columns = [col for col in merged_data.columns if col.startswith('EEG.')]

# Band-pass filtreyi her bir EEG kanalına uygulama
filtered_data = merged_data.copy()

for column in eeg_columns:
    def apply_filter_to_list(lst):
        # Listeyi numpy dizisine çevir ve band-pass filtreyi uygula
        filtered_signal = bandpass_filter(np.array(lst), lowcut, highcut, fs)
        return filtered_signal.tolist()  # Sonucu liste olarak döndür

    # Uygulama ve yeni değerleri atama
    filtered_data[column] = merged_data[column].apply(apply_filter_to_list)

# Filtrelenmiş veriyi kontrol etme
print(filtered_data.head())


                                user_id  \
0  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
1  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
2  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
3  7a4efc4f-6def-4ba3-a56d-672aec0fe324   
4  7a4efc4f-6def-4ba3-a56d-672aec0fe324   

                                           Timestamp  \
0  [12:45:07 PM, 12:45:07 PM, 12:45:07 PM, 12:45:...   
1  [12:45:41 PM, 12:45:41 PM, 12:45:41 PM, 12:45:...   
2  [12:46:15 PM, 12:46:15 PM, 12:46:15 PM, 12:46:...   
3  [12:46:49 PM, 12:46:49 PM, 12:46:49 PM, 12:46:...   
4  [12:47:34 PM, 12:47:34 PM, 12:47:34 PM, 12:47:...   

                                             EEG.AF3  \
0  [0.8989826146925198, -1.1010990085574113, -2.9...   
1  [-0.183680442631863, -0.8188087855567731, -1.4...   
2  [-0.0807311386854228, 1.263906933282155, 2.538...   
3  [7.04494745879572, 6.907241162669472, 7.078885...   
4  [-19.899709897960438, -17.086077120392645, -14...   

                                              EEG.F7  \
0  [11.6975

# 1DCNN MODEL

## BINARY MODEL

In [67]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, TensorDataset


In [68]:
# Binary EEG-CNN Modeli
class EEG_CNN_Binary(nn.Module):
    def __init__(self, input_size, num_classes=2):
        super(EEG_CNN_Binary, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.AvgPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(32 * (input_size // 2), 128)
        self.fc2 = nn.Linear(128, num_classes)  # Binary classification (2 classes)

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.bn1(self.conv1(x))))
        x = x.view(-1, 32 * (x.size(2)))  # Flatten for fully connected layers
        x = self.dropout(nn.ReLU()(self.fc1(x)))
        x = self.fc2(x)
        return x


In [69]:
# Eğitim için hiperparametreler
input_size = 256
num_classes = 2  # Binary classification için 2 sınıf
batch_size = 32
epochs = 10
learning_rate = 0.001

# Model, loss function ve optimizer
model_binary = EEG_CNN_Binary(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()  # Binary için CrossEntropyLoss
optimizer = optim.Adam(model_binary.parameters(), lr=learning_rate)

# Dummy Dataset - Gerçek veriler yerine geçici veri kullanıyoruz
X_train = torch.randn(1000, 1, input_size)  # 1000 örnek
y_train_val = torch.randint(0, 2, (1000,))  # val için etiketler
y_train_aro = torch.randint(0, 2, (1000,))  # aro için etiketler

In [70]:
# DataLoader
train_dataset = TensorDataset(X_train, y_train_val, y_train_aro)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [71]:
# Evaluation function for calculating metrics
def evaluate_model(model, data_loader, target):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels_val, labels_aro in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            if target == "val":
                labels = labels_val
            else:
                labels = labels_aro

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    return accuracy, precision, recall, f1

In [72]:
# Eğitim döngüsü
for epoch in range(epochs):
    model_binary.train()
    running_loss = 0.0
    for inputs, labels_val, labels_aro in train_loader:
        optimizer.zero_grad()
        outputs = model_binary(inputs)

        # Her iki hedef için ayrı kayıplar hesaplanabilir
        loss_val = criterion(outputs, labels_val)  # val için kayıp
        loss_aro = criterion(outputs, labels_aro)  # aro için kayıp

        # Kayıpların toplamı
        loss = loss_val + loss_aro
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Epoch sonrasında metrikleri hesapla (val ve aro için)
    val_acc, val_prec, val_rec, val_f1 = evaluate_model(model_binary, train_loader, target="val")
    aro_acc, aro_prec, aro_rec, aro_f1 = evaluate_model(model_binary, train_loader, target="aro")

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Loss: {running_loss/len(train_loader):.4f}")
    print(f"Validation Metrics -> Accuracy: {val_acc:.4f}, Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1-Score: {val_f1:.4f}")
    print(f"Arousal Metrics -> Accuracy: {aro_acc:.4f}, Precision: {aro_prec:.4f}, Recall: {aro_rec:.4f}, F1-Score: {aro_f1:.4f}")


Epoch 1/10
Loss: 1.5167
Validation Metrics -> Accuracy: 0.5340, Precision: 1.0000, Recall: 0.0064, F1-Score: 0.0127
Arousal Metrics -> Accuracy: 0.4920, Precision: 1.0000, Recall: 0.0059, F1-Score: 0.0117
Epoch 2/10
Loss: 1.3938
Validation Metrics -> Accuracy: 0.5840, Precision: 0.6550, Recall: 0.2388, F1-Score: 0.3500
Arousal Metrics -> Accuracy: 0.5420, Precision: 0.6550, Recall: 0.2192, F1-Score: 0.3284
Epoch 3/10
Loss: 1.3795
Validation Metrics -> Accuracy: 0.6320, Precision: 0.5910, Recall: 0.6994, F1-Score: 0.6406
Arousal Metrics -> Accuracy: 0.6420, Precision: 0.6378, Recall: 0.6928, F1-Score: 0.6642
Epoch 4/10
Loss: 1.3702
Validation Metrics -> Accuracy: 0.5090, Precision: 0.4881, Recall: 0.9638, F1-Score: 0.6480
Arousal Metrics -> Accuracy: 0.5550, Precision: 0.5356, Recall: 0.9706, F1-Score: 0.6903
Epoch 5/10
Loss: 1.3718
Validation Metrics -> Accuracy: 0.6370, Precision: 0.6906, Recall: 0.4094, F1-Score: 0.5141
Arousal Metrics -> Accuracy: 0.6170, Precision: 0.7302, Recall: 

## MULTICLASS MODEL

In [74]:
# Multiclass EEG-CNN Modeli
class EEG_CNN_Multiclass(nn.Module):
    def __init__(self, input_size, num_classes=3):
        super(EEG_CNN_Multiclass, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.AvgPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(32 * (input_size // 2), 128)
        self.fc2 = nn.Linear(128, num_classes)  # Multiclass classification için 3 sınıf

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.bn1(self.conv1(x))))
        x = x.view(-1, 32 * (x.size(2)))  # Flatten for fully connected layers
        x = self.dropout(nn.ReLU()(self.fc1(x)))
        x = self.fc2(x)
        return x


In [75]:
# Eğitim için hiperparametreler
input_size = 256
num_classes = 3  # Multiclass classification için 3 sınıf
batch_size = 32
epochs = 10
learning_rate = 0.001

In [76]:
# Model, loss function ve optimizer
model_multiclass = EEG_CNN_Multiclass(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()  # Multiclass için CrossEntropyLoss
optimizer = optim.Adam(model_multiclass.parameters(), lr=learning_rate)

# Dummy Dataset - Gerçek veriler yerine geçici veri kullanıyoruz
X_train = torch.randn(1000, 1, input_size)  # 1000 örnek
y_train_val = torch.randint(0, 3, (1000,))  # val için 3 sınıf etiketler
y_train_aro = torch.randint(0, 3, (1000,))  # aro için 3 sınıf etiketler

# DataLoader
train_dataset = TensorDataset(X_train, y_train_val, y_train_aro)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


In [77]:
# Evaluation function for calculating metrics
def evaluate_model(model, data_loader, target):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels_val, labels_aro in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            if target == "val":
                labels = labels_val
            else:
                labels = labels_aro

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')  # Multiclass için weighted average
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return accuracy, precision, recall, f1


In [78]:
# Eğitim döngüsü
for epoch in range(epochs):
    model_multiclass.train()
    running_loss = 0.0
    for inputs, labels_val, labels_aro in train_loader:
        optimizer.zero_grad()
        outputs = model_multiclass(inputs)

        # Her iki hedef için ayrı kayıplar hesaplanabilir
        loss_val = criterion(outputs, labels_val)  # val için kayıp
        loss_aro = criterion(outputs, labels_aro)  # aro için kayıp

        # Kayıpların toplamı
        loss = loss_val + loss_aro
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Epoch sonrasında metrikleri hesapla (val ve aro için)
    val_acc, val_prec, val_rec, val_f1 = evaluate_model(model_multiclass, train_loader, target="val")
    aro_acc, aro_prec, aro_rec, aro_f1 = evaluate_model(model_multiclass, train_loader, target="aro")

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Loss: {running_loss/len(train_loader):.4f}")
    print(f"Validation Metrics -> Accuracy: {val_acc:.4f}, Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1-Score: {val_f1:.4f}")
    print(f"Arousal Metrics -> Accuracy: {aro_acc:.4f}, Precision: {aro_prec:.4f}, Recall: {aro_rec:.4f}, F1-Score: {aro_f1:.4f}")


Epoch 1/10
Loss: 2.2983
Validation Metrics -> Accuracy: 0.4040, Precision: 0.4705, Recall: 0.4040, F1-Score: 0.3523
Arousal Metrics -> Accuracy: 0.3950, Precision: 0.4371, Recall: 0.3950, F1-Score: 0.3377
Epoch 2/10
Loss: 2.2027
Validation Metrics -> Accuracy: 0.4270, Precision: 0.4512, Recall: 0.4270, F1-Score: 0.3782
Arousal Metrics -> Accuracy: 0.4300, Precision: 0.4937, Recall: 0.4300, F1-Score: 0.3790
Epoch 3/10
Loss: 2.1869
Validation Metrics -> Accuracy: 0.4580, Precision: 0.5190, Recall: 0.4580, F1-Score: 0.4072
Arousal Metrics -> Accuracy: 0.4220, Precision: 0.4782, Recall: 0.4220, F1-Score: 0.3730
Epoch 4/10
Loss: 2.1848
Validation Metrics -> Accuracy: 0.4050, Precision: 0.5201, Recall: 0.4050, F1-Score: 0.3360
Arousal Metrics -> Accuracy: 0.4260, Precision: 0.4925, Recall: 0.4260, F1-Score: 0.3491
Epoch 5/10
Loss: 2.1701
Validation Metrics -> Accuracy: 0.4420, Precision: 0.4630, Recall: 0.4420, F1-Score: 0.3786
Arousal Metrics -> Accuracy: 0.4310, Precision: 0.4905, Recall: 

In [79]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, TensorDataset

# Multiclass EEG-CNN Modeli
class EEG_CNN_Multiclass(nn.Module):
    def __init__(self, input_size, num_classes=3):
        super(EEG_CNN_Multiclass, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool = nn.AvgPool1d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(32 * (input_size // 2), 128)
        self.fc2 = nn.Linear(128, num_classes)  # Multiclass classification için 3 sınıf

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.bn1(self.conv1(x))))
        x = x.view(-1, 32 * (x.size(2)))  # Flatten for fully connected layers
        x = self.dropout(nn.ReLU()(self.fc1(x)))
        x = self.fc2(x)
        return x

# Eğitim için hiperparametreler
input_size = 256
num_classes = 3  # Multiclass classification için 3 sınıf
batch_size = 32
epochs = 10
learning_rate = 0.001

# Model, loss function ve optimizer
model_multiclass = EEG_CNN_Multiclass(input_size=input_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()  # Multiclass için CrossEntropyLoss
optimizer = optim.Adam(model_multiclass.parameters(), lr=learning_rate)

# Dummy Dataset - Gerçek veriler yerine geçici veri kullanıyoruz
X_train = torch.randn(1000, 1, input_size)  # 1000 örnek
y_train_val = torch.randint(0, 3, (1000,))  # val için 3 sınıf etiketler
y_train_aro = torch.randint(0, 3, (1000,))  # aro için 3 sınıf etiketler

# DataLoader
train_dataset = TensorDataset(X_train, y_train_val, y_train_aro)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation function for calculating metrics
def evaluate_model(model, data_loader, target):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels_val, labels_aro in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            if target == "val":
                labels = labels_val
            else:
                labels = labels_aro

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')  # Multiclass için weighted average
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    return accuracy, precision, recall, f1

# Eğitim döngüsü
for epoch in range(epochs):
    model_multiclass.train()
    running_loss = 0.0
    for inputs, labels_val, labels_aro in train_loader:
        optimizer.zero_grad()
        outputs = model_multiclass(inputs)

        # Her iki hedef için ayrı kayıplar hesaplanabilir
        loss_val = criterion(outputs, labels_val)  # val için kayıp
        loss_aro = criterion(outputs, labels_aro)  # aro için kayıp

        # Kayıpların toplamı
        loss = loss_val + loss_aro
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Epoch sonrasında metrikleri hesapla (val ve aro için)
    val_acc, val_prec, val_rec, val_f1 = evaluate_model(model_multiclass, train_loader, target="val")
    aro_acc, aro_prec, aro_rec, aro_f1 = evaluate_model(model_multiclass, train_loader, target="aro")

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Loss: {running_loss/len(train_loader):.4f}")
    print(f"Validation Metrics -> Accuracy: {val_acc:.4f}, Precision: {val_prec:.4f}, Recall: {val_rec:.4f}, F1-Score: {val_f1:.4f}")
    print(f"Arousal Metrics -> Accuracy: {aro_acc:.4f}, Precision: {aro_prec:.4f}, Recall: {aro_rec:.4f}, F1-Score: {aro_f1:.4f}")


Epoch 1/10
Loss: 2.2866
Validation Metrics -> Accuracy: 0.3450, Precision: 0.5224, Recall: 0.3450, F1-Score: 0.2330
Arousal Metrics -> Accuracy: 0.3720, Precision: 0.5745, Recall: 0.3720, F1-Score: 0.2650
Epoch 2/10
Loss: 2.1923
Validation Metrics -> Accuracy: 0.4410, Precision: 0.4465, Recall: 0.4410, F1-Score: 0.4404
Arousal Metrics -> Accuracy: 0.4460, Precision: 0.4493, Recall: 0.4460, F1-Score: 0.4455
Epoch 3/10
Loss: 2.1883
Validation Metrics -> Accuracy: 0.4660, Precision: 0.6276, Recall: 0.4660, F1-Score: 0.3824
Arousal Metrics -> Accuracy: 0.4040, Precision: 0.2738, Recall: 0.4040, F1-Score: 0.3227
Epoch 4/10
Loss: 2.1762
Validation Metrics -> Accuracy: 0.4120, Precision: 0.4843, Recall: 0.4120, F1-Score: 0.2951
Arousal Metrics -> Accuracy: 0.3860, Precision: 0.4801, Recall: 0.3860, F1-Score: 0.2708
Epoch 5/10
Loss: 2.1712
Validation Metrics -> Accuracy: 0.3930, Precision: 0.5966, Recall: 0.3930, F1-Score: 0.2490
Arousal Metrics -> Accuracy: 0.3610, Precision: 0.5774, Recall: 