In [1]:
pip install mne pyedflib

Collecting mne
  Downloading mne-1.8.0-py3-none-any.whl.metadata (21 kB)
Collecting pyedflib
  Downloading pyEDFlib-0.1.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Downloading mne-1.8.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyEDFlib-0.1.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyedflib, mne
Successfully installed mne-1.8.0 pyedflib-0.1.38


In [3]:
from google.colab import drive
from glob import glob
import os
import mne
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import glob
import numpy as np
import pyedflib
import os

# Directory containing EDF files
# directory = '/content/drive/MyDrive/dataverse_files(2)'
directory = '/content/drive/MyDrive/dataverse_files (2)'

# Use glob to get a list of EDF file paths
myfiles = glob.glob(directory + '/*.edf')

# Sampling rate of your signals
sampling_rate = 250  # Hz
epoch_length = sampling_rate * 2  # 2 seconds = 500 samples per epoch
num_samples_per_subject = 50  # Number of 2-second epochs to extract from each subject

# Function to adjust EEG signal length to the nearest multiple of epoch length
def adjust_signal_length(signal, epoch_length):
    # Calculate the number of epochs that can be extracted
    num_epochs = len(signal) // epoch_length
    if num_epochs == 0:
        return np.array([])  # Return empty array if not enough data
    return signal[:num_epochs * epoch_length]  # Keep only the data that can be fully divided

# Function to extract 2-second epochs directly from the signal
def extract_epochs(signal, epoch_length):
    num_epochs = len(signal) // epoch_length
    epochs = []
    for i in range(num_epochs):
        start = i * epoch_length
        end = start + epoch_length
        epochs.append(signal[start:end])
    return epochs

# Function to randomly select a specified number of epochs
def select_random_samples(epochs, num_samples):
    epochs_array = np.array(epochs)  # Convert to a NumPy array
    if len(epochs_array) <= num_samples:
        return epochs_array  # If there are fewer epochs than needed, return all
    indices = np.random.choice(len(epochs_array), num_samples, replace=False)
    return epochs_array[indices]  # Select the samples by indices

# Initialize lists to store epochs for unhealthy and healthy signals
# Assume maximum of 19 channels and 14 subjects
num_channels = 19
num_subjects = 14
num_epochs = num_samples_per_subject
epoch_length = 500

# Initialize multidimensional arrays to store epochs
healthy_data = np.empty((num_subjects, num_channels, num_epochs, epoch_length))
unhealthy_data = np.empty((num_subjects, num_channels, num_epochs, epoch_length))

# Initialize counters for subjects
healthy_subject_counter = 0
unhealthy_subject_counter = 0

# Process each file
for file in myfiles:
    # Open the EDF file
    f = pyedflib.EdfReader(file)

    # Get signal labels (channel names)
    signal_labels = f.getSignalLabels()

    # Extract the filename and determine if the subject is healthy or unhealthy
    filename = file.split('/')[-1]
    subject_type = 'healthy' if 'h' in filename else 'unhealthy'

    # Initialize temporary storage for epochs
    temp_epochs = {label: [] for label in signal_labels}

    # Process each channel (electrode)
    for i, label in enumerate(signal_labels):
        signal = f.readSignal(i)

        # Adjust the signal length to the nearest multiple of epoch length
        adjusted_signal = adjust_signal_length(signal, epoch_length)

        # Extract 2-second epochs directly from the signal
        epochs = extract_epochs(adjusted_signal, epoch_length)

        # Select 50 random epochs if there are enough, otherwise use all available
        selected_epochs = select_random_samples(epochs, num_samples_per_subject)

        # Store epochs in temporary storage
        temp_epochs[label] = selected_epochs

    # Store the epochs in the appropriate multidimensional array
    if subject_type == 'healthy':
        for ch in range(num_channels):
            if ch < len(signal_labels):  # Ensure channel exists
                healthy_data[healthy_subject_counter, ch, :, :] = temp_epochs[signal_labels[ch]]
        healthy_subject_counter += 1
    else:
        for ch in range(num_channels):
            if ch < len(signal_labels):  # Ensure channel exists
                unhealthy_data[unhealthy_subject_counter, ch, :, :] = temp_epochs[signal_labels[ch]]
        unhealthy_subject_counter += 1

    f._close()  # Close the EDF file

# Check dimensions and save the data
print(f"Healthy Data Shape: {healthy_data.shape}")
print(f"Unhealthy Data Shape: {unhealthy_data.shape}")

# Optionally save the data to disk
np.save('healthy_data.npy', healthy_data)
np.save('unhealthy_data.npy', unhealthy_data)


Healthy Data Shape: (14, 19, 50, 500)
Unhealthy Data Shape: (14, 19, 50, 500)


In [5]:
healthy_data[0][0][0][0]

11.317479857782855

In [6]:
import numpy as np
import pandas as pd
from IPython.display import display

# Define dimensions
num_subjects = 14
num_channels = 19
num_epochs = 50
samples_per_epoch = 500

# Create random healthy data
# healthy_data _epoch)

# Define channel labels
signal_labels = [
    "Fp1", "Fp2", "F3", "F4", "C3", "C4", "P3", "P4", "O1", "O2",
    "F7", "F8", "T3", "T4", "T5", "T6", "Fz", "Cz", "Pz"
]

# Initialize a list to store rows of tabular data
tabular_data = []

# Iterate through each subject, channel, and epoch to flatten the data
for subject in range(num_subjects):
    for channel in range(num_channels):
        for epoch in range(num_epochs):
            # Extract the signal for the current subject, channel, and epoch
            signal = healthy_data[subject, channel, epoch, :]

            # Create a row with metadata and the signal values
            row = {
                'Subject_ID': subject + 1,
                'Channel': signal_labels[channel],
                'Epoch': epoch + 1,
                'Signal_Values': list(signal)  # Convert numpy array to a list for better readability
            }

            # Append the row to the list
            tabular_data.append(row)

# Create a DataFrame from the list of rows
df_healthy = pd.DataFrame(tabular_data)

# Display the first few rows of the DataFrame
print(df_healthy.head())

# Optionally, save the DataFrame to a CSV file for further analysis
df_healthy.to_csv('healthy_data_tabular.csv', index=False)

# Optionally, display the DataFrame in a tabular format in Google Colab
display(df_healthy)


   Subject_ID Channel  Epoch  \
0           1     Fp1      1   
1           1     Fp1      2   
2           1     Fp1      3   
3           1     Fp1      4   
4           1     Fp1      5   

                                       Signal_Values  
0  [11.317479857782855, 11.011669591356267, 11.77...  
1  [-5.9608001953193535, -10.395049058504876, -11...  
2  [8.870997726370152, 9.023902859583446, 4.89546...  
3  [-11.465384990997933, -11.006669591358051, -8....  
4  [74.62020500808651, 61.16455328531666, 52.4489...  


Unnamed: 0,Subject_ID,Channel,Epoch,Signal_Values
0,1,Fp1,1,"[11.317479857782855, 11.011669591356267, 11.77..."
1,1,Fp1,2,"[-5.9608001953193535, -10.395049058504876, -11..."
2,1,Fp1,3,"[8.870997726370152, 9.023902859583446, 4.89546..."
3,1,Fp1,4,"[-11.465384990997933, -11.006669591358051, -8...."
4,1,Fp1,5,"[74.62020500808651, 61.16455328531666, 52.4489..."
...,...,...,...,...
13295,14,Pz,46,"[-1.0678359324939495, -4.584653996399709, -5.5..."
13296,14,Pz,47,"[-10.242143925291582, -10.089238792078289, -9...."
13297,14,Pz,48,"[7.18904126102392, 3.6722231971181607, 1.68445..."
13298,14,Pz,49,"[-5.349179662466178, -7.489851527452292, -9.63..."


In [None]:
import pandas as pd

# Initialize a list to store rows of tabular data
tabular_data = []

# Iterate through each subject, channel, and epoch to flatten the data
for subject in range(num_subjects):
    for channel in range(num_channels):
        for epoch in range(num_epochs):
            # Extract the signal for the current subject, channel, and epoch
            signal = healthy_data[subject, channel, epoch, :]

            # Create a row with metadata and the signal values
            row = {
                'Subject_ID': subject + 1,
                'Channel': signal_labels[channel],
                'Epoch': epoch + 1,
                'Signal_Values': list(signal)  # Convert numpy array to a list for better readability
            }

            # Append the row to the list
            tabular_data.append(row)

# Create a DataFrame from the list of rows
df_healthy = pd.DataFrame(tabular_data)

# Display the first few rows of the DataFrame
print(df_healthy.head())

# Optionally, save the DataFrame to a CSV file for further analysis
df_healthy.to_csv('healthy_data_tabular.csv', index=False)

# Optionally, display the DataFrame in a tabular format in Google Colab
from IPython.display import display
display(df_healthy)


   Subject_ID Channel  Epoch  \
0           1     Fp1      1   
1           1     Fp1      2   
2           1     Fp1      3   
3           1     Fp1      4   
4           1     Fp1      5   

                                       Signal_Values  
0  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
1  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
2  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
3  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
4  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  


Unnamed: 0,Subject_ID,Channel,Epoch,Signal_Values
0,1,Fp1,1,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,1,Fp1,2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,1,Fp1,3,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,1,Fp1,4,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,1,Fp1,5,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...
13295,14,Pz,46,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
13296,14,Pz,47,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
13297,14,Pz,48,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
13298,14,Pz,49,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [7]:
import pandas as pd

# Initialize a list to store rows of tabular data for unhealthy data
tabular_data_unhealthy = []

# Iterate through each subject, channel, and epoch to flatten the unhealthy data
for subject in range(num_subjects):
    for channel in range(num_channels):
        for epoch in range(num_epochs):
            # Extract the signal for the current subject, channel, and epoch
            signal = unhealthy_data[subject, channel, epoch, :]

            # Create a row with metadata and the signal values
            row = {
                'Subject_ID': subject + 1,
                'Channel': signal_labels[channel],
                'Epoch': epoch + 1,
                'Signal_Values': list(signal)  # Convert numpy array to a list for better readability
            }

            # Append the row to the list
            tabular_data_unhealthy.append(row)

# Create a DataFrame from the list of rows for unhealthy data
df_unhealthy = pd.DataFrame(tabular_data_unhealthy)

# Display the first few rows of the DataFrame
print(df_unhealthy.head())

# Optionally, save the DataFrame to a CSV file for further analysis
df_unhealthy.to_csv('unhealthy_data_tabular.csv', index=False)

# Optionally, display the DataFrame in a tabular format in Google Colab
from IPython.display import display
display(df_unhealthy)


   Subject_ID Channel  Epoch  \
0           1     Fp1      1   
1           1     Fp1      2   
2           1     Fp1      3   
3           1     Fp1      4   
4           1     Fp1      5   

                                       Signal_Values  
0  [-3.5143180639066514, -1.526551332133831, -0.3...  
1  [2.44898213141181, 4.742559129611218, 7.036136...  
2  [-4.431748863186415, -4.125938596759827, -5.04...  
3  [-3.6672231971199456, -3.8201283303332394, -6....  
4  [3.2135077974782793, 3.6722231971181607, 2.754...  


Unnamed: 0,Subject_ID,Channel,Epoch,Signal_Values
0,1,Fp1,1,"[-3.5143180639066514, -1.526551332133831, -0.3..."
1,1,Fp1,2,"[2.44898213141181, 4.742559129611218, 7.036136..."
2,1,Fp1,3,"[-4.431748863186415, -4.125938596759827, -5.04..."
3,1,Fp1,4,"[-3.6672231971199456, -3.8201283303332394, -6...."
4,1,Fp1,5,"[3.2135077974782793, 3.6722231971181607, 2.754..."
...,...,...,...,...
13295,14,Pz,46,"[-13.597209280679062, -15.56500930814572, 3.57..."
13296,14,Pz,47,"[4.291881878108741, -14.133882015442696, -11.4..."
13297,14,Pz,48,"[-3.579318231757892, 10.73195469527235, 8.0485..."
13298,14,Pz,49,"[-10.019391048921502, 8.764154667805693, 12.69..."


Working perfectly for IMF

In [None]:
import torch
import pandas as pd

# Load the data
df = pd.read_csv("healthy_data_tabular.csv")  # Replace with your actual file path

# Convert the Signal_Values to actual lists
df['Signal_Values'] = df['Signal_Values'].apply(eval)

# Define a placeholder MVMD function for demonstration (replace with your actual MVMD code)
def mvmd(signal, K=3):
    # Simulate some output for demonstration
    batch_size, num_channels, num_samples = signal.shape
    imfs = torch.randn(batch_size, K, num_channels, num_samples)  # Replace with actual MVMD output
    return imfs, None, None

# Initialize list to collect IMF data for CSV
imf_data_list = []
unique_subjects = df['Subject_ID'].unique()
unique_epochs = df['Epoch'].unique()

# Loop over each subject and each epoch
for subject in unique_subjects:
    for epoch in unique_epochs:
        # Filter data for the specific subject and epoch
        group_data = df[(df['Subject_ID'] == subject) & (df['Epoch'] == epoch)]

        # Sort channels to ensure consistency
        group_data = group_data.sort_values(by='Channel')

        # Check if we have all 19 channels
        if len(group_data) == 19:
            # Stack signal values from all 19 channels into a tensor
            signal = torch.stack([torch.tensor(row['Signal_Values']) for _, row in group_data.iterrows()])

            # Add batch dimension to signal tensor
            signal = signal.unsqueeze(0)  # shape becomes (1, 19, 500) where 19 is the channel count

            # Check if the shape is correct
            if signal.shape[2] == 500:
                print(f"Processing Subject: {subject}, Epoch: {epoch}")

                # Perform MVMD on the multi-channel signal
                imfs, _, _ = mvmd(signal, K=3)

                # Store each IMF for each channel in the list
                for ch in range(imfs.shape[2]):  # Loop over each channel
                    for k in range(imfs.shape[1]):  # Loop over each IMF
                        imf_values = imfs[0, k, ch, :].tolist()  # Convert IMF tensor to a list
                        imf_data_list.append({
                            'Subject_ID': subject,
                            'Epoch': epoch,
                            'Channel': ch,
                            'IMF_Number': k + 1,
                            'IMF_Values': imf_values
                        })

# Convert list to DataFrame and save to CSV
imf_df = pd.DataFrame(imf_data_list)
# change below file name according to health and unhealthy file names
imf_df.to_csv("imf_data_healthy.csv", index=False)
print("IMFs saved to imf_data.csv.")


Processing Subject: 1, Epoch: 1
Processing Subject: 1, Epoch: 2
Processing Subject: 1, Epoch: 3
Processing Subject: 1, Epoch: 4
Processing Subject: 1, Epoch: 5
Processing Subject: 1, Epoch: 6
Processing Subject: 1, Epoch: 7
Processing Subject: 1, Epoch: 8
Processing Subject: 1, Epoch: 9
Processing Subject: 1, Epoch: 10
Processing Subject: 1, Epoch: 11
Processing Subject: 1, Epoch: 12
Processing Subject: 1, Epoch: 13
Processing Subject: 1, Epoch: 14
Processing Subject: 1, Epoch: 15
Processing Subject: 1, Epoch: 16
Processing Subject: 1, Epoch: 17
Processing Subject: 1, Epoch: 18
Processing Subject: 1, Epoch: 19
Processing Subject: 1, Epoch: 20
Processing Subject: 1, Epoch: 21
Processing Subject: 1, Epoch: 22
Processing Subject: 1, Epoch: 23
Processing Subject: 1, Epoch: 24
Processing Subject: 1, Epoch: 25
Processing Subject: 1, Epoch: 26
Processing Subject: 1, Epoch: 27
Processing Subject: 1, Epoch: 28
Processing Subject: 1, Epoch: 29
Processing Subject: 1, Epoch: 30
Processing Subject:

In [None]:
import pandas as pd

# Load CSV data
unhealthy_data = pd.read_csv("imf_data_healthy.csv")
# healthy_data = pd.read_csv("hProcessed_MVMD_IMFs.csv")

In [None]:
unhealthy_data

Unnamed: 0,Subject_ID,Epoch,Channel,IMF_Number,IMF_Values
0,1,1,0,1,"[-1.1501975059509277, -0.35760951042175293, -0..."
1,1,1,0,2,"[-0.8239970207214355, -0.19944284856319427, -0..."
2,1,1,0,3,"[-0.9796571135520935, -0.3295162320137024, -0...."
3,1,1,1,1,"[-0.5299670696258545, 0.03610764071345329, 0.0..."
4,1,1,1,2,"[0.829396665096283, 0.7601236701011658, -0.686..."
...,...,...,...,...,...
39895,14,50,17,2,"[-0.000277706392807886, -0.8788760900497437, 1..."
39896,14,50,17,3,"[-0.895706295967102, 1.677400827407837, 0.7171..."
39897,14,50,18,1,"[0.16693884134292603, -0.9829882979393005, 0.1..."
39898,14,50,18,2,"[-0.19964121282100677, -0.36058586835861206, 1..."


In [None]:
import pandas as pd

# Load the CSV file
file_path = "Processed_MVMD_IMFs.csv"  # Update this path if necessary
new_file_path = "First_10000_Entries.csv"

try:
    data = pd.read_csv(file_path)

    # Select the first 10,000 entries
    first_10000_entries = data.head(10000)

    # Save to a new CSV file
    first_10000_entries.to_csv(new_file_path, index=False)
    print(f"First 10,000 entries saved to '{new_file_path}'.")

except FileNotFoundError:
    print(f"The file '{file_path}' was not found. Please check the file path and try again.")


First 10,000 entries saved to 'First_10000_Entries.csv'.


In [None]:
!pip install EMD-signal


Collecting EMD-signal
  Downloading EMD_signal-1.6.4-py3-none-any.whl.metadata (8.9 kB)
Collecting pathos>=0.2.1 (from EMD-signal)
  Downloading pathos-0.3.3-py3-none-any.whl.metadata (11 kB)
Collecting ppft>=1.7.6.9 (from pathos>=0.2.1->EMD-signal)
  Downloading ppft-1.7.6.9-py3-none-any.whl.metadata (12 kB)
Collecting dill>=0.3.9 (from pathos>=0.2.1->EMD-signal)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting pox>=0.3.5 (from pathos>=0.2.1->EMD-signal)
  Downloading pox-0.3.5-py3-none-any.whl.metadata (8.0 kB)
Collecting multiprocess>=0.70.17 (from pathos>=0.2.1->EMD-signal)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Downloading EMD_signal-1.6.4-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pathos-0.3.3-py3-none-any.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m6.6 MB/s[

In [None]:
import pandas as pd
from scipy.stats import entropy
import numpy as np

# Load the dataset
data = pd.read_csv("Processed_MVMD_IMFs.csv")  # Replace with your file name

# Define a function to calculate Shannon entropy
def calculate_shannon_entropy(imf_values):
    # Convert values to absolute values to avoid negative probabilities
    imf_values = np.abs(imf_values)
    # Normalize the IMF values to use as a probability distribution
    probability_distribution = imf_values / np.sum(imf_values)
    # Calculate Shannon entropy
    return entropy(probability_distribution)

# Prepare to store entropy results
entropy_results = []

# Group by Subject_ID, Epoch, and Channel and calculate entropy for IMF_1
for (subject_id, epoch, channel), group_data in data.groupby(['Subject_ID', 'Epoch', 'Channel']):
    # Calculate entropy for IMF_1
    imf_entropy = calculate_shannon_entropy(group_data['IMF_1'].values)

    # Store the results in a dictionary
    entropy_results.append({
        'Subject_ID': subject_id,
        'Epoch': epoch,
        'Channel': channel,
        'IMF_1_Entropy': imf_entropy
    })

# Convert results to a DataFrame
entropy_df = pd.DataFrame(entropy_results)

# Save to CSV if needed
entropy_df.to_csv("IMF_Entropy_Results.csv", index=False)
print("Entropy results saved to 'IMF_Entropy_Results.csv'.")


Entropy results saved to 'IMF_Entropy_Results.csv'.


In [None]:
import pandas as pd
from scipy.stats import entropy
import numpy as np

# Load the dataset
data = pd.read_csv("Processed_MVMD_IMFs.csv")  # Replace with your file name

# Define a function to calculate Shannon entropy
def calculate_shannon_entropy(imf_values):
    imf_values = np.abs(imf_values)  # Avoid negative probabilities
    probability_distribution = imf_values / np.sum(imf_values)
    return entropy(probability_distribution)

# Prepare to store entropy results for each channel
entropy_results = []

# Loop through each Subject, Epoch, and Channel group to compute entropy
for (subject_id, epoch, channel), group_data in data.groupby(['Subject_ID', 'Epoch', 'Channel']):
    print(channel)
    imf_entropy = calculate_shannon_entropy(group_data['IMF_1'].values)

    # Store results for each channel
    entropy_results.append({
        'Subject_ID': subject_id,
        'Epoch': epoch,
        'Channel': channel,
        'IMF_1_Entropy': imf_entropy
    })

# Convert results to DataFrame
entropy_df = pd.DataFrame(entropy_results)

# Save entropy values for all channels to CSV
entropy_df.to_csv("IMF_Entropy_Results_All_Channels.csv", index=False)
print("Entropy results for all channels saved to 'IMF_Entropy_Results_All_Channels.csv'.")


F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4
F8
Fp2
T4


In [None]:
# features to be find
# ---------------------------------
# 1. shannon
# 2. absolute
# 3. approximate
# 4. spectral
# 5. sample
# 6. svd


In [None]:
from google.colab import files
uploaded = files.upload()  # Prompts you to upload a file

import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['un_imf_entropies_with_measures.csv']))
print(df.head())


KeyboardInterrupt: 

In [None]:
import pandas as pd

# Reading the CSV file
df = pd.read_csv('un_imf_entropies.csv')  # Replace 'your_file.csv' with your file's name
print(df.head())  # Displays the first 5 rows of the DataFrame


FileNotFoundError: [Errno 2] No such file or directory: 'un_imf_entropies.csv'

In [None]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('imf_entropies_with_measures.csv')

# Display the first few rows
print(df.head())


   Subject_ID  Epoch  Channel  IMF_Number  Shannon_Entropy  \
0           1      1        0           1        64.447018   
1           1      1        0           2        66.109106   
2           1      1        0           3        73.466164   
3           1      1        1           1        71.560436   
4           1      1        1           2        65.538417   

   Approximate_Entropy  Absolute_Entropy  Sample_Entropy  Spectral_Entropy  \
0             1.296682       -298.652953        1.414918          7.310699   
1             1.380944       -313.000785        1.468334          7.399364   
2             1.405421       -270.588803        1.497151          7.359589   
3             1.360427       -302.098013        1.449004          7.339466   
4             1.381318       -304.033035        1.481001          7.371134   

   SVD_Entropy  
0     1.584733  
1     1.584860  
2     1.584664  
3     1.583511  
4     1.584587  


In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Step 2: Explore the data
print(df.head())  # Print first few rows
print(df.info())  # Check for missing data and data types

# Step 3: Prepare the data
# Assuming the last column is the target variable (replace with the correct column names)
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the SVM model
svm_model = SVC(kernel='linear')  # You can try other kernels like 'rbf', 'poly', etc.
svm_model.fit(X_train, y_train)

# Step 5: Make predictions
y_pred = svm_model.predict(X_test)

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


    71.600200    1.328251    -262.765943   1.428459     7.303451    \
0    68.699080     1.363542  -305.286896     1.496620     7.346205   
1    74.198253     1.340605  -329.116923     1.454319     7.354627   
2    71.816201     1.342889  -244.083279     1.435167     7.306929   
3    66.165703     1.313681  -315.992394     1.436436     7.332617   
4    69.101628     1.331392  -314.182161     1.436849     7.465824   

    1.584197    -1.000000    
0     1.584959           -1  
1     1.584863           -1  
2     1.584731            1  
3     1.584806            1  
4     1.583212            1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79799 entries, 0 to 79798
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   71.60020034   79799 non-null  float64
 1   1.328251248   79799 non-null  float64
 2   -262.7659428  79799 non-null  float64
 3   1.428459083   79799 non-null  float64
 4   7.303450851   79799 non-null

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Step 2: Explore the data
print(df.head())  # Print first few rows
print(df.info())  # Check for missing data and data types

# Step 3: Prepare the data
# Assuming the last column is the target variable (replace with the correct column names)
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Step 4: Train the SVM model
svm_model = SVC(kernel='linear')  # You can try other kernels like 'rbf', 'poly', etc.
svm_model.fit(X_train, y_train)

# Step 5: Make predictions
y_pred = svm_model.predict(X_test)

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


    71.600200    1.328251    -262.765943   1.428459     7.303451    \
0    68.699080     1.363542  -305.286896     1.496620     7.346205   
1    74.198253     1.340605  -329.116923     1.454319     7.354627   
2    71.816201     1.342889  -244.083279     1.435167     7.306929   
3    66.165703     1.313681  -315.992394     1.436436     7.332617   
4    69.101628     1.331392  -314.182161     1.436849     7.465824   

    1.584197    -1.000000    
0     1.584959           -1  
1     1.584863           -1  
2     1.584731            1  
3     1.584806            1  
4     1.583212            1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79799 entries, 0 to 79798
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   71.60020034   79799 non-null  float64
 1   1.328251248   79799 non-null  float64
 2   -262.7659428  79799 non-null  float64
 3   1.428459083   79799 non-null  float64
 4   7.303450851   79799 non-null

In [None]:
# Import libraries
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, accuracy_score

# Load the dataset (replace with your dataset)
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Prepare the data
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Initialize the SVM model
svm_model = SVC(kernel='linear')  # You can choose other kernels like 'rbf', 'poly', etc.

# Perform 10-fold cross-validation
scores = cross_val_score(svm_model, X, y, cv=10, scoring='accuracy')

# Print results
print("Cross-validation scores for each fold:", scores)
print("Mean Accuracy:", scores.mean())
print("Standard Deviation:", scores.std())



KeyboardInterrupt: 

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file path
df = pd.read_excel(file_path)

# Prepare the data
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Define the KNN model
knn_model = KNeighborsClassifier()

# Define the hyperparameter grid
param_grid = {
    'n_neighbors': range(1, 21),  # Test k from 1 to 20
    'weights': ['uniform', 'distance'],  # Uniform vs distance-based weights
    'metric': ['euclidean', 'manhattan', 'minkowski']  # Distance metrics
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=knn_model,
    param_grid=param_grid,
    cv=10,  # 10-fold cross-validation
    scoring='accuracy',
    verbose=2,  # Show progress
    n_jobs=-1   # Use all available CPU cores
)

# Perform the grid search
grid_search.fit(X, y)

# Print the best parameters and the best score
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)


Fitting 10 folds for each of 120 candidates, totalling 1200 fits
Best Parameters: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
Best Cross-Validation Accuracy: 0.5074249541638278


In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import random  # For generating random values

# Step 1: Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Very the random value from 1 to 100 and print result each time
random_value = random.randint(1, 100)  # Generate random integer between 1 and 100
print(f"Random Value at Step 1 (Load Data): {random_value}")

# Step 2: Explore the data
print(df.head())  # Print first few rows
print(df.info())  # Check for missing data and data types
random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 2 (Explore Data): {random_value}")

# Step 3: Prepare the data
# Assuming the last column is the target variable (replace with the correct column names)
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 3 (Data Preparation): {random_value}")

# Step 4: Train the SVM model
svm_model = SVC(kernel='linear')  # You can try other kernels like 'rbf', 'poly', etc.
svm_model.fit(X_train, y_train)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 4 (Train SVM): {random_value}")

# Step 5: Make predictions
y_pred = svm_model.predict(X_test)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 5 (Predictions): {random_value}")

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 6 (Model Evaluation): {random_value}")


Random Value at Step 1 (Load Data): 47
    71.600200    1.328251    -262.765943   1.428459     7.303451    \
0    68.699080     1.363542  -305.286896     1.496620     7.346205   
1    74.198253     1.340605  -329.116923     1.454319     7.354627   
2    71.816201     1.342889  -244.083279     1.435167     7.306929   
3    66.165703     1.313681  -315.992394     1.436436     7.332617   
4    69.101628     1.331392  -314.182161     1.436849     7.465824   

    1.584197    -1.000000    
0     1.584959           -1  
1     1.584863           -1  
2     1.584731            1  
3     1.584806            1  
4     1.583212            1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79799 entries, 0 to 79798
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   71.60020034   79799 non-null  float64
 1   1.328251248   79799 non-null  float64
 2   -262.7659428  79799 non-null  float64
 3   1.428459083   79799 non-null  fl

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import random  # For generating random values

# Step 1: Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Very the random value from 1 to 100 and print result each time
random_value = random.randint(1, 100)  # Generate random integer between 1 and 100
print(f"Random Value at Step 1 (Load Data): {random_value}")

# Step 2: Explore the data
print(df.head())  # Print first few rows
print(df.info())  # Check for missing data and data types
random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 2 (Explore Data): {random_value}")

# Step 3: Prepare the data
# Assuming the last column is the target variable (replace with the correct column names)
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 3 (Data Preparation): {random_value}")

# Step 4: Train the SVM model
svm_model = SVC(kernel='linear')  # You can try other kernels like 'rbf', 'poly', etc.
svm_model.fit(X_train, y_train)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 4 (Train SVM): {random_value}")

# Step 5: Make predictions
y_pred = svm_model.predict(X_test)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 5 (Predictions): {random_value}")

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 6 (Model Evaluation): {random_value}")


Random Value at Step 1 (Load Data): 52
    71.600200    1.328251    -262.765943   1.428459     7.303451    \
0    68.699080     1.363542  -305.286896     1.496620     7.346205   
1    74.198253     1.340605  -329.116923     1.454319     7.354627   
2    71.816201     1.342889  -244.083279     1.435167     7.306929   
3    66.165703     1.313681  -315.992394     1.436436     7.332617   
4    69.101628     1.331392  -314.182161     1.436849     7.465824   

    1.584197    -1.000000    
0     1.584959           -1  
1     1.584863           -1  
2     1.584731            1  
3     1.584806            1  
4     1.583212            1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79799 entries, 0 to 79798
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   71.60020034   79799 non-null  float64
 1   1.328251248   79799 non-null  float64
 2   -262.7659428  79799 non-null  float64
 3   1.428459083   79799 non-null  fl

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import random  # For generating random values

# Step 1: Load the Excel file
file_path = 'entropy_data.xlsx'  # Replace with your file name
df = pd.read_excel(file_path)

# Verify the random value from 1 to 100 and print result each time
random_value = random.randint(1, 100)  # Generate random integer between 1 and 100
print(f"Random Value at Step 1 (Load Data): {random_value}")

# Step 2: Explore the data
print(df.head())  # Print first few rows
print(df.info())  # Check for missing data and data types
random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 2 (Explore Data): {random_value}")

# Step 3: Prepare the data
# Assuming the last column is the target variable (replace with the correct column names)
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 3 (Data Preparation): {random_value}")

# Step 4: Train the SVM model
svm_model = SVC(kernel='linear')  # You can change kernel to 'rbf', 'poly', etc.
svm_model.fit(X_train, y_train)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 4 (Train SVM): {random_value}")

# Step 5: Make predictions
y_pred = svm_model.predict(X_test)

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 5 (Predictions): {random_value}")

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

random_value = random.randint(1, 100)  # Generate a new random integer
print(f"Random Value at Step 6 (Model Evaluation): {random_value}")


Random Value at Step 1 (Load Data): 35
    71.600200    1.328251    -262.765943   1.428459     7.303451    \
0    68.699080     1.363542  -305.286896     1.496620     7.346205   
1    74.198253     1.340605  -329.116923     1.454319     7.354627   
2    71.816201     1.342889  -244.083279     1.435167     7.306929   
3    66.165703     1.313681  -315.992394     1.436436     7.332617   
4    69.101628     1.331392  -314.182161     1.436849     7.465824   

    1.584197    -1.000000    
0     1.584959           -1  
1     1.584863           -1  
2     1.584731            1  
3     1.584806            1  
4     1.583212            1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79799 entries, 0 to 79798
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   71.60020034   79799 non-null  float64
 1   1.328251248   79799 non-null  float64
 2   -262.7659428  79799 non-null  float64
 3   1.428459083   79799 non-null  fl

Direct approach

In [8]:
# pip install numpy pandas pyinform
!pip install --upgrade pyinform
!pip install antropy



Collecting pyinform
  Downloading pyinform-0.2.0-py3-none-any.whl.metadata (1.8 kB)
Downloading pyinform-0.2.0-py3-none-any.whl (131 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/131.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.2/131.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyinform
Successfully installed pyinform-0.2.0
Collecting antropy
  Downloading antropy-0.1.6.tar.gz (17 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting stochastic (from antropy)
  Downloading stochastic-0.7.0-py3-none-any.whl.metadata (6.1 kB)
Downloading stochastic-0.7.0-py3-none-any.whl (48 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected pa

In [9]:
import pandas as pd
import numpy as np
from collections import Counter
from scipy.stats import entropy as shannon_entropy
import antropy as ant

# Function to calculate Shannon Entropy
def calculate_shannon_entropy(signal):
    counter = Counter(signal)  # Count occurrences of each value
    total_count = len(signal)
    probabilities = [count / total_count for count in counter.values()]
    return -sum(p * np.log2(p) for p in probabilities if p > 0)

# Function to calculate Approximate Entropy
def calculate_approximate_entropy(signal, m=2, r=0.2):
    return ant.app_entropy(signal, order=m, metric="chebyshev")

# Function to calculate Sample Entropy
def calculate_sample_entropy(signal, m=2, r=0.2):
    return ant.sample_entropy(signal, order=m, metric="chebyshev")

# Function to calculate Tsallis Entropy
def calculate_tsallis_entropy(signal, q=2):
    counter = Counter(signal)  # Count occurrences of each value
    total_count = len(signal)
    probabilities = np.array([count / total_count for count in counter.values()])
    tsallis = (1 - np.sum(probabilities ** q)) / (q - 1) if q != 1 else calculate_shannon_entropy(signal)
    return tsallis

# Function to calculate Spectral Entropy
def calculate_spectral_entropy(signal, sampling_frequency=1):
    return ant.spectral_entropy(signal, sf=sampling_frequency, method="welch", normalize=True)

# Load CSV data
input_file = 'healthy_data_tabular.csv'  # Replace with your actual file
data = pd.read_csv(input_file)

# Ensure Signal_Values is parsed as a list of floats
data['Signal_Values'] = data['Signal_Values'].apply(lambda x: eval(x) if isinstance(x, str) else x)

# Initialize new columns
data['Shannon_Entropy'] = data['Signal_Values'].apply(calculate_shannon_entropy)
data['Approximate_Entropy'] = data['Signal_Values'].apply(calculate_approximate_entropy)
data['Sample_Entropy'] = data['Signal_Values'].apply(calculate_sample_entropy)
data['Tsallis_Entropy'] = data['Signal_Values'].apply(calculate_tsallis_entropy)
data['Spectral_Entropy'] = data['Signal_Values'].apply(calculate_spectral_entropy)

# Save updated data to a new CSV
output_file = 'data_with_entropies.csv'
data.to_csv(output_file, index=False)

print(f"Entropy calculations complete. Results saved to {output_file}.")


  psd_norm = psd / psd.sum(axis=axis, keepdims=True)


Entropy calculations complete. Results saved to data_with_entropies.csv.


In [20]:

input_file = 'data_with_entropies.csv'  # Replace with your actual file name
hdata = pd.read_csv(input_file)
hdata

Unnamed: 0,Subject_ID,Channel,Epoch,Signal_Values,Shannon_Entropy,Approximate_Entropy,Sample_Entropy,Tsallis_Entropy,Spectral_Entropy
0,1,Fp1,1,"[11.317479857782855, 11.011669591356267, 11.77...",7.511884,0.907534,0.929651,0.993672,0.489428
1,1,Fp1,2,"[-5.9608001953193535, -10.395049058504876, -11...",7.213492,1.083219,1.117977,0.991832,0.680598
2,1,Fp1,3,"[8.870997726370152, 9.023902859583446, 4.89546...",6.782381,1.128291,1.232144,0.989224,0.585859
3,1,Fp1,4,"[-11.465384990997933, -11.006669591358051, -8....",7.689071,0.857106,0.902307,0.994496,0.431208
4,1,Fp1,5,"[74.62020500808651, 61.16455328531666, 52.4489...",8.157974,0.559002,0.481663,0.995984,0.310777
...,...,...,...,...,...,...,...,...,...
13295,14,Pz,46,"[-1.0678359324939495, -4.584653996399709, -5.5...",7.090620,0.872365,0.902909,0.991288,0.652485
13296,14,Pz,47,"[-10.242143925291582, -10.089238792078289, -9....",6.800197,1.139629,1.256508,0.989040,0.654079
13297,14,Pz,48,"[7.18904126102392, 3.6722231971181607, 1.68445...",7.200287,0.867842,0.911789,0.991680,0.587804
13298,14,Pz,49,"[-5.349179662466178, -7.489851527452292, -9.63...",7.167406,0.992832,1.059401,0.991784,0.498251


In [11]:
import pandas as pd
import numpy as np
from collections import Counter
from scipy.stats import entropy as shannon_entropy
import antropy as ant

# Function to calculate Shannon Entropy
def calculate_shannon_entropy(signal):
    counter = Counter(signal)  # Count occurrences of each value
    total_count = len(signal)
    probabilities = [count / total_count for count in counter.values()]
    return -sum(p * np.log2(p) for p in probabilities if p > 0)

# Function to calculate Approximate Entropy
def calculate_approximate_entropy(signal, m=2, r=0.2):
    return ant.app_entropy(signal, order=m, metric="chebyshev")

# Function to calculate Sample Entropy
def calculate_sample_entropy(signal, m=2, r=0.2):
    return ant.sample_entropy(signal, order=m, metric="chebyshev")

# Function to calculate Tsallis Entropy
def calculate_tsallis_entropy(signal, q=2):
    counter = Counter(signal)  # Count occurrences of each value
    total_count = len(signal)
    probabilities = np.array([count / total_count for count in counter.values()])
    tsallis = (1 - np.sum(probabilities ** q)) / (q - 1) if q != 1 else calculate_shannon_entropy(signal)
    return tsallis

# Function to calculate Spectral Entropy
def calculate_spectral_entropy(signal, sampling_frequency=1):
    return ant.spectral_entropy(signal, sf=sampling_frequency, method="welch", normalize=True)

# Load CSV data
input_file = 'unhealthy_data_tabular.csv'  # Replace with your actual file
data = pd.read_csv(input_file)

# Ensure Signal_Values is parsed as a list of floats
data['Signal_Values'] = data['Signal_Values'].apply(lambda x: eval(x) if isinstance(x, str) else x)

# Initialize new columns
data['Shannon_Entropy'] = data['Signal_Values'].apply(calculate_shannon_entropy)
data['Approximate_Entropy'] = data['Signal_Values'].apply(calculate_approximate_entropy)
data['Sample_Entropy'] = data['Signal_Values'].apply(calculate_sample_entropy)
data['Tsallis_Entropy'] = data['Signal_Values'].apply(calculate_tsallis_entropy)
data['Spectral_Entropy'] = data['Signal_Values'].apply(calculate_spectral_entropy)

# Save updated data to a new CSV
output_file = 'data_with_unentropies.csv'
data.to_csv(output_file, index=False)

print(f"Entropy calculations complete. Results saved to {output_file}.")


  psd_norm = psd / psd.sum(axis=axis, keepdims=True)


Entropy calculations complete. Results saved to data_with_unentropies.csv.


In [15]:
input_file = 'data_with_unentropies.csv'  # Replace with your actual file name
data = pd.read_csv(input_file)
data

Unnamed: 0,Subject_ID,Channel,Epoch,Signal_Values,Shannon_Entropy,Approximate_Entropy,Sample_Entropy,Tsallis_Entropy,Spectral_Entropy
0,1,Fp1,1,"[-3.5143180639066514, -1.526551332133831, -0.3...",7.296329,0.949664,0.972611,0.992424,0.569092
1,1,Fp1,2,"[2.44898213141181, 4.742559129611218, 7.036136...",7.002282,0.797395,0.767046,0.990336,0.554492
2,1,Fp1,3,"[-4.431748863186415, -4.125938596759827, -5.04...",7.636261,0.679709,0.634840,0.993984,0.409134
3,1,Fp1,4,"[-3.6672231971199456, -3.8201283303332394, -6....",7.467782,0.813597,0.758204,0.993400,0.479331
4,1,Fp1,5,"[3.2135077974782793, 3.6722231971181607, 2.754...",7.209213,0.756629,0.759479,0.991824,0.588887
...,...,...,...,...,...,...,...,...,...
13295,14,Pz,46,"[-13.597209280679062, -15.56500930814572, 3.57...",7.631395,0.687565,0.747175,0.994152,0.402272
13296,14,Pz,47,"[4.291881878108741, -14.133882015442696, -11.4...",7.537039,0.486753,0.480683,0.993840,0.345263
13297,14,Pz,48,"[-3.579318231757892, 10.73195469527235, 8.0485...",7.553090,0.553027,0.561509,0.993832,0.350390
13298,14,Pz,49,"[-10.019391048921502, 8.764154667805693, 12.69...",7.845748,0.812216,0.822885,0.994984,0.389353


In [17]:
import pandas as pd

# Assuming your data is loaded into a DataFrame called 'df'
# Example: df = pd.read_csv('your_data.csv')

# List of entropy measures
entropy_measures = ['Shannon_Entropy', 'Approximate_Entropy', 'Sample_Entropy', 'Tsallis_Entropy', 'Spectral_Entropy']

# Initialize an empty list to store the feature vectors
feature_vectors = []
df = data
# Group by Subject_ID and Epoch
for (subject_id, epoch), group in df.groupby(['Subject_ID', 'Epoch']):
    # Initialize a list for the feature vector
    feature_vector = []

    # Loop through each channel (19 channels)
    for channel in group['Channel'].unique():
        # Extract the entropy measures for the current channel
        channel_data = group[group['Channel'] == channel]

        # Append the entropy measures to the feature vector
        for entropy in entropy_measures:
            feature_vector.append(channel_data[entropy].values[0])

    # Append the feature vector for the current subject and epoch
    feature_vectors.append(feature_vector)

# Convert the feature vectors into a DataFrame
feature_df = pd.DataFrame(feature_vectors, columns=[f"{channel}_{entropy}" for channel in df['Channel'].unique() for entropy in entropy_measures])

# Display the resulting feature DataFrame
print(feature_df)



     Fp1_Shannon_Entropy  Fp1_Approximate_Entropy  Fp1_Sample_Entropy  \
0               7.296329                 0.949664            0.972611   
1               7.002282                 0.797395            0.767046   
2               7.636261                 0.679709            0.634840   
3               7.467782                 0.813597            0.758204   
4               7.209213                 0.756629            0.759479   
..                   ...                      ...                 ...   
695             7.510716                 0.482028            0.408978   
696             7.784220                 0.780040            0.858951   
697             7.738418                 0.677618            0.705295   
698             7.871188                 0.799784            0.882716   
699             7.721516                 0.818647            0.946215   

     Fp1_Tsallis_Entropy  Fp1_Spectral_Entropy  Fp2_Shannon_Entropy  \
0               0.992424              0.569092      

In [18]:
# Save the resulting feature DataFrame to a CSV file
feature_df.to_csv('feature_vectors.csv', index=False)


In [21]:
import pandas as pd

# Assuming your data is loaded into a DataFrame called 'df'
# Example: df = pd.read_csv('your_data.csv')

# List of entropy measures
entropy_measures = ['Shannon_Entropy', 'Approximate_Entropy', 'Sample_Entropy', 'Tsallis_Entropy', 'Spectral_Entropy']

# Initialize an empty list to store the feature vectors
feature_vectors = []
df = hdata
# Group by Subject_ID and Epoch
for (subject_id, epoch), group in df.groupby(['Subject_ID', 'Epoch']):
    # Initialize a list for the feature vector
    feature_vector = []

    # Loop through each channel (19 channels)
    for channel in group['Channel'].unique():
        # Extract the entropy measures for the current channel
        channel_data = group[group['Channel'] == channel]

        # Append the entropy measures to the feature vector
        for entropy in entropy_measures:
            feature_vector.append(channel_data[entropy].values[0])

    # Append the feature vector for the current subject and epoch
    feature_vectors.append(feature_vector)

# Convert the feature vectors into a DataFrame
feature_df = pd.DataFrame(feature_vectors, columns=[f"{channel}_{entropy}" for channel in df['Channel'].unique() for entropy in entropy_measures])

# Display the resulting feature DataFrame
print(feature_df)



     Fp1_Shannon_Entropy  Fp1_Approximate_Entropy  Fp1_Sample_Entropy  \
0               7.511884                 0.907534            0.929651   
1               7.213492                 1.083219            1.117977   
2               6.782381                 1.128291            1.232144   
3               7.689071                 0.857106            0.902307   
4               8.157974                 0.559002            0.481663   
..                   ...                      ...                 ...   
695             7.571292                 0.931838            0.929213   
696             7.839090                 0.694633            0.640583   
697             7.734826                 0.810653            0.790117   
698             7.295846                 1.146391            1.259092   
699             7.547497                 0.887800            0.919679   

     Fp1_Tsallis_Entropy  Fp1_Spectral_Entropy  Fp2_Shannon_Entropy  \
0               0.993672              0.489428      

In [22]:
# Save the resulting feature DataFrame to a CSV file
feature_df.to_csv('hfeature_vectors.csv', index=False)

In [27]:
import pandas as pd

# Load the CSV files
feature_df = pd.read_csv('feature_vectors.csv')
hfeature_df = pd.read_csv('hfeature_vectors.csv')

# Add a Label column
feature_df['Label'] = 1
hfeature_df['Label'] = 0

# Concatenate the DataFrames
combined_df = pd.concat([feature_df, hfeature_df], ignore_index=True)

# Shuffle the combined DataFrame
shuffled_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the shuffled DataFrame to a new CSV file
shuffled_df.to_csv('shuffled_combined_features.csv', index=False)

# Print a success message
print("Files combined, shuffled, and saved as 'shuffled_combined_features.csv'.")

Files combined, shuffled, and saved as 'shuffled_combined_features.csv'.


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the shuffled dataset
data = pd.read_csv('shuffled_combined_features.csv')

# Separate features (X) and labels (y)
X = data.drop(columns=['Label'])  # Drop the label column to get features
y = data['Label']  # The label column

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize the SVM model
svm_model = SVC(kernel='linear', random_state=42)  # You can change the kernel to 'rbf', 'poly', etc.

# Train the SVM model
svm_model.fit(X_train, y_train)

# Predict the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n")
print(report)


Accuracy: 0.89

Classification Report:

              precision    recall  f1-score   support

           0       0.87      0.92      0.89       140
           1       0.92      0.86      0.89       140

    accuracy                           0.89       280
   macro avg       0.89      0.89      0.89       280
weighted avg       0.89      0.89      0.89       280



In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the shuffled dataset
data = pd.read_csv('shuffled_combined_features.csv')

# Separate features (X) and labels (y)
X = data.drop(columns=['Label'])  # Drop the label column to get features
y = data['Label']  # The label column

for a in range(100):
  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=a, stratify=y)

  # Initialize the SVM model
  svm_model = SVC(kernel='linear', random_state=42)  # You can change the kernel to 'rbf', 'poly', etc.

  # Train the SVM model
  svm_model.fit(X_train, y_train)

  # Predict the test set
  y_pred = svm_model.predict(X_test)

  # Evaluate the model
  accuracy = accuracy_score(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  # Print the results
  print(f"Accuracy: {accuracy:.2f}")
  print("\nClassification Report:\n")
  print(report)


Accuracy: 0.91

Classification Report:

              precision    recall  f1-score   support

           0       0.89      0.94      0.91       140
           1       0.93      0.89      0.91       140

    accuracy                           0.91       280
   macro avg       0.91      0.91      0.91       280
weighted avg       0.91      0.91      0.91       280

Accuracy: 0.90

Classification Report:

              precision    recall  f1-score   support

           0       0.91      0.89      0.90       140
           1       0.89      0.91      0.90       140

    accuracy                           0.90       280
   macro avg       0.90      0.90      0.90       280
weighted avg       0.90      0.90      0.90       280

Accuracy: 0.91

Classification Report:

              precision    recall  f1-score   support

           0       0.92      0.89      0.91       140
           1       0.90      0.92      0.91       140

    accuracy                           0.91       280
   macro

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load the shuffled dataset
data = pd.read_csv('shuffled_combined_features.csv')

# Fill null values with the average of the respective columns
data.fillna(data.mean(), inplace=True)

# Separate features (X) and labels (y)
X = data.drop(columns=['Label'])  # Drop the label column to get features
y = data['Label']  # The label column

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize the SVM model
svm_model = SVC(kernel='linear', random_state=42)  # You can change the kernel to 'rbf', 'poly', etc.

# Train the SVM model
svm_model.fit(X_train, y_train)

# Predict the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n")
print(report)


Accuracy: 0.89

Classification Report:

              precision    recall  f1-score   support

           0       0.87      0.92      0.89       140
           1       0.92      0.86      0.89       140

    accuracy                           0.89       280
   macro avg       0.89      0.89      0.89       280
weighted avg       0.89      0.89      0.89       280



In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the shuffled dataset
data = pd.read_csv('shuffled_combined_features.csv')

# Fill null values with the average of the respective columns
data.fillna(data.mean(), inplace=True)

# Separate features (X) and labels (y)
X = data.drop(columns=['Label'])  # Drop the label column to get features
y = data['Label']  # The label column

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Random Forest model
rf_model.fit(X_train, y_train)

# Predict the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n")
print(report)


Accuracy: 0.96

Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.99      0.97       140
           1       0.99      0.94      0.96       140

    accuracy                           0.96       280
   macro avg       0.97      0.96      0.96       280
weighted avg       0.97      0.96      0.96       280



In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the shuffled dataset
data = pd.read_csv('shuffled_combined_features.csv')

# Fill null values with the average of the respective columns
data.fillna(data.mean(), inplace=True)

# Separate features (X) and labels (y)
X = data.drop(columns=['Label'])  # Drop the label column to get features
y = data['Label']  # The label column
for a in range(100):
  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=a, stratify=y)

  # Initialize the Random Forest model
  rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

  # Train the Random Forest model
  rf_model.fit(X_train, y_train)

  # Predict the test set
  y_pred = rf_model.predict(X_test)

  # Evaluate the model
  accuracy = accuracy_score(y_test, y_pred)
  report = classification_report(y_test, y_pred)

  # Print the results
  print(f"Accuracy: {accuracy:.2f}")
  print("\nClassification Report:\n")
  print(report)


Accuracy: 0.96

Classification Report:

              precision    recall  f1-score   support

           0       0.93      0.99      0.96       140
           1       0.99      0.93      0.96       140

    accuracy                           0.96       280
   macro avg       0.96      0.96      0.96       280
weighted avg       0.96      0.96      0.96       280

Accuracy: 0.94

Classification Report:

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       140
           1       0.94      0.94      0.94       140

    accuracy                           0.94       280
   macro avg       0.94      0.94      0.94       280
weighted avg       0.94      0.94      0.94       280

Accuracy: 0.97

Classification Report:

              precision    recall  f1-score   support

           0       0.97      0.97      0.97       140
           1       0.97      0.97      0.97       140

    accuracy                           0.97       280
   macro