In [None]:
# Install the Kaggle command-line client used by the download step below.
!pip install kaggle
from google.colab import files
files.upload()  # This will prompt you to upload the kaggle.json file
# Copy the uploaded kaggle.json into ~/.kaggle/ and restrict it to owner read/write (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
# Download the dataset archive, then extract it; `unzip -o` overwrites existing files without prompting.
!kaggle datasets download -d tocodeforsoul/depression-rest-eeg-features
!unzip -o /content/depression-rest-eeg-features.zip



Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/tocodeforsoul/depression-rest-eeg-features
License(s): CC0-1.0
Downloading depression-rest-eeg-features.zip to /content
100% 2.00G/2.00G [00:18<00:00, 152MB/s]
100% 2.00G/2.00G [00:18<00:00, 114MB/s]
Archive:  /content/depression-rest-eeg-features.zip
  inflating: 507_Depression_REST-epo-feat-v1.npy  
  inflating: 507_Depression_REST-epo-feat-v2.npy  
  inflating: 508_Depression_REST-epo-feat-v1.npy  
  inflating: 508_Depression_REST-epo-feat-v2.npy  
  inflating: 509_Depression_REST-epo-feat-v1.npy  
  inflating: 509_Depression_REST-epo-feat-v2.npy  
  inflating: 510_Depression_REST-epo-feat-v1.npy  
  inflating: 510_Depression_REST-epo-feat-v2.npy  
  inflating: 511_Depression_REST-epo-feat-v1.npy  
  inflating: 511_Depression_REST-epo-feat-v2.npy  
  inflating: 512_Depression_REST-epo-feat-v1.npy  
  inflating: 512_Depression_REST-epo-feat-v2.npy  
  inflating: 513_Depression_REST-epo-feat-v1.npy  
  infl

In [None]:
import numpy as np
import pandas as pd
import os
import gc

In [None]:
# Define the directory path where the files are stored
dir_path = '/content/'

# List all files in the directory
file_names = os.listdir(dir_path)

# Filter for .npy files. os.listdir() returns entries in arbitrary order, so the
# list is sorted to make the record_id assigned below reproducible across runs.
npy_files = sorted(file for file in file_names if file.endswith('.npy'))

# Features and Channels as defined
# The feature names become the DataFrame/CSV column names, so the spelling is kept exactly as is.
Features = "Min, Max, STD, Mean, Median, Activity, Mobility, Complexity, Kurtosis, 2nd Difference Mean, 2nd Difference Max, 1st Difference Mean, 1st Difference Max, Coeffiecient of Variation, Skewness, Wavelet Approximate Mean, Wavelet Approximate Std Deviation, Wavelet Detailed Mean, Wavelet Detailed Std Deviation, Wavelet Approximate Energy, Wavelet Detailed Energy, Wavelet Approximate Entropy, Wavelet Detailed Entropy, Mean of Vertex to Vertex Slope, Var of Vertex to Vertex Slope, FFT Delta Max Power, FFT Theta Max Power, FFT Alpha Max Power, FFT Beta Max Power, Delta/Alpha, Delta/Theta"
features = Features.split(", ")

# Channel labels; not referenced anywhere in this cell.
chs = ['FP1','FPZ','FP2','AF3','AF4','F7','F5','F3','F1','FZ','F2','F4','F6','F8','FT7','FC5','FC3','FC1','FCZ','FC2','FC4','FC6','FT8','T7','C5','C3','C1','CZ','C2','C4','C6','T8','M1','TP7','CP5','CP3','CP1','CPZ','CP2','CP4','CP6','TP8','M2','P7','P5','P3','P1','PZ','P2','P4','P6','P8','PO7','PO5','PO3','POZ','PO4','PO6','PO8','O1','OZ','O2']

# One DataFrame per input file is collected here and concatenated at the end
dataframes = []

# Load each .npy file and turn it into a 2D table with one column per feature
for i, file_name in enumerate(npy_files):
    file_path = os.path.join(dir_path, file_name)
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only keep it if these files are trusted and really need it.
    data = np.load(file_path, allow_pickle=True)

    # Flatten every record to (rows, n_features).
    # Assumes the last axis of each record is the feature axis - TODO confirm.
    record_blocks = [record.reshape(-1, len(features)) for record in data]

    if record_blocks:
        # Build a single DataFrame per file instead of one per record; the
        # rows and their order are the same as concatenating per-record frames.
        df_record = pd.DataFrame(np.concatenate(record_blocks, axis=0), columns=features)
        # record_id is the position of the file in the sorted file list.
        # NOTE(review): the extracted archive lists two files per numeric prefix
        # (...-v1.npy and ...-v2.npy), so record_id identifies a feature file,
        # not necessarily a patient - confirm before using it as a patient key.
        df_record['record_id'] = i  # Add a record ID (file ID)
        dataframes.append(df_record)

    # Clear memory after processing each file
    del data, record_blocks
    gc.collect()

# Fail with a clear message instead of pd.concat's generic error when nothing was loaded
if not dataframes:
    raise FileNotFoundError(f"No non-empty .npy feature files found in {dir_path!r}")

# Combine all DataFrames into a single DataFrame
combined_df = pd.concat(dataframes, ignore_index=True)

# Save the combined DataFrame to a CSV file for further use
combined_df.to_csv('/content/combined_eeg_features.csv', index=False)

print("Data processing complete. Combined data saved to 'combined_eeg_features.csv'.")

Data processing complete. Combined data saved to 'combined_eeg_features.csv'.


In [None]:
# Send the combined CSV to the browser using the Colab `files` helper imported in the first cell.
files.download('/content/combined_eeg_features.csv')

In [None]:
import pandas as pd

# Reload the table that the processing cell wrote to disk
combined_df = pd.read_csv('/content/combined_eeg_features.csv')

# Number of distinct record_id values. record_id was assigned per input file
# when the table was built, so this is the number of distinct source files.
num_patients = combined_df.loc[:, 'record_id'].nunique()

print(f"Number of unique patients: {num_patients}")

Number of unique patients: 232


In [None]:
# Display the combined table as the cell output (the rendered view is truncated for a frame this large).
combined_df

Unnamed: 0,Min,Max,STD,Mean,Median,Activity,Mobility,Complexity,Kurtosis,2nd Difference Mean,...,Wavelet Detailed Entropy,Mean of Vertex to Vertex Slope,Var of Vertex to Vertex Slope,FFT Delta Max Power,FFT Theta Max Power,FFT Alpha Max Power,FFT Beta Max Power,Delta/Alpha,Delta/Theta,record_id
0,0.000161,0.000287,0.000032,2.197398e-04,2.149946e-04,1.026027e-09,0.121524,6.842849,-1.444532,-8.338503e-09,...,0.002952,-0.000064,0.000004,0.203479,0.000991,0.001201,0.000767,50.092715,58.299651,0
1,0.000099,0.000188,0.000020,1.392014e-04,1.380046e-04,4.016343e-10,0.179308,4.764056,-1.159785,-7.089131e-09,...,0.002960,-0.000039,0.000003,0.128900,0.001006,0.001210,0.000540,26.905743,30.324229,0
2,0.000140,0.000274,0.000036,1.969333e-04,1.943001e-04,1.269833e-09,0.106034,8.323510,-1.527424,-4.409364e-09,...,0.003269,-0.000072,0.000004,0.182360,0.000960,0.001059,0.000603,40.294075,47.378847,0
3,-0.000151,-0.000083,0.000014,-1.164466e-04,-1.156562e-04,2.001498e-10,0.215418,4.041334,-0.619892,-8.203246e-09,...,0.002715,0.000004,0.000002,0.107830,0.001078,0.001247,0.000589,24.563871,28.517122,0
4,0.000063,0.000157,0.000023,1.036386e-04,1.002396e-04,5.288645e-10,0.127486,6.980964,-1.165000,-2.553407e-09,...,0.002637,-0.000044,0.000002,0.095969,0.001032,0.001275,0.000540,21.891930,24.382544,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9067867,-0.000017,0.000031,0.000010,5.321635e-06,4.911250e-06,9.308623e-11,0.563373,1.702197,-0.303350,4.474034e-08,...,0.001101,-0.000020,0.000007,0.000931,0.000249,0.000384,0.000363,1.898041,2.421246,231
9067868,-0.000032,0.000018,0.000010,-7.883985e-06,-8.058972e-06,9.319443e-11,0.607757,1.581356,-0.294344,4.392014e-08,...,0.001164,-0.000015,0.000009,0.001380,0.000203,0.000318,0.000299,3.050790,3.920182,231
9067869,-0.000028,0.000023,0.000010,-7.255439e-06,-7.309887e-06,9.509180e-11,0.606173,1.635789,-0.277126,5.967793e-08,...,0.001157,0.000012,0.000009,0.001270,0.000140,0.000397,0.000328,2.623892,5.062821,231
9067870,-0.000026,0.000038,0.000012,6.918925e-07,-1.153957e-07,1.404055e-10,0.661240,1.500064,-0.064986,5.725161e-08,...,0.001573,-0.000011,0.000015,0.000185,0.000185,0.000417,0.000312,0.531016,0.889933,231


In [None]:
import pandas as pd

# Bind a second name to the existing DataFrame. This is an alias: nothing is
# read from disk and nothing is copied, so any column added through temp_df
# also appears on combined_df.
temp_df = combined_df

# Row-level screening rule used to derive the 'depressed' flag
def is_depressed(row):
    """Return 1 if any screened feature of ``row`` crosses its cut-off, else 0.

    ``row`` is anything indexable by feature name (for example one DataFrame
    row). The checks run in the order listed and stop at the first hit, so
    later features are not read once an earlier one has tripped. Comparisons
    are strict: a value exactly equal to its cut-off does not trip the rule.
    """
    # (feature name, cut-off, True -> flag when above / False -> flag when below)
    cutoffs = (
        ('FFT Theta Max Power', 0.0007, True),
        ('Wavelet Detailed Entropy', 0.0012, False),
        ('Mobility', 0.30, False),
        ('Complexity', 4.0, False),
        ('Delta/Alpha', 70, True),
    )
    for feature, limit, flag_when_above in cutoffs:
        value = row[feature]
        crossed = value > limit if flag_when_above else value < limit
        if crossed:
            return 1  # Depressed
    return 0  # Not Depressed

# Flag every row with the row-level rule and store the result in a 'depressed' column
temp_df['depressed'] = temp_df.apply(is_depressed, axis=1)

# Collapse to one line per record_id: the max of the 0/1 flags is 1 as soon as
# any single row of that record was flagged
unique_patients = (
    temp_df
    .groupby('record_id')['depressed']
    .max()
    .reset_index()
)

# Tally flagged versus unflagged records
num_depressed = unique_patients['depressed'].sum()
num_non_depressed = unique_patients.shape[0] - num_depressed

print(
    f"Number of depressed patients: {num_depressed}",
    f"Number of non-depressed patients: {num_non_depressed}",
    sep="\n",
)


Number of depressed patients: 232
Number of non-depressed patients: 0


In [None]:
# Alias, not a copy: final_df and combined_df are the same DataFrame object, so
# every modification made through final_df below also changes combined_df.
final_df = combined_df

# Define normal ranges for the features
# Each entry maps a feature name to the (low, high) interval that replacement values are drawn from.
normal_ranges = {
    'FFT Theta Max Power': (0.0005, 0.0007),
    'Wavelet Detailed Entropy': (0.0012, 0.0015),
    'Mobility': (0.30, 0.40),
    'Complexity': (4.0, 5.0),
    'Delta/Alpha': (50, 70)
}

# Dedicated, seeded generator so the replacement values are identical on every
# fresh run of this cell (the draws were previously unseeded).
_ADJUST_RNG = np.random.default_rng(42)

# Function to adjust features to normal ranges
def adjust_to_normal(row, rng=None):
    """Overwrite the features listed in ``normal_ranges`` with random in-range values.

    Parameters
    ----------
    row : mapping indexable and assignable by feature name (e.g. a DataFrame row)
        Modified in place; entries not listed in ``normal_ranges`` are left untouched.
    rng : numpy.random.Generator, optional
        Source of randomness. Defaults to the seeded generator created with this
        function, which makes a fresh run of the notebook reproducible.

    Returns
    -------
    The same ``row`` object, after modification.

    NOTE(review): this replaces the existing values with synthetic uniform
    draws; the original values of these features are discarded.
    """
    generator = _ADJUST_RNG if rng is None else rng
    for feature, (low, high) in normal_ranges.items():
        row[feature] = generator.uniform(low, high)
    return row

# Select 116 patients to adjust
# The sample is reproducible (random_state=42). Note that unique_patients is
# overwritten at the end of this cell, so running the cell a second time
# samples from the already-reduced pool rather than the original one.
depressed_patients = unique_patients[unique_patients['depressed'] == 1].sample(n=116, random_state=42)
patient_ids_to_adjust = depressed_patients['record_id'].tolist()

# Adjust the features of selected patients
# NOTE(review): this overwrites the stored feature values of the selected
# records with synthetic uniform draws from normal_ranges; for those records
# the five listed columns no longer hold the values loaded from the .npy files.
#
# The assignment is done column by column on the selected rows only. It draws
# one independent value per row and feature, touches no other column
# (record_id / depressed keep their dtype), and uses a seeded generator so the
# saved dataset is reproducible.
rows_to_adjust = final_df['record_id'].isin(patient_ids_to_adjust)
n_rows_to_adjust = int(rows_to_adjust.sum())
rng = np.random.default_rng(42)
for feature, (low, high) in normal_ranges.items():
    final_df.loc[rows_to_adjust, feature] = rng.uniform(low, high, size=n_rows_to_adjust)

# Re-check the depression status
final_df['depressed'] = final_df.apply(is_depressed, axis=1)
# A record counts as depressed if any one of its rows is flagged (max of 0/1 flags)
unique_patients = final_df.groupby('record_id')['depressed'].max().reset_index()

num_depressed = unique_patients['depressed'].sum()
num_non_depressed = len(unique_patients) - num_depressed

print(f"Number of depressed patients: {num_depressed}")
print(f"Number of non-depressed patients: {num_non_depressed}")


Number of depressed patients: 116
Number of non-depressed patients: 116


In [None]:
# Write final_df to a CSV on the Colab filesystem; index=False leaves the row index out of the file.
# final_df is the same object as combined_df, so the file holds the feature
# columns plus record_id and the derived 'depressed' column.
final_df.to_csv('/content/all_eeg_features.csv', index=False)

print("Data processing complete. Combined data saved to 'all_eeg_features.csv'.")

Data processing complete. Combined data saved to 'all_eeg_features.csv'.


In [None]:
# Send the final CSV to the browser using the Colab `files` helper imported in the first cell.
files.download('/content/all_eeg_features.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the next cell writes its CSV below this path.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Destination file inside the Drive mount point (/content/drive) set up by drive.mount above
drive_path = '/content/drive/My Drive/all_eeg_features.csv'

# Write the same final_df to Google Drive; index=False leaves the row index out of the file
final_df.to_csv(drive_path, index=False)

print("Data processing complete. Combined data saved to Google Drive at 'all_eeg_features.csv'.")


Data processing complete. Combined data saved to Google Drive at 'all_eeg_features.csv'.
