In [16]:
from omni_ieeg.dataloader.datafilter import DataFilter
from omni_ieeg.utils.utils_edf import concate_edf
from tqdm import tqdm
import random
import os
import pandas as pd
# --- Experiment selection ----------------------------------------------------
dataset = "openieeg"            # passed as `dataset=` when listing patients
method = "cnn_cross_patient"    # path component selecting which predictions to read / where to save
filter_list = []                # labels whose predictions gate events; empty list applies no filter

# --- Locations ---------------------------------------------------------------
dataset_folder = "/mnt/SSD1/nipsdataset/dataset/ominiieeg"
event_model_preds_folder = "/mnt/SSD1/nipsdataset/channel_training/new_event_model_preds/"
save_folder = "/mnt/SSD1/nipsdataset/channel_training/new_event_model/"

# --- One-time setup ----------------------------------------------------------
# Seed Python's global RNG so any use of `random` in this notebook is repeatable.
random.seed(42)
# Make sure the output root exists before any per-patient result is written.
os.makedirs(save_folder, exist_ok=True)



    
    

In [17]:
def calculate_events(df, list_of_filters, edf_length, threshold=0.5):
    """Count the events retained on each channel of one recording.

    Boundary events are discarded first; then, for every entry of
    ``list_of_filters``, only rows whose score is strictly greater than
    ``threshold`` are kept. The surviving rows are counted per channel and
    converted to a rate by dividing by ``edf_length``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table with at least the columns ``participant``, ``is_boundary``
        and ``name``, plus one score column per requested filter (named either
        ``<filter>`` or ``<filter>_pred``).
    list_of_filters : list of str
        Filter names to apply, in order. An empty list keeps every
        non-boundary event.
    edf_length : float
        Divisor used for ``count_per_second``; must be strictly positive.
    threshold : float, optional
        Score cut-off applied to every filter column (default 0.5).

    Returns
    -------
    pandas.DataFrame
        One row per channel that still has at least one event, with columns
        ``name``, ``count`` and ``count_per_second``. Channels without any
        retained event do not appear in the result.

    Raises
    ------
    AssertionError
        If the table mixes more than one participant.
    ValueError
        If ``edf_length`` is not strictly positive, or a requested filter has
        no matching column.
    """
    # A table with no rows (no detected events) is valid input; only a table
    # that mixes several participants is rejected.
    assert len(df['participant'].unique()) <= 1, "Event table mixes several participants"

    # `not x > 0` also rejects NaN, which would otherwise yield NaN/inf rates.
    if not edf_length > 0:
        raise ValueError(f"edf_length must be positive, got {edf_length}")

    # Keep rows whose boundary flag compares equal to False (missing flags are dropped).
    df = df[df['is_boundary'].eq(False)]

    print(f"Before filtering: {len(df)}")
    for filter_name in list_of_filters:
        # Accept either the bare filter name or its "<name>_pred" variant.
        if filter_name in df.columns:
            score_column = filter_name
        elif f"{filter_name}_pred" in df.columns:
            score_column = f"{filter_name}_pred"
        else:
            raise ValueError(f"Filter {filter_name} not found in dataframe")
        df = df[df[score_column] > threshold]
    print(f"After filtering: {len(df)}")

    # One row per channel name with the number of surviving events.
    counts = df.groupby('name').size().reset_index(name='count')
    counts['count_per_second'] = counts['count'] / edf_length
    return counts
    
    
    

    
    #
    

In [18]:
data_filter = DataFilter(dataset_folder)

# Keep patients that have an outcome, EDFs that are non-ictal, and channels
# that carry both SOZ and resection information.
# NOTE(review): unlike the two filters above it, `both_soz_and_resection_filter`
# is passed without being called — confirm against the DataFilter API that this
# is intended.
filtered_dataset = data_filter.apply_filters(
    patient_filter=DataFilter.has_outcome_filter(),
    edf_filter=DataFilter.non_ictal_filter(),
    channel_filter=DataFilter.both_soz_and_resection_filter
)

patients = filtered_dataset.get_patients(dataset=dataset)
print(f"Found {len(patients)} patients")


def _preds_csv_path(edf_file, label):
    """Return the prediction CSV path for `edf_file` and `label`, asserting that it exists."""
    preds_root = os.path.join(event_model_preds_folder, method, label)
    # Same substitutions as before: swap the dataset root for the predictions
    # root, then the ".edf" suffix for ".csv".
    csv_path = edf_file.replace(dataset_folder, preds_root)
    csv_path = csv_path.replace(".edf", ".csv")
    assert os.path.exists(csv_path), f"Preds file {csv_path} does not exist"
    return csv_path


def _load_merged_predictions(edf_file, labels):
    """Load the event table of `edf_file` with one `<label>_pred` column per entry of `labels`."""
    if not labels:
        # No filter requested: the "artifact" table is read as-is and only
        # supplies the list of events.
        return pd.read_csv(_preds_csv_path(edf_file, "artifact"))

    # The first label's table is the base that the other labels are attached to.
    base_label = labels[0]
    merged = pd.read_csv(_preds_csv_path(edf_file, base_label))
    base_pred_cols = [col for col in merged.columns if 'pred' in col]
    if base_pred_cols:
        # Tag the prediction column with its label so several labels can coexist.
        merged = merged.rename(columns={base_pred_cols[0]: f"{base_label}_pred"})

    # Columns that must be identical in every label's table.
    shared_cols = [col for col in merged.columns if 'pred' not in col]

    for target_label in labels[1:]:
        current = pd.read_csv(_preds_csv_path(edf_file, target_label))

        current_shared_cols = [col for col in current.columns if 'pred' not in col]
        assert set(shared_cols) == set(current_shared_cols), f"Non-prediction columns don't match for {target_label}"
        for col in shared_cols:
            assert current[col].equals(merged[col]), f"Column {col} has different values in {target_label} file"

        current_pred_cols = [col for col in current.columns if 'pred' in col]
        if current_pred_cols:
            merged[f"{target_label}_pred"] = current[current_pred_cols[0]]
    return merged


# The output directory does not depend on the patient, so create it once.
output_dir = os.path.join(save_folder, method, f"filter_{filter_list}")
os.makedirs(output_dir, exist_ok=True)

for patient in patients:
    print(f"Processing {patient}")
    edf_files = filtered_dataset.get_edfs_by_patient(patient)
    patient_dfs = []
    for edf_file in edf_files:
        corresponding_channel_df = filtered_dataset.get_channels_for_edf(edf_file)
        # Every channel row of one EDF is expected to report the same length.
        assert corresponding_channel_df['edf_length'].unique().size == 1, f"Inconsistent edf_length for {edf_file}"
        edf_length = corresponding_channel_df['edf_length'].iloc[0]

        merged_df = _load_merged_predictions(edf_file, filter_list)
        patient_dfs.append(calculate_events(merged_df, filter_list, edf_length))

    if not patient_dfs:
        # pd.concat raises on an empty list; skip instead of aborting the whole run.
        print(f"Skipping {patient}: no EDF files to process")
        continue

    # Stack the per-EDF channel tables and average each channel across EDFs.
    # NOTE(review): a channel with no retained event in an EDF is absent from
    # that EDF's table, so the mean only covers EDFs where the channel had at
    # least one event, and the inner merge below drops channels that never had
    # one — confirm this is the intended statistic.
    patient_all_events = pd.concat(patient_dfs, ignore_index=True)
    averaged_events = patient_all_events.groupby('name').agg({
        'count': 'mean',
        'count_per_second': 'mean'
    }).reset_index()

    # Attach the patient's channel metadata and write one CSV per patient.
    channel_df = filtered_dataset.get_merged_channels_for_patient(patient)
    merged_patient_df = pd.merge(averaged_events, channel_df, on='name', how='inner')
    merged_patient_df.to_csv(os.path.join(output_dir, f"{patient}.csv"), index=False)
        
        
        
        

Found 162 patients
Processing sub-openieegDetroit001
Before filtering: 2642
After filtering: 2642
Processing sub-openieegDetroit002
Before filtering: 4139
After filtering: 4139
Processing sub-openieegDetroit003
Before filtering: 2631
After filtering: 2631
Processing sub-openieegDetroit004
Before filtering: 727
After filtering: 727
Processing sub-openieegDetroit005
Before filtering: 4308
After filtering: 4308
Processing sub-openieegDetroit006
Before filtering: 816
After filtering: 816
Processing sub-openieegDetroit007
Before filtering: 2030
After filtering: 2030
Processing sub-openieegDetroit008
Before filtering: 1416
After filtering: 1416
Processing sub-openieegDetroit009
Before filtering: 1080
After filtering: 1080
Processing sub-openieegDetroit010
Before filtering: 5751
After filtering: 5751
Processing sub-openieegDetroit011
Before filtering: 361
After filtering: 361
Processing sub-openieegDetroit012
Before filtering: 2868
After filtering: 2868
Processing sub-openieegDetroit013
Befor