In [1]:
import torch
print(torch.cuda.is_available())

True


In [10]:
import pandas as pd

# Load the scp_statements.csv file
scp_statements_path = r".\ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3\ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3\scp_statements.csv"
scp_statements = pd.read_csv(scp_statements_path)

# List of relevant descriptions for Sinus, AfIB, and Heart Block
sinus_descriptions = ['sinus rhythm', 'normal functioning artificial pacemaker', 'normal ECG']
afib_descriptions = ['atrial fibrillation', 'atrial flutter']
heart_block_descriptions = [
    'AV block', 'first degree AV block', 'second degree AV block', 'third degree AV block', 
    'left bundle branch block', 'right bundle branch block', 'incomplete left bundle branch block', 
    'incomplete right bundle branch block', 'left anterior fascicular block', 'right posterior fascicular block'
]

# Filter for Sinus (normal) based on descriptions
filtered_by_sinus = scp_statements[scp_statements['description'].str.contains('|'.join(sinus_descriptions), case=False, na=False)]
filtered_by_sinus['group'] = 'Sinus'

# Filter for AfIB based on descriptions
filtered_by_afib = scp_statements[scp_statements['description'].str.contains('|'.join(afib_descriptions), case=False, na=False)]
filtered_by_afib['group'] = 'AFib'

# Filter for Heart Block based on descriptions
filtered_by_heart_block = scp_statements[scp_statements['description'].str.contains('|'.join(heart_block_descriptions), case=False, na=False)]
filtered_by_heart_block['group'] = 'Heart Block'

# Combine all the filtered data into one DataFrame
combined_filtered_df = pd.concat([filtered_by_sinus, filtered_by_afib, filtered_by_heart_block]).drop_duplicates()

# Display the resulting DataFrame
print("Combined filtered DataFrame with appropriate groups:")
print(combined_filtered_df[['diagnostic_class', 'description', 'group', 'scp_code']])


Combined filtered DataFrame with appropriate groups:
   diagnostic_class                              description        group  \
4              NORM                               normal ECG        Sinus   
59              NaN                             sinus rhythm        Sinus   
64              NaN  normal functioning artificial pacemaker        Sinus   
60              NaN                      atrial fibrillation         AFib   
67              NaN                           atrial flutter         AFib   
8                CD           left anterior fascicular block  Heart Block   
10               CD     incomplete right bundle branch block  Heart Block   
11               CD                    first degree AV block  Heart Block   
14               CD       complete right bundle branch block  Heart Block   
15               CD        complete left bundle branch block  Heart Block   
33               CD      incomplete left bundle branch block  Heart Block   
41               CD    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_by_sinus['group'] = 'Sinus'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_by_afib['group'] = 'AFib'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_by_heart_block['group'] = 'Heart Block'


In [12]:
import pandas as pd

valid_scp_codes = combined_filtered_df['scp_code'].dropna().unique()
valid_scp_codes

array(['NORM', 'SR', 'PACE', 'AFIB', 'AFLT', 'LAFB', 'IRBBB', '1AVB',
       'CRBBB', 'CLBBB', 'ILBBB', '3AVB', '2AVB'], dtype=object)

In [15]:
import ast

ptbxl_df = pd.read_csv('.\ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3\ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.3\ptbxl_database.csv')

def get_highest_scp_code(scp_dict):
    scp_dict = ast.literal_eval(scp_dict)
    return max(scp_dict, key=scp_dict.get)

ptbxl_df['highest_scp_code'] = ptbxl_df['scp_codes'].apply(get_highest_scp_code)
ptbxl_df.head()

Unnamed: 0,ecg_id,patient_id,age,sex,height,weight,nurse,site,device,recording_date,...,baseline_drift,static_noise,burst_noise,electrodes_problems,extra_beats,pacemaker,strat_fold,filename_lr,filename_hr,highest_scp_code
0,1,15709.0,56.0,1,,63.0,2.0,0.0,CS-12 E,1984-11-09 09:17:34,...,,", I-V1,",,,,,3,records100/00000/00001_lr,records500/00000/00001_hr,NORM
1,2,13243.0,19.0,0,,70.0,2.0,0.0,CS-12 E,1984-11-14 12:55:37,...,,,,,,,2,records100/00000/00002_lr,records500/00000/00002_hr,NORM
2,3,20372.0,37.0,1,,69.0,2.0,0.0,CS-12 E,1984-11-15 12:49:10,...,,,,,,,5,records100/00000/00003_lr,records500/00000/00003_hr,NORM
3,4,17014.0,24.0,0,,82.0,2.0,0.0,CS-12 E,1984-11-15 13:44:57,...,", II,III,AVF",,,,,,3,records100/00000/00004_lr,records500/00000/00004_hr,NORM
4,5,17448.0,19.0,1,,70.0,2.0,0.0,CS-12 E,1984-11-17 10:43:15,...,", III,AVR,AVF",,,,,,4,records100/00000/00005_lr,records500/00000/00005_hr,NORM


In [16]:
filtered_ptbxl_df = ptbxl_df[ptbxl_df['highest_scp_code'].isin(valid_scp_codes)]
filtered_ptbxl_df

Unnamed: 0,ecg_id,patient_id,age,sex,height,weight,nurse,site,device,recording_date,...,baseline_drift,static_noise,burst_noise,electrodes_problems,extra_beats,pacemaker,strat_fold,filename_lr,filename_hr,highest_scp_code
0,1,15709.0,56.0,1,,63.0,2.0,0.0,CS-12 E,1984-11-09 09:17:34,...,,", I-V1,",,,,,3,records100/00000/00001_lr,records500/00000/00001_hr,NORM
1,2,13243.0,19.0,0,,70.0,2.0,0.0,CS-12 E,1984-11-14 12:55:37,...,,,,,,,2,records100/00000/00002_lr,records500/00000/00002_hr,NORM
2,3,20372.0,37.0,1,,69.0,2.0,0.0,CS-12 E,1984-11-15 12:49:10,...,,,,,,,5,records100/00000/00003_lr,records500/00000/00003_hr,NORM
3,4,17014.0,24.0,0,,82.0,2.0,0.0,CS-12 E,1984-11-15 13:44:57,...,", II,III,AVF",,,,,,3,records100/00000/00004_lr,records500/00000/00004_hr,NORM
4,5,17448.0,19.0,1,,70.0,2.0,0.0,CS-12 E,1984-11-17 10:43:15,...,", III,AVR,AVF",,,,,,4,records100/00000/00005_lr,records500/00000/00005_hr,NORM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21792,21831,11905.0,55.0,1,,,1.0,2.0,AT-60 3,2001-05-28 12:49:25,...,,,,,,,9,records100/21000/21831_lr,records500/21000/21831_hr,NORM
21793,21832,7954.0,63.0,0,,,1.0,2.0,AT-60 3,2001-05-30 14:14:25,...,,,,,,,7,records100/21000/21832_lr,records500/21000/21832_hr,LAFB
21795,21834,20703.0,300.0,0,,,1.0,2.0,AT-60 3,2001-06-05 11:33:39,...,,,,,,,4,records100/21000/21834_lr,records500/21000/21834_hr,NORM
21797,21836,8873.0,64.0,1,,,1.0,2.0,AT-60 3,2001-06-09 18:21:49,...,,,,,SVES,,8,records100/21000/21836_lr,records500/21000/21836_hr,NORM
