# Combining SHHS1 and SHHS2

In [1]:
import pandas as pd
# Load the per-visit HRV summary tables (one CSV file per SHHS visit).
shhs1, shhs2 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'shhs1-hrv-summary-0.20.0 (1).csv',
        'shhs2-hrv-summary-0.20.0 (1).csv',
    )
)

In [2]:
# Stack the two visits: keep every shhs1 row, and add an shhs2 row only when
# its nsrrid does not already appear in shhs1.
already_in_shhs1 = shhs2['nsrrid'].isin(shhs1['nsrrid'])
shhs = pd.concat([shhs1, shhs2.loc[~already_in_shhs1]])

In [15]:
# Sanity check: (rows, columns) of the combined HRV table.
shhs.shape

(498, 22)

In [17]:
# List the column names available in the combined HRV table.
shhs.columns

Index(['nsrrid', 'visitnumber', 'NN_RR', 'AVNN', 'IHR', 'SDNN', 'SDANN',
       'SDNNIDX', 'rMSSD', 'pNN10', 'pNN20', 'pNN30', 'pNN40', 'pNN50',
       'tot_pwr', 'ULF', 'VLF', 'LF', 'HF', 'LF_HF', 'LF_n', 'HF_n'],
      dtype='object')

# Adding Gender, Age, BMI

In [5]:
# Load the SHHS harmonized dataset; it supplies the age, sex, BMI, AHI and
# smoking columns that are merged onto the HRV records below.
harmonized = pd.read_csv("shhs-harmonized-dataset-0.20.0 (1).csv")

In [6]:
# Split the harmonized table into one frame per visit number.
h1 = harmonized.loc[harmonized['visitnumber'].eq(1)]
h2 = harmonized.loc[harmonized['visitnumber'].eq(2)]

In [7]:
# Build the per-subject harmonized table: all visit-1 rows, plus visit-2 rows
# only for subjects (nsrrid) that have no visit-1 row.
seen_at_visit1 = h2['nsrrid'].isin(h1['nsrrid'])
h = pd.concat([h1, h2.loc[~seen_at_visit1]])

In [8]:
# List the harmonized columns to choose which ones to carry forward.
harmonized.columns

Index(['nsrrid', 'visitnumber', 'nsrr_age', 'nsrr_age_gt89', 'nsrr_sex',
       'nsrr_race', 'nsrr_ethnicity', 'nsrr_bmi', 'nsrr_bp_systolic',
       'nsrr_bp_diastolic', 'nsrr_current_smoker', 'nsrr_ever_smoker',
       'nsrr_ahi_hp3u', 'nsrr_ahi_hp3r_aasm15', 'nsrr_ahi_hp4u_aasm15',
       'nsrr_ahi_hp4r', 'nsrr_ttldursp_f1', 'nsrr_phrnumar_f1',
       'nsrr_flag_spsw', 'nsrr_ttleffsp_f1', 'nsrr_ttllatsp_f1',
       'nsrr_ttlprdsp_s1sr', 'nsrr_ttldursp_s1sr', 'nsrr_ttldurws_f1',
       'nsrr_pctdursp_s1', 'nsrr_pctdursp_s2', 'nsrr_pctdursp_s3',
       'nsrr_pctdursp_sr', 'nsrr_ttlprdbd_f1'],
      dtype='object')

In [10]:
# Narrow the harmonized table to the subject id plus the age, sex, BMI, AHI
# and current-smoker columns that are merged onto the HRV records.
hcut = h.loc[:, [
    'nsrrid',
    'nsrr_age',
    'nsrr_age_gt89',
    'nsrr_sex',
    'nsrr_bmi',
    'nsrr_ahi_hp3u',
    'nsrr_ahi_hp3r_aasm15',
    'nsrr_ahi_hp4u_aasm15',
    'nsrr_ahi_hp4r',
    'nsrr_current_smoker',
]]

In [11]:
# Attach the harmonized covariates (age, sex, BMI, AHI, smoking) to each HRV
# record.
#
# The join must match on the visit as well as the subject. `hcut` holds one
# row per subject and prefers the visit-1 row, so joining on 'nsrrid' alone
# pairs a visit-2 HRV record with visit-1 age/BMI/AHI whenever the subject
# also has a visit-1 harmonized row. Pulling the same columns straight from
# `harmonized` (which still has one row per visit) and merging on both keys
# keeps HRV and covariates from the same visit.
# NOTE(review): assumes `harmonized` has at most one row per
# (nsrrid, visitnumber) — confirm before relying on the row count.
demographics = harmonized[['visitnumber', *hcut.columns]]
step2 = shhs.merge(demographics, on=['nsrrid', 'visitnumber'], how='inner')

In [12]:
# Encode sex numerically: 0 = male, 1 = female.
# An explicit mapping is used instead of "anything that is not 'male' -> 1",
# so a missing or unexpected label becomes NaN rather than being silently
# counted as female.
# NOTE(review): assumes the labels are exactly 'male' / 'female' — confirm
# with step2['nsrr_sex'].value_counts(dropna=False) before running.
step2['nsrr_sex'] = step2['nsrr_sex'].map({'male': 0, 'female': 1})

In [13]:
# Encode the "age greater than 89" flag numerically: 0 = no, 1 = yes.
# An explicit mapping is used instead of "anything that is not 'no' -> 1",
# so a missing or unexpected label becomes NaN rather than being silently
# counted as "yes".
# NOTE(review): assumes the labels are exactly 'no' / 'yes' — confirm with
# step2['nsrr_age_gt89'].value_counts(dropna=False) before running.
step2['nsrr_age_gt89'] = step2['nsrr_age_gt89'].map({'no': 0, 'yes': 1})

In [14]:
# Encode current-smoker status numerically: 0 = no, 1 = yes.
# An explicit mapping is used instead of "anything that is not 'no' -> 1",
# so a missing or not-reported status becomes NaN rather than being silently
# counted as a current smoker.
# NOTE(review): assumes the informative labels are exactly 'no' / 'yes' —
# confirm with step2['nsrr_current_smoker'].value_counts(dropna=False).
step2['nsrr_current_smoker'] = step2['nsrr_current_smoker'].map({'no': 0, 'yes': 1})

# Adding Neck Circumference

In [18]:
# Load the SHHS1 dataset; only 'nsrrid' and 'neck20' are taken from it below.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the saved cell output looks like the tail of a pandas warning
# raised by this line — presumably a mixed-dtype warning; confirm it is gone
# after re-running.
shhs1dataset = pd.read_csv('shhs1-dataset-0.20.0 (1).csv', low_memory=False)

  shhs1dataset = pd.read_csv('shhs1-dataset-0.20.0 (1).csv')


In [19]:
# Keep only the subject id and the 'neck20' column (the neck-circumference
# variable this section adds).
shhs1datasetcut = shhs1dataset.loc[:, ['nsrrid', 'neck20']]

In [20]:
# Add 'neck20' to every record by joining on the subject id; with the default
# inner join, records whose nsrrid is absent from shhs1datasetcut are dropped.
step3 = pd.merge(step2, shhs1datasetcut, on='nsrrid')

In [21]:
# Subset of the merged records that come from visit 2.
# NOTE(review): no later cell shown here reads `step3filter` (the next step
# starts from `step3`) — confirm whether this filter was meant to be applied.
step3filter = step3.loc[step3['visitnumber'].eq(2)]

# Adding Sleep Apnea Label

In [22]:
def label_ahi_severity(ahi_value):
    """Translate an AHI value into a sleep-apnea severity label.

    Bands (lower bound inclusive, upper bound exclusive):
        ahi_value >= 30        -> 'Severe'
        15 <= ahi_value < 30   -> 'Moderate'
        5  <= ahi_value < 15   -> 'Mild'
        anything else          -> 'No Apnea'

    The fallback also catches values for which every comparison is false,
    such as NaN, so a missing AHI is labelled 'No Apnea'.
    """
    # Check the bands from the highest lower bound down; the first one the
    # value reaches decides the label.
    severity_bands = ((30, 'Severe'), (15, 'Moderate'), (5, 'Mild'))
    for lower_bound, severity in severity_bands:
        if ahi_value >= lower_bound:
            return severity
    return 'No Apnea'

# Work on a copy: a plain `step4 = step3` only binds a second name to the same
# DataFrame, so adding a column to `step4` would also modify `step3`.
step4 = step3.copy()

# Create the 'ahi_severity' column by labelling each record's AHI value.
# NOTE(review): label_ahi_severity returns 'No Apnea' for a missing (NaN) AHI;
# check step4['nsrr_ahi_hp3u'].isna().sum() if that matters for the analysis.
step4['ahi_severity'] = step4['nsrr_ahi_hp3u'].apply(label_ahi_severity)

In [23]:
# Binary label: 0 where the severity is 'No Apnea', 1 for every other value.
step4['Apnea_digit'] = step4['ahi_severity'].ne('No Apnea').astype('int64')

In [24]:
# Two-class text label: keep 'No Apnea' as is, collapse every other severity
# value into 'Apnea'.
step4['Apnea_Category'] = step4['ahi_severity'].where(
    step4['ahi_severity'].eq('No Apnea'), 'Apnea'
)

# Exclude Arrhythmia & Events (these can bias HRV)

In [25]:
# Load the SHHS CVD events dataset; its 'nsrrid' values identify the subjects
# to exclude below.
events = pd.read_csv("shhs-cvd-events-dataset-0.20.0 (1).csv")

In [26]:
# Distinct subject ids that appear in the events table.
unique_events_nsrrid = events['nsrrid'].unique()

# Keep only the records whose subject id is absent from the events table.
has_event = step4['nsrrid'].isin(unique_events_nsrrid)
step5 = step4.loc[~has_event]

In [27]:
# Wrap the excluded subject ids in a single-column DataFrame.
# NOTE(review): this rebinds `unique_events_nsrrid` from the result of
# .unique() to a DataFrame; a separate name would make it clearer which
# object later cells are using.
unique_events_nsrrid = pd.DataFrame(unique_events_nsrrid)

In [28]:
# `fixed` is the final table. It is another name for `step5`, not a copy.
fixed = step5
# Show the final column set.
fixed.columns
# Optional export of the final table (left disabled):
#fixed.to_excel('Data.xlsx')

Index(['nsrrid', 'visitnumber', 'NN_RR', 'AVNN', 'IHR', 'SDNN', 'SDANN',
       'SDNNIDX', 'rMSSD', 'pNN10', 'pNN20', 'pNN30', 'pNN40', 'pNN50',
       'tot_pwr', 'ULF', 'VLF', 'LF', 'HF', 'LF_HF', 'LF_n', 'HF_n',
       'nsrr_age', 'nsrr_age_gt89', 'nsrr_sex', 'nsrr_bmi', 'nsrr_ahi_hp3u',
       'nsrr_ahi_hp3r_aasm15', 'nsrr_ahi_hp4u_aasm15', 'nsrr_ahi_hp4r',
       'nsrr_current_smoker', 'neck20', 'ahi_severity', 'Apnea_digit',
       'Apnea_Category'],
      dtype='object')