In [1]:
# Declaration

import pandas as pd
import numpy as np
from enum import Enum
from enum import IntEnum

class Disease(IntEnum):
    """Integer codes identifying the diseases this notebook works with.

    Members are compared against the ``CODE`` column of the conditions data
    and the ``REASONCODE`` column of the medications data.
    NOTE(review): the values look like SNOMED CT concept ids -- confirm
    against the data dictionary of the source data set.
    """
    CHD = 53741008  # presumably coronary heart disease -- TODO confirm
    DIABETES = 44054006
    HYPERTENSION = 38341003
    MI = 22298006  # presumably myocardial infarction -- TODO confirm
    STROKE = 230690007
    COPD = 185086009

class DrugDiabetes(IntEnum):
    """Integer codes of the diabetes drugs tracked in this notebook.

    Members are compared against the ``CODE`` column of the medications data,
    and each member name becomes a one-hot column name (``CODE_<value>`` is
    renamed to the member name).
    NOTE(review): the values look like RxNorm codes -- confirm.
    """
    METFORMIN = 860975
    INSULIN = 106892
    GLP1 = 897122
    SGLT2 = 1373463
    
class DrugCOPD(IntEnum):
    """Integer codes of COPD drugs.

    NOTE(review): not referenced by any cell visible in this notebook section;
    presumably the same coding system as ``DrugDiabetes`` -- confirm.
    """
    FLUTICASONE = 896188

class Observation(Enum):
    """String codes identifying the observation types used in this notebook.

    ``.value`` of a member is compared against the ``CODE`` column of the
    observations data.
    NOTE(review): the values look like LOINC codes -- confirm.
    """
    BH = '8302-2'  # presumably body height -- TODO confirm
    BW = '29463-7'  # presumably body weight -- TODO confirm
    BMI = '39156-5'
    DIASTOLIC = '8462-4'
    GLUCOSE = '2339-0'
    HBA1C = '4548-4'
    SYSTOLIC = '8480-6'
    FEV1 = '19926-5'


## LOAD & CLEANING DATA PER SET

source : set(setnumber)\_raw/
dest   : set(setnumber)/

In [2]:
# Name of the data set currently being cleaned. Raw files are read from
# '<cleaning_set>_raw/' and cleaned files are written to '<cleaning_set>/'.
cleaning_set = 'set1'


def load_data(type):
    """Read one already-cleaned CSV of the active set.

    Parameters
    ----------
    type : str
        Base name of the file without the ``.csv`` extension,
        e.g. ``'patients'``.

    Returns
    -------
    pandas.DataFrame
        Contents of ``<cleaning_set>/<type>.csv``.
    """
    csv_path = '/'.join((cleaning_set, type + '.csv'))
    return pd.read_csv(csv_path)

In [None]:
# cleaning - patients
def cleaning_patients():
    """Clean the raw patients file of the active set and save the result.

    Reads ``<cleaning_set>_raw/patients.csv`` (``;``-separated, first 17
    columns only), drops the identifying / unused columns, discards rows whose
    RACE or GENDER is not one of the known categories, blanks out unknown
    ETHNICITY values (those rows are kept), and writes
    ``<cleaning_set>/patients.csv``.
    """
    pat = pd.read_csv(cleaning_set + '_raw/patients.csv', sep=';', usecols=range(17))
    pat = pat.drop(
        columns=['SSN', 'DRIVERS', 'PASSPORT', 'PREFIX', 'FIRST', 'LAST', 'SUFFIX',
                 'MARITAL', 'MAIDEN', 'BIRTHPLACE', 'ADDRESS']
    )

    # Per the original author's note the raw CSV is malformed, so the
    # categorical columns can hold stray values; these lists define what is valid.
    race_list = ['asian', 'white', 'black', 'hispanic']
    ethnicity_list = ['african', 'american', 'american_indian', 'asian_indian', 'central_american', 'chinese', 'dominican', 'english', 'french', 'french_canadian', 'german', 'irish', 'italian', 'mexican', 'polish', 'portuguese', 'puerto_rican', 'russian', 'scottish', 'swedish', 'west_indian']
    gender_list = ['F', 'M']

    # Boolean masks replace the former `pat[col].replace(..., inplace=True)`
    # calls: an in-place method on a selected column is chained assignment,
    # which warns on recent pandas and does nothing under Copy-on-Write.
    valid_rows = pat['RACE'].isin(race_list) & pat['GENDER'].isin(gender_list)
    pat = pat.loc[valid_rows].assign(
        # unknown ethnicities become missing values, the row itself is kept
        ETHNICITY=lambda frame: frame['ETHNICITY'].where(
            frame['ETHNICITY'].isin(ethnicity_list)
        )
    )

    # save
    pat.to_csv(cleaning_set + '/patients.csv', index=False)


cleaning_patients()

In [None]:
# cleaning - medications

def cleaning_medications():
    """Clean the raw medications file of the active set and save the result.

    Keeps only rows whose PATIENT appears in the cleaned patients file and
    whose REASONCODE is present, removes exact duplicate rows, stores
    REASONCODE as an integer, writes ``<cleaning_set>/medications.csv`` and
    displays the cleaned frame. STOP is left untouched, so missing stop dates
    stay missing.
    """
    known_patient_ids = load_data('patients')['ID']

    raw = pd.read_csv(cleaning_set + '_raw/medications.csv')
    has_known_patient = raw['PATIENT'].isin(known_patient_ids)
    has_reason = raw['REASONCODE'].notna()

    med = (
        raw[has_known_patient & has_reason]
        .drop_duplicates()
        .assign(REASONCODE=lambda frame: frame['REASONCODE'].astype('int'))
    )

    # save
    med.to_csv(cleaning_set + '/medications.csv', index=False)
    display(med)


cleaning_medications()

In [None]:
# cleaning - encounters

def cleaning_encounters():
    """Clean the raw encounters file of the active set and save the result.

    Drops the DESCRIPTION and REASONDESCRIPTION columns (mostly null according
    to the original author's note), keeps only rows whose PATIENT appears in
    the cleaned patients file, and writes ``<cleaning_set>/encounters.csv``.
    """
    known_patient_ids = load_data('patients')['ID']

    raw = pd.read_csv(cleaning_set + '_raw/encounters.csv')
    enc = (
        raw
        .drop(columns=['DESCRIPTION', 'REASONDESCRIPTION'])
        .loc[lambda frame: frame['PATIENT'].isin(known_patient_ids)]
    )

    # save
    enc.to_csv(cleaning_set + '/encounters.csv', index=False)


cleaning_encounters()

In [None]:
# cleaning - observations

def cleaning_observations():
    """Clean the raw observations file of the active set and save the result.

    Keeps only rows whose PATIENT appears in the cleaned patients file, whose
    VALUE is present and whose CODE is one of the observation types listed
    below, removes exact duplicate rows, and writes
    ``<cleaning_set>/observations.csv``.
    """
    known_patient_ids = load_data('patients')['ID']

    # NOTE(review): Observation.GLUCOSE is not in this list, exactly as in the
    # previous version of this cell -- confirm that the omission is intended.
    wanted_codes = [
        Observation.BH.value,
        Observation.BW.value,
        Observation.BMI.value,
        Observation.DIASTOLIC.value,
        Observation.HBA1C.value,
        Observation.SYSTOLIC.value,
        Observation.FEV1.value,
    ]

    raw = pd.read_csv(cleaning_set + '_raw/observations.csv')
    keep = (
        raw['PATIENT'].isin(known_patient_ids)
        & raw['VALUE'].notna()
        & raw['CODE'].isin(wanted_codes)
    )
    obs = raw[keep].drop_duplicates()

    # save
    obs.to_csv(cleaning_set + '/observations.csv', index=False)


cleaning_observations()

In [11]:
# cleaning - conditions

def cleaning_conditions():
    """Clean the raw conditions file of the active set and save the result.

    Keeps only rows whose PATIENT appears in the cleaned patients file and
    whose CODE is one of the six diseases listed below, removes exact
    duplicate rows, and writes ``<cleaning_set>/conditions.csv``.
    """
    known_patient_ids = load_data('patients')['ID']

    wanted_codes = [
        Disease.DIABETES.value,
        Disease.HYPERTENSION.value,
        Disease.CHD.value,
        Disease.MI.value,
        Disease.STROKE.value,
        Disease.COPD.value,
    ]

    raw = pd.read_csv(cleaning_set + '_raw/conditions.csv')
    keep = raw['PATIENT'].isin(known_patient_ids) & raw['CODE'].isin(wanted_codes)
    con = raw[keep].drop_duplicates()

    # save
    con.to_csv(cleaning_set + '/conditions.csv', index=False)


cleaning_conditions()


## CONCAT ALL DATASET FROM ALL SET PER TYPE

source : set1/, set2/, set3/, ...

target : set_full/

files :
- patients.csv : ID, BIRTHDATE, DEATHDATE, RACE, ETHNICITY, GENDER
- medications.csv  : START, STOP, PATIENT, ENCOUNTER, CODE, DESCRIPTION, REASONCODE, REASONDESCRIPTION
- encounters.csv   : ID, DATE, PATIENT, CODE, REASONCODE
- observations.csv : DATE, PATIENT, ENCOUNTER, CODE, DESCRIPTION, VALUE, UNITS

In [12]:
# merge all cleaned data
def concat_set(type, max_set):
    """Concatenate one cleaned file type across all numbered sets.

    Reads ``set1/<type>.csv`` and then ``set2/`` ... ``set<max_set>/``, stacks
    them in that order with a fresh 0..n-1 index, writes the result to
    ``set_full/<type>.csv`` and returns it.

    Parameters
    ----------
    type : str
        Base name of the file without ``.csv``, e.g. ``'conditions'``.
    max_set : int
        Number of the last set to include. ``set1`` is always read, so a
        value below 2 yields set1 only.

    Returns
    -------
    pandas.DataFrame
        The concatenated frame (the same data that was written to disk).
    """
    frames = [pd.read_csv('set1/' + type + '.csv')]
    frames.extend(
        pd.read_csv('set' + str(i) + '/' + type + '.csv')
        for i in range(2, max_set + 1)
    )
    # One concat instead of DataFrame.append in a loop: append no longer
    # exists in pandas 2.x, and repeated appends re-copy the data every time.
    result = pd.concat(frames, ignore_index=True)
    result.to_csv('set_full/' + type + '.csv', index=False)
    return result

# Build every set_full/<type>.csv in one pass. Later cells read the patients,
# medications, observations and conditions files from set_full/, so
# concatenating a single hand-picked type per run leaves a fresh kernel
# without its inputs. 'conditions' is processed last so that `data` still
# holds the concatenated conditions frame afterwards.
for dataset_type in ['patients', 'medications', 'observations', 'encounters', 'conditions']:
    data = concat_set(dataset_type, 9)

In [None]:
# Cleaning full patient data (date fix)
# Keep only patients who have medication record

def cleaning_patients_fix_date():
    """Write ``set_full/patient_fixed_date.csv`` with repaired birth/death dates.

    Reads ``set_full/patients.csv`` and ``set_full/medications.csv``, keeps
    only patients that have at least one medication record, parses the
    two-digit-year dates, repairs years that landed in the wrong century,
    keeps patients who are alive or died before 2015-01-01, adds the DEAD
    flag and AGEDEAD, and saves one row per patient.

    Returns nothing; the result is only written to disk.
    """
    century = pd.offsets.DateOffset(years=100)

    # load data
    patients = pd.read_csv('set_full/patients.csv')
    medications = pd.read_csv('set_full/medications.csv')

    # MERGE : PATIENT + MEDICATION (patients without medication are dropped)
    patient_ids = medications['PATIENT'].unique()
    patients = patients[patients['ID'].isin(patient_ids)]
    merged = patients.merge(medications, left_on='ID', right_on='PATIENT', how='left')

    # Convert data types
    merged['BIRTHDATE'] = pd.to_datetime(merged['BIRTHDATE'], format='%d/%m/%y')
    merged['DEATHDATE'] = pd.to_datetime(merged['DEATHDATE'], format='%d/%m/%y')
    merged['START'] = pd.to_datetime(merged['START'], format='%Y-%m-%d')

    # Fix 2 digits of year misinterpretation.
    # This has to happen BEFORE the death-date filter below: a death parsed
    # into the future would otherwise be filtered out before it is repaired.
    # The birth date is compared with the patient's EARLIEST medication start
    # so that every row of a patient receives the same corrected birth date.
    first_start = merged.groupby('ID')['START'].transform('min')
    born_after_first_start = merged['BIRTHDATE'] > first_start
    merged['BIRTHDATE'] = merged['BIRTHDATE'].where(
        ~born_after_first_start, merged['BIRTHDATE'] - century
    )
    died_in_future = merged['DEATHDATE'] > pd.to_datetime('2020-12-31')
    merged['DEATHDATE'] = merged['DEATHDATE'].where(
        ~died_in_future, merged['DEATHDATE'] - century
    )

    # Only those who are still alive or died before 2015
    still_alive = merged['DEATHDATE'].isna()
    died_before_2015 = merged['DEATHDATE'] < pd.to_datetime('2015-01-01')
    merged = merged[still_alive | died_before_2015].copy()

    # Add additional information
    # dead status
    merged['DEAD'] = np.where(merged['DEATHDATE'].isnull(), 0, 1)
    # age when dead (accuracy up to month level)
    merged['AGEDEAD'] = (
        merged['DEATHDATE'].dt.year
        - merged['BIRTHDATE'].dt.year
        - (merged['DEATHDATE'].dt.month < merged['BIRTHDATE'].dt.month)
    )

    # Drop the medication columns; they were only needed for the date repair
    merged = merged.drop(
        columns=['PATIENT', 'ENCOUNTER', 'START', 'STOP', 'CODE', 'DESCRIPTION',
                 'REASONCODE', 'REASONDESCRIPTION'],
        errors='ignore',
    )

    merged = merged.drop_duplicates().reset_index(drop=True)

    merged.to_csv('set_full/patient_fixed_date.csv', index=False)

# Run the date-fix step; its only effect is writing set_full/patient_fixed_date.csv.
cleaning_patients_fix_date() 

## DIABETES


## Characteristic of the medication data

Fact :
- No metformin record has a STOP date
- No patient has more than one metformin record

Set used : 1-3
Medication durations per record (in days)
- METFORMIN
    - mean : -
    - median : -
- SGLT2
    - mean : 354.24 (~ 1 year)
    - median : 352 (~ 1 year)
- GLP1
    - mean : 992.80 (~ 3 year)
    - median : 659 (~ 2 years)
- INSULIN
    - count : 396
    - mean : 1923.86 (~ 5 years)
    - median : 631 (~ 2 years)

The data above show that a record's duration does not correspond to a single prescription fill. It is possible that, during the period between the START and STOP dates, the patient received more than one prescription.

Based on these facts, it is reasonable to make the following assumptions:
- patients who receive metformin as their first medication keep using it indefinitely, because metformin records have no end date
- if another medication is found after metformin, it is a dual or triple therapy combined with metformin (this is consistent with the NICE guideline)

Question :
- Can the intensification assumption also be applied to insulin?
  According to NICE, insulin is a single medication that is not to be used together with other medications.


In [None]:
# Diabetes-related medication records with parsed START / STOP dates and a
# DURATION column (whole days between START and STOP; missing when a record
# has no STOP date).
medications = (
    pd.read_csv('set_full/medications.csv')
    .loc[lambda frame: frame['REASONCODE'] == Disease.DIABETES]
    .reset_index(drop=True)
    .drop(columns=['REASONCODE', 'REASONDESCRIPTION'])
    .assign(
        START=lambda frame: pd.to_datetime(frame['START'], format='%Y-%m-%d'),
        STOP=lambda frame: pd.to_datetime(frame['STOP'], format='%Y-%m-%d'),
    )
    .assign(DURATION=lambda frame: (frame['STOP'] - frame['START']).dt.days)
)
# medications = medications[medications['DURATION'] > 0]

In [None]:
# Metformin records only (the summary call was left disabled by the author).
metformin = medications.loc[medications['CODE'].eq(DrugDiabetes.METFORMIN)]
#metformin.describe()

In [None]:
# SGLT2 records only, followed by summary statistics of their numeric columns.
sglt2 = medications.loc[medications['CODE'].eq(DrugDiabetes.SGLT2)]
sglt2.describe()

In [None]:
# GLP1 records only, followed by summary statistics of their numeric columns.
glp1 = medications.loc[medications['CODE'].eq(DrugDiabetes.GLP1)]
glp1.describe()

In [None]:
# Insulin records only, followed by summary statistics of their numeric columns.
insulin = medications.loc[medications['CODE'].eq(DrugDiabetes.INSULIN)]
insulin.describe()

In [None]:
# Check whether any patient has more than one metformin record: number each
# patient's metformin rows (1, 2, ...) and show the rows numbered above 1.
# An empty result means nobody received metformin more than once.
met = medications.loc[medications['CODE'].eq(DrugDiabetes.METFORMIN)].reset_index(drop=True)
met = met.assign(MEDNUM=met.groupby('PATIENT').cumcount() + 1)
met.query('MEDNUM > 1')

### 1. Medications

Transform medications from the same date into separated columns

- file : "diabetes/m.csv"
- fields : PATIENT, ENCOUNTER, MEDSTART, INSULIN, METFORMIN, GLP1, SGLT2, COMBINATION, MEDCLASS

Step by step :
- One hot encoding for medication
- Combine all medications on the same date
- If a record has no stop date, its treatment carries over from line [i] to line [i + 1]; if it has a stop date, it is not carried over to the next medication
- So if a new drug appears in the next record, it is treated as an intensification of the treatment

In [None]:
# Diabetes medication records, one-hot encoded by drug code and ordered by
# (PATIENT, START). The generated 'CODE_<value>' columns of the drugs listed
# in DrugDiabetes are renamed to the drug names (INSULIN, METFORMIN, ...).
drug_column_names = {'CODE_' + str(drug.value): drug.name for drug in DrugDiabetes}

medications = pd.read_csv('set_full/medications.csv')
is_diabetes = medications['REASONCODE'] == Disease.DIABETES
medications = (
    medications[is_diabetes]
    .reset_index(drop=True)
    .drop(columns=['DESCRIPTION', 'REASONCODE', 'REASONDESCRIPTION'])
    .set_index(['PATIENT', 'START'])
)

# one indicator column per distinct CODE, then sort by the (PATIENT, START) index
medications = (
    pd.get_dummies(medications, columns=['CODE'])
    .sort_index()
    .reset_index()
    .rename(columns=drug_column_names)
)

medications.head(5)

In [None]:
#medications = medications[medications['PATIENT'].isin(['018abf0e-6843-44c7-8003-5403631d8a53', '09ce20b5-1c49-4549-af2b-f2fc6399f6a6'])]

In [None]:
# One row per (PATIENT, START, ENCOUNTER): add up the one-hot drug columns of
# all medication rows that share the same patient, start date and encounter.
# STOP is removed first because it holds text / missing values: summing it
# relied on pandas silently dropping non-numeric columns, which pandas 2.0
# stopped doing.
group_keys = ['PATIENT', 'START', 'ENCOUNTER']
result = (
    medications
    .drop(columns=['STOP'])
    .groupby(group_keys)
    .sum()
    .astype(int)  # plain 0/1/... counts regardless of the dummy dtype
)
result.head()

In [None]:
# Carry open-ended prescriptions forward within each patient.
#
# Records are visited per (PATIENT, START, ENCOUNTER) group in sorted order.
# For every drug a carry flag remembers whether the most recent row of that
# drug had no STOP date; while the flag is set, the drug is marked as present
# (1) in `result` for the current group. Flags are reset for each new patient.
drug_columns = ['INSULIN', 'METFORMIN', 'GLP1', 'SGLT2']

# A missing STOP marks an open-ended prescription. The '-' marker is accepted
# as well because an earlier version of this cell overwrote missing STOP
# values with it. The mask is computed without modifying `medications`: the
# former `medications['STOP'].fillna('-', inplace=True)` is chained assignment,
# which does nothing under pandas Copy-on-Write.
open_ended = medications['STOP'].isna() | medications['STOP'].eq('-')

prev_patient = None
carry = dict.fromkeys(drug_columns, False)
grouped = medications.assign(OPEN_ENDED=open_ended).groupby(['PATIENT', 'START', 'ENCOUNTER'])

for index, group in grouped:
    patient = index[0]
    if patient != prev_patient:
        prev_patient = patient
        carry = dict.fromkeys(drug_columns, False)

    # update the carry flags from the rows of this group before writing them
    rows = group[drug_columns + ['OPEN_ENDED']].itertuples(index=False, name=None)
    for *flags, is_open in rows:
        for drug, flag in zip(drug_columns, flags):
            if flag == 1:
                carry[drug] = bool(is_open)
                break  # only the first flagged drug of a row is considered

    for drug in drug_columns:
        if carry[drug]:
            result.at[index, drug] = 1

In [None]:
drug_columns = ['INSULIN', 'METFORMIN', 'GLP1', 'SGLT2']

# COMBINATION: the four drug columns added together, left to right
combination = result[drug_columns[0]]
for drug in drug_columns[1:]:
    combination = combination + result[drug]
result['COMBINATION'] = combination

# MEDCLASS: the four drug columns written next to each other as text
# (order INSULIN, METFORMIN, GLP1, SGLT2), e.g. '0100' for metformin only
flag_strings = [result[drug].astype('str') for drug in drug_columns]
result['MEDCLASS'] = flag_strings[0].str.cat(flag_strings[1:])

# MEDNUM: running number (1, 2, ...) of each record within its patient
result['MEDNUM'] = result.groupby('PATIENT').cumcount() + 1

In [None]:
# PREVMED: the MEDCLASS of the preceding record of the same patient.
# A patient's first record has no predecessor and gets '0000'.
result['PREVMED'] = result.groupby(by=['PATIENT'])['MEDCLASS'].shift(1, fill_value='0000')

In [None]:
# Move the index back into ordinary columns (presumably the PATIENT / START /
# ENCOUNTER group keys -- confirm) so they are written out by to_csv(index=False).
result = result.reset_index()

In [None]:
# START is stored under the name MEDSTART from here on
result = result.rename(columns={'START': 'MEDSTART'})

In [None]:
# Save the medication table; the index is not written.
result.to_csv('diabetes/m.csv', index=False)

### Add more data : first medication date, previous medication duration

In [None]:
# Enrich diabetes/m.csv with the start date of the previous record of the
# same patient (PREVMEDSTART) and the number of days between the two starts
# (PREVMEDDUR). MEDCLASS / PREVMED are read as text to keep leading zeros.
data = pd.read_csv('diabetes/m.csv', converters={'MEDCLASS': str, 'PREVMED': str})

data['MEDSTART'] = pd.to_datetime(data['MEDSTART'], format='%Y-%m-%d')
data['PREVMEDSTART'] = data.groupby('PATIENT')['MEDSTART'].shift(1)

# A patient's first record has no predecessor; its duration is set to 0.
# The fill is assigned back instead of using `fillna(..., inplace=True)` on
# the selected column, which is chained assignment and does nothing under
# pandas Copy-on-Write.
data['PREVMEDDUR'] = (data['MEDSTART'] - data['PREVMEDSTART']).dt.days.fillna(0)

# the file is overwritten in place with the two additional columns
data.to_csv('diabetes/m.csv', index=False)

data.head()

### 2. Observations
- file : "diabetes/o.csv"
- fields : DATE, PATIENT, ENCOUNTER, HBA1C (%), BMI (kg/m2)

In [None]:
# HbA1c and BMI observations with a usable value, saved as diabetes/o.csv.
observations = pd.read_csv('set_full/observations.csv')

wanted_codes = [Observation.HBA1C.value, Observation.BMI.value]
observations = (
    observations[observations['CODE'].isin(wanted_codes)]
    .drop(columns=['DESCRIPTION', 'UNITS'], errors='ignore')
)

# The minimum valid observation value is 0: keep rows with VALUE >= 0, which
# also removes rows with a missing VALUE. This replaces the former
# `observations['VALUE'].replace(..., inplace=True)` on a filtered frame,
# which is chained assignment and does nothing under pandas Copy-on-Write.
observations = observations[observations['VALUE'] >= 0].reset_index(drop=True)

observations.to_csv('diabetes/o.csv', index=False)

In [None]:
# Show the last rows of the filtered observations as a quick check.
observations.tail()

### 3. Patient - Medication

- file : "diabetes/p_m.csv"
- fields :
    - [RACE, ETHNICITY, GENDER, PATIENT, ENCOUNTER] 
    - [MEDSTART, INSULIN, METFORMIN, GLP1, SGLT2, DEAD, COMBINATION]
    - [AGEDEAD, AGEMEDICATION, AGEFIRSTMEDICATION, DAYSLIVEFIRSTMED]


In [None]:
# Build diabetes/p_m.csv: one row per diabetes medication record, enriched
# with patient attributes and age / survival information.
century = pd.offsets.DateOffset(years=100)

# load data (MEDCLASS / PREVMED are read as text to keep leading zeros)
patients = pd.read_csv('set_full/patients.csv')
medications = pd.read_csv('diabetes/m.csv', converters={'MEDCLASS': str, 'PREVMED': str})

# MERGE : PATIENT + MEDICATION

# diabetes patients are identified by having a diabetes medication record
diabetes = medications
diabetes_patient_ids = diabetes['PATIENT'].unique()
diabetes_patients = patients[patients['ID'].isin(diabetes_patient_ids)]

# data of patients with diabetes medication
merged = diabetes_patients.merge(diabetes, left_on='ID', right_on='PATIENT', how='left')

# Convert data types
merged['BIRTHDATE'] = pd.to_datetime(merged['BIRTHDATE'], format='%d/%m/%y')
merged['DEATHDATE'] = pd.to_datetime(merged['DEATHDATE'], format='%d/%m/%y')
merged['MEDSTART'] = pd.to_datetime(merged['MEDSTART'], format='%Y-%m-%d')

# Fix 2 digits of year misinterpretation.
# This has to happen BEFORE the death-date filter below: a death parsed into
# the future would otherwise be filtered out before it is repaired.
# The birth date is compared with the patient's EARLIEST medication start so
# that every record of a patient receives the same corrected birth date.
first_medstart = merged.groupby('ID')['MEDSTART'].transform('min')
born_after_first_med = merged['BIRTHDATE'] > first_medstart
merged['BIRTHDATE'] = merged['BIRTHDATE'].where(
    ~born_after_first_med, merged['BIRTHDATE'] - century
)
died_in_future = merged['DEATHDATE'] > pd.to_datetime('2020-12-31')
merged['DEATHDATE'] = merged['DEATHDATE'].where(
    ~died_in_future, merged['DEATHDATE'] - century
)

# Only those who are still alive or died before 2015
still_alive = merged['DEATHDATE'].isna()
died_before_2015 = merged['DEATHDATE'] < pd.to_datetime('2015-01-01')
merged = merged[still_alive | died_before_2015].copy()

# Add additional information

# dead status
merged['DEAD'] = np.where(merged['DEATHDATE'].isnull(), 0, 1)

# age when dead (accuracy up to month level)
merged['AGEDEAD'] = (
    merged['DEATHDATE'].dt.year
    - merged['BIRTHDATE'].dt.year
    - (merged['DEATHDATE'].dt.month < merged['BIRTHDATE'].dt.month)
)

# age at the time of this medication record (accuracy up to month level)
merged['AGEMEDICATION'] = (
    merged['MEDSTART'].dt.year
    - merged['BIRTHDATE'].dt.year
    - (merged['MEDSTART'].dt.month < merged['BIRTHDATE'].dt.month)
)

# age at first medication: the smallest AGEMEDICATION of the patient
merged['AGEFIRSTMEDICATION'] = merged.groupby('PATIENT')['AGEMEDICATION'].transform('min')

# days between this record's MEDSTART and the death date.
# NOTE(review): despite the column name this is measured from each record's
# own MEDSTART, not from the patient's first medication -- confirm the intent.
merged['DAYSLIVEFIRSTMED'] = (merged['DEATHDATE'] - merged['MEDSTART']).dt.days

# Drop unused columns
merged = merged.drop(columns=['ID', 'BIRTHDATE', 'DEATHDATE'], errors='ignore')

# get the patient_medication table
merged = merged.drop_duplicates().sort_values(by=['PATIENT', 'ENCOUNTER', 'MEDSTART'])

# save
merged.to_csv('diabetes/p_m.csv', index=False)

In [None]:
# Display the patient-medication table built in the previous cell.
merged

### 4. Medication - Observation

- file : "diabetes/m_o.csv"
- fields : PATIENT, ENCOUNTER, MEDSTART, INSULIN, METFORMIN, GLP1, SGLT2, COMBINATION, OBSDATE, HBA1C, BMI

Assumption (no longer needed, because the observations are always on the same date as the medications):
- The medication given cannot be based only on the observation from the same date.
- In this case, I assume that the medication is given based on the last observation within at most 1 month



In [None]:
# Build diabetes/m_o.csv: medication records that have an HbA1c observation
# for the same patient, encounter and date, plus the BMI of that same
# patient / encounter / date where one exists.

# load medications
# MEDCLASS and PREVMED are zero-padded flag strings; both are read as text so
# that e.g. '0100' is not turned into the integer 100.
medications = pd.read_csv('diabetes/m.csv', converters={'MEDCLASS': str, 'PREVMED': str})

# load observations
observations = pd.read_csv('diabetes/o.csv')
observations = observations.rename(columns={'DATE': 'OBSDATE', 'VALUE': 'OBSVALUE'})

# keep only the observations of patients that have medication records
medicated_patient_ids = medications['PATIENT'].unique()
observations = observations[observations['PATIENT'].isin(medicated_patient_ids)].reset_index(drop=True)

# HBA1C
hba1c = observations[observations['CODE'] == Observation.HBA1C.value]

# MERGE : MEDICATION + OBSERVATIONS (HBA1C)
merged = medications.merge(
    hba1c,
    left_on=['PATIENT', 'ENCOUNTER', 'MEDSTART'],
    right_on=['PATIENT', 'ENCOUNTER', 'OBSDATE'],
    how='left')
# records without a matching HbA1c value are discarded
merged = merged[merged['OBSVALUE'].notna()].reset_index(drop=True)
merged = merged.drop('CODE', axis=1)
merged = merged.rename(columns={'OBSVALUE': 'HBA1C'})

# BMI (optional: records without a BMI are kept with a missing value)
bmi = observations[observations['CODE'] == Observation.BMI.value]
merged = merged.merge(
    bmi,
    left_on=['PATIENT', 'ENCOUNTER', 'MEDSTART'],
    right_on=['PATIENT', 'ENCOUNTER', 'OBSDATE'],
    how='left')
# both sides carry an OBSDATE column, so pandas suffixes them: _x is the
# HbA1c date already in `merged`, _y the BMI date
merged = merged.drop(['CODE', 'OBSDATE_y'], axis=1)
merged = merged.rename(columns={'OBSVALUE': 'BMI', 'OBSDATE_x': 'OBSDATE'})

merged.to_csv('diabetes/m_o.csv', index=False)

### 5. Patient - Medication - Observation

- file : "diabetes/p_m_o.csv"
- fields : 
    - [RACE, ETHNICITY, GENDER, PATIENT, ENCOUNTER] 
    - [MEDSTART, INSULIN, METFORMIN, GLP1, SGLT2]
    - [DEAD, AGEDEAD, AGEMEDICATION, AGEFIRSTMEDICATION, DAYSLIVEFIRSTMED, ISFIRST]
    - [OBSDATE, HBA1C, BMI]

In [None]:
# Build diabetes/p_m_o.csv: patient-medication records that have an HbA1c
# observation for the same patient, encounter and date, plus the BMI of that
# same patient / encounter / date where one exists.

# MEDCLASS and PREVMED are zero-padded flag strings; both are read as text so
# that e.g. '0100' is not turned into the integer 100.
pm = pd.read_csv('diabetes/p_m.csv', converters={'MEDCLASS': str, 'PREVMED': str})

# load observations
observations = pd.read_csv('diabetes/o.csv')
observations = observations.rename(columns={'DATE': 'OBSDATE'})

# HBA1C
hba1c = observations[observations['CODE'] == Observation.HBA1C.value]
pmo = pm.merge(
    hba1c,
    left_on=['PATIENT', 'ENCOUNTER', 'MEDSTART'],
    right_on=['PATIENT', 'ENCOUNTER', 'OBSDATE'],
    how='left')
# records without a matching HbA1c value are discarded
pmo = pmo[pmo['VALUE'].notna()].reset_index(drop=True)
pmo = pmo.drop('CODE', axis=1)
pmo = pmo.rename(columns={'VALUE': 'HBA1C'})

# BMI (optional: records without a BMI are kept with a missing value)
bmi = observations[observations['CODE'] == Observation.BMI.value]
pmo = pmo.merge(
    bmi,
    left_on=['PATIENT', 'ENCOUNTER', 'MEDSTART'],
    right_on=['PATIENT', 'ENCOUNTER', 'OBSDATE'],
    how='left')
# both sides carry an OBSDATE column, so pandas suffixes them: _x is the
# HbA1c date already in `pmo`, _y the BMI date
pmo = pmo.drop(['CODE', 'OBSDATE_y'], axis=1)
pmo = pmo.rename(columns={'VALUE': 'BMI', 'OBSDATE_x': 'OBSDATE'})

pmo.to_csv('diabetes/p_m_o.csv', index=False)

pmo.head(5)

## --------------------------------------------------------------------------------------------------------

## Hypertension

In [None]:
# load data
# MEDCLASS and PREVMED are zero-padded flag strings; both are read as text so
# that e.g. '0100' is not turned into the integer 100.
pm = pd.read_csv('diabetes/p_m.csv', converters={'MEDCLASS': str, 'PREVMED': str})
# hypertension flag, filled in by the next cell; 0 = no hypertension recorded
pm['C_HYPER'] = 0

# Hypertension conditions of the patients present in `pm`.
# The filter is applied to the frame that was just read; the previous version
# filtered `con`, a name that only exists inside cleaning_conditions() and is
# therefore undefined here on a fresh kernel.
conditions = pd.read_csv('set_full/conditions.csv')
is_hypertension = conditions['CODE'] == Disease.HYPERTENSION
is_known_patient = conditions['PATIENT'].isin(pm['PATIENT'].unique())

# Keep exactly START and PATIENT, in this order: the next cell reads them by
# position (column 0 = START, column 1 = PATIENT).
hyper = conditions.loc[is_hypertension & is_known_patient, ['START', 'PATIENT']].reset_index(drop=True)
hyper.head()

In [None]:
# Set C_HYPER = 1 on every record in `pm` whose MEDSTART is on or after a
# hypertension condition start of the same patient.
# The first two columns of `hyper` are read by position:
# column 0 = condition START, column 1 = PATIENT.
for onset, patient_id in hyper.iloc[:, :2].itertuples(index=False, name=None):
    on_or_after_onset = (pm['PATIENT'] == patient_id) & (pm['MEDSTART'] >= onset)
    pm.loc[on_or_after_onset, 'C_HYPER'] = 1
pm

In [None]:
# Save the patient-medication table including the C_HYPER flag; the index is not written.
pm.to_csv('diabetes_hypertension/p_m.csv', index=False)