In [1]:
# Notebook setup: imports, pandas configuration, and the Google Drive mount.
import pandas as pd
import numpy as np
from datetime import datetime
# Opt in to pandas copy-on-write behavior for the whole notebook.
pd.options.mode.copy_on_write = True

# Mount Google Drive (Colab) at /content/drive so the CSV files under it can be read and written.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Load the inpatient encounter extract from the mounted Google Drive.
# NOTE(review): the frames displayed later in this notebook carry an 'Unnamed: 0'
# column, which suggests this CSV was saved together with its index - confirm
# whether that column should be kept.
encounters_csv = '/content/drive/MyDrive/Data/Output Data/inpatient_encounters.csv'
inpatient_encounters = pd.read_csv(encounters_csv)


In [3]:
# Keep only encounters whose YR is after 2019, then show how many rows remain.
after_2019 = inpatient_encounters['YR'].gt(2019)
inpatient_encounters = inpatient_encounters.loc[after_2019]
inpatient_encounters.shape[0]

9906

In [4]:
# Keep rows with ER_flag == 0, parse the claim from/through dates into
# ADMIT_DT / DISCHARGE_DT, and drop the columns that are no longer needed.
# Note: this cell consumes ER_flag, CLM_FROM_DT and CLM_THRU_DT, so it cannot be
# re-run without first re-running the cells above it.
non_er_rows = inpatient_encounters.loc[inpatient_encounters['ER_flag'].eq(0)]
inpatient_encounters = (
    non_er_rows
    .assign(
        ADMIT_DT=lambda frame: pd.to_datetime(frame['CLM_FROM_DT']),
        DISCHARGE_DT=lambda frame: pd.to_datetime(frame['CLM_THRU_DT']),
    )
    .drop(columns=[
        'PTNT_DSCHRG_STUS_CD',
        'CLM_IP_ADMSN_TYPE_CD',
        'ER_flag',
        'NUM_DIAG',
        'CLM_FROM_DT',
        'CLM_THRU_DT',
    ])
)



In [5]:
# Number of encounter rows remaining at this point.
inpatient_encounters.shape[0]

2797

In [6]:
# Count the admission records per BENE_ID.
# Result: one row per BENE_ID with the columns BENE_ID and num_admits.
num_admits = (
    inpatient_encounters
    .groupby('BENE_ID', as_index=False)
    .size()
    .rename(columns={'size': 'num_admits'})
)

In [7]:
# Beneficiaries with exactly one admission: their single record gets
# READMIT_FLAG = 0.
has_single_admit = num_admits['num_admits'].eq(1)
num_admits_1 = num_admits.loc[has_single_admit]
bene_ids_1 = num_admits_1['BENE_ID'].tolist()

readmits0 = (
    inpatient_encounters
    .loc[inpatient_encounters['BENE_ID'].isin(bene_ids_1)]
    .assign(READMIT_FLAG=0)
)



In [8]:
# Beneficiaries with more than one admission, ordered by patient and then by
# admission/discharge date, with a fresh 0..n-1 index (the positional loops
# in the following cells rely on that index).
has_multiple_admits = num_admits['num_admits'].gt(1)
num_admits_2 = num_admits.loc[has_multiple_admits]
bene_ids_2 = num_admits_2['BENE_ID'].tolist()

in_multi_admit_group = inpatient_encounters['BENE_ID'].isin(bene_ids_2)
readmits = inpatient_encounters.loc[in_multi_admit_group].copy()
readmits = readmits.sort_values(
    by=['BENE_ID', 'ADMIT_DT', 'DISCHARGE_DT'],
    ignore_index=True,
)


Where needed, both ADMIT_DT and DISCHARGE_DT of an admission record are shifted forward by the same number of days, so that the ADMIT_DT of the current admission falls at least 1 day after the DISCHARGE_DT of the preceding admission record for the same BENE_ID.  



In [9]:
# Push overlapping admissions forward in time.
# Rows are processed in order, so a shift applied to one row is already visible
# when the following row of the same BENE_ID is compared against it.
one_day = pd.Timedelta(days=1)
for i in range(1, len(readmits)):
    # Only consecutive rows of the same beneficiary can overlap.
    if readmits.at[i, 'BENE_ID'] != readmits.at[i - 1, 'BENE_ID']:
        continue

    previous_discharge = readmits.at[i - 1, 'DISCHARGE_DT']
    current_admit = readmits.at[i, 'ADMIT_DT']
    # Nothing to do unless this admission starts on or before the previous discharge.
    if not (current_admit <= previous_discharge):
        continue

    # Move the admission to the day after the previous discharge and shift the
    # discharge by the same amount.
    new_admit_dt = previous_discharge + one_day
    delta = new_admit_dt - current_admit
    readmits.at[i, 'ADMIT_DT'] = current_admit + delta
    readmits.at[i, 'DISCHARGE_DT'] = readmits.at[i, 'DISCHARGE_DT'] + delta

An assert statement is used to test the code and catch errors.  If the asserted condition is violated, the program halts with an error.  
For every pair of consecutive rows, at least one of two conditions must hold: either the BENE_IDs of the two rows are different, or the ADMIT_DT of the current record is later than the DISCHARGE_DT of the previous record.  

In [10]:
# Sanity check: within one BENE_ID, every admission must start strictly after
# the previous row's discharge. The first offending pair stops the notebook.
for i in range(1, len(readmits)):
    different_patient = readmits.at[i, 'BENE_ID'] != readmits.at[i - 1, 'BENE_ID']
    starts_after_previous = readmits.at[i, 'ADMIT_DT'] > readmits.at[i - 1, 'DISCHARGE_DT']
    assert different_patient or starts_after_previous, \
        f"Overlap detected between rows {i - 1} and {i}"

Creating READMIT_FLAG for each group of rows belonging to one BENE_ID.  
The first "for" statement iterates over the groups of rows, one group per BENE_ID.  The second "for" statement walks through the rows of a group and compares each admission with the next one.  The "if" statement checks the following condition: does the next admission start 90 days or fewer after the discharge date of the current admission?  If yes, READMIT_FLAG of the current (earlier) admission is set to 1.  The last admission of each BENE_ID therefore always keeps READMIT_FLAG = 0.  

In [11]:
# Readmission window in days (inclusive): an admission is flagged when the same
# beneficiary's next admission starts this many days or fewer after its discharge.
READMIT_WINDOW_DAYS = 90

# Sort by BENE_ID and ADMIT_DT so that consecutive rows of a beneficiary are
# consecutive admissions, and rebuild a clean 0..n-1 index.
readmits = readmits.sort_values(['BENE_ID', 'ADMIT_DT']).reset_index(drop=True)

# Every admission starts out as "not followed by a readmission".
readmits['READMIT_FLAG'] = 0

for bene_id, group in readmits.groupby('BENE_ID'):
    # Pull the two date columns once per beneficiary instead of building a full
    # row Series on every comparison.
    admit_dates = group['ADMIT_DT']
    discharge_dates = group['DISCHARGE_DT']
    # Compare each admission with the next one; the last admission of a
    # beneficiary has no successor and keeps READMIT_FLAG = 0.
    for i in range(len(group) - 1):
        days_to_next_admit = (admit_dates.iloc[i + 1] - discharge_dates.iloc[i]).days
        if days_to_next_admit <= READMIT_WINDOW_DAYS:
            # The flag is written on the earlier (index) admission of the pair.
            readmits.loc[group.index[i], 'READMIT_FLAG'] = 1
readmits


Unnamed: 0,BENE_ID,CLM_ID,YR,LOS,DRG,PRNCPAL_DGNS_CD,CLM_TOT_CHRG_AMT,ICD_Description,DRG_Description,ADMIT_DT,DISCHARGE_DT,READMIT_FLAG
0,-10000010288008,-10000931485965,2020,7,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2020-01-08,2020-01-14,0
1,-10000010288008,-10000931485969,2021,2,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2021-08-22,2021-08-23,1
2,-10000010288008,-10000931485971,2021,4,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2021-09-13,2021-09-16,0
3,-10000010287992,-10000931483882,2020,17,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2020-09-15,2020-10-01,0
4,-10000010287992,-10000931483887,2022,2,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2022-01-20,2022-01-21,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1745,-10000010254740,-10000930040596,2022,2,951,Z951,13991.04,Presence of aortocoronary bypass graft,Other Factors Influencing Health Status,2022-04-26,2022-04-27,1
1746,-10000010254740,-10000930040597,2022,2,951,Z951,1933.05,Presence of aortocoronary bypass graft,Other Factors Influencing Health Status,2022-04-28,2022-04-29,0
1747,-10000010254718,-10000930039730,2021,12,203,J208,28382.80,Acute bronchitis due to other specified organisms,Bronchitis and Asthma without CC/MCC,2021-01-30,2021-02-10,1
1748,-10000010254718,-10000930039731,2021,3,864,R5081,3399.77,Fever presenting with conditions classified el...,Fever and Inflammatory Conditions,2021-02-11,2021-02-13,1


In [12]:
# Total of READMIT_FLAG, i.e. the number of admissions flagged with 1.
readmits.loc[:, 'READMIT_FLAG'].sum()

833

In [15]:
# Stack the multi-admission records on top of the single-admission records
# and renumber the rows from 0.
frames_to_stack = [readmits, readmits0]
inpatient_readmits = pd.concat(frames_to_stack, axis=0, ignore_index=True)
inpatient_readmits


Unnamed: 0,BENE_ID,CLM_ID,YR,LOS,DRG,PRNCPAL_DGNS_CD,CLM_TOT_CHRG_AMT,ICD_Description,DRG_Description,ADMIT_DT,DISCHARGE_DT,READMIT_FLAG
0,-10000010288008,-10000931485965,2020,7,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2020-01-08,2020-01-14,0
1,-10000010288008,-10000931485969,2021,2,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2021-08-22,2021-08-23,1
2,-10000010288008,-10000931485971,2021,4,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2021-09-13,2021-09-16,0
3,-10000010287992,-10000931483882,2020,17,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2020-09-15,2020-10-01,0
4,-10000010287992,-10000931483887,2022,2,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2022-01-20,2022-01-21,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2792,-10000010287875,-10000931478364,2021,8,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2021-12-10,2021-12-17,0
2793,-10000010287882,-10000931478448,2020,13,975,J1282,31774.98,Pneumonia due to coronavirus disease 2019,HIV with Major Related Condition with CC,2020-12-01,2020-12-13,0
2794,-10000010287939,-10000931480929,2022,9,0,T50901A,166.97,"Poisoning by unspecified drugs, medicaments an...",,2022-11-02,2022-11-10,0
2795,-10000010287949,-10000931481450,2021,2,282,I214,1968.47,Non-ST elevation (NSTEMI) myocardial infarction,"Acute Myocardial Infarction, Discharged Alive ...",2021-02-11,2021-02-12,0


In [16]:
# Save the combined, flagged admissions to Google Drive (the row index is not written).
readmits_csv = '/content/drive/MyDrive/Data/Output Data/inpatient_readmits.csv'
inpatient_readmits.to_csv(readmits_csv, index=False)
