In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root folder of the dataset (note the space before "-III" in the folder name as written).
base_path = "/kaggle/input/mimic-iii-10k/MIMIC -III (10000 patients)"

# Every table is read from <base_path>/<TABLE>/<TABLE>_sorted.csv; the order of the
# table names below matches the order of the variables being assigned.
admissions, patients, diagnoses = (
    pd.read_csv(f"{base_path}/{table}/{table}_sorted.csv")
    for table in ("ADMISSIONS", "PATIENTS", "DIAGNOSES_ICD")
)

In [None]:
# Preview the first rows of the admissions table.
print("Admissions:")
display(admissions.head())

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" under the summary.
print("\n Admissions Info:\n")
admissions.info()

### Understanding the Admissions Table
* `SUBJECT_ID` : Unique patient identifier
* `HADM_ID` : Unique hospital admission ID
* `ADMITTIME`, `DISCHTIME` : Used to calculate length of stay and time between visits
* `DEATHTIME` : May indicate patients who died during or shortly after admission
* `ADMISSION_TYPE` : E.g., EMERGENCY, NEWBORN
* `HOSPITAL_EXPIRE_FLAG` : 1 = patient died during hospital stay
* **Target Construction :** If a patient's next `ADMITTIME` falls within 30 days after an admission's `DISCHTIME`, that earlier admission is flagged as **readmitted**

In [None]:
# Preview the first rows of the patients table.
print("Patients:")
display(patients.head())

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" under the summary.
print("\n Patients Info:\n")
patients.info()

### Understanding the Patients Table
* `GENDER`, `DOB` : For calculating age at admission
* `EXPIRE_FLAG`, `DOD_HOSP`, `DOD` : Useful for removing deceased from modeling or for labeling outcomes

In [None]:
# Preview the first rows of the diagnoses table.
print("Diagnoses:")
display(diagnoses.head())

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" under the summary.
print("\n Diagnoses Info:\n")
diagnoses.info()

### Understanding the Diagnoses Table
* `ICD9_CODE` : Can be grouped to predict readmission risk

## Create Readmission Labels

In [None]:
from datetime import timedelta

# Parse the admission and discharge timestamp columns into pandas datetimes
# so that they can be subtracted from one another later on.
for time_column in ("ADMITTIME", "DISCHTIME"):
    admissions[time_column] = pd.to_datetime(admissions[time_column])

In [None]:
# Order rows by patient, then chronologically by admission time, and renumber
# the index 0..n-1 so that each patient's admissions sit on consecutive rows.
admissions = admissions.sort_values(["SUBJECT_ID", "ADMITTIME"], ignore_index=True)

In [None]:
# Build the binary target READMISSION_LABEL:
#   1 = the same patient's next admission starts within the readmission window
#       after this admission's discharge, 0 = otherwise (the default).
READMISSION_WINDOW_DAYS = 30

# ADMITTIME of the same patient's following admission, aligned to the current row.
# It is NaT on each patient's last admission, which therefore stays labelled 0.
# Relies on `admissions` already being sorted by SUBJECT_ID and ADMITTIME.
next_admit_time = admissions.groupby("SUBJECT_ID")["ADMITTIME"].shift(-1)

# Whole-day component of the gap between this discharge and the next admission
# (NaN where either timestamp is missing).
# NOTE(review): because only whole days are compared, a gap shorter than 24 hours
# (0 days) is not labelled, and a gap of 30 days plus some hours still is.
# Confirm this matches the intended definition of a 30-day readmission.
days_between = (next_admit_time - admissions["DISCHTIME"]).dt.days

# Comparisons against NaN evaluate to False, so rows with no follow-up get 0.
is_readmitted = (days_between > 0) & (days_between <= READMISSION_WINDOW_DAYS)
admissions["READMISSION_LABEL"] = is_readmitted.astype("int64")

In [None]:
# Class balance of the target: how many admissions carry each label value
# (0 = not readmitted, 1 = readmitted).
label_counts = admissions["READMISSION_LABEL"].value_counts()
label_counts

### Using Class Weights during modeling
For this case (real-world healthcare prediction), class weights are more appropriate than SMOTE.

## Check for null values