In [18]:
import pandas as pd


=== Load OMOP CDM tables ===

In [19]:
# Vocabulary tables: the concept dictionary and the relationships between concepts.
# low_memory=False avoids pandas' chunked dtype inference on the large concept file.
concept_relationship = pd.read_csv("ehrshot-omop/concept_relationship.csv")
concepts = pd.read_csv("ehrshot-omop/concept.csv", low_memory=False)

# Patient-level tables used below: demographics and recorded conditions.
person = pd.read_csv("ehrshot-omop/person.csv")
condition = pd.read_csv("ehrshot-omop/condition_occurrence.csv", low_memory=False)

=== Define CKD ICD-10 codes and mappings ===

In [None]:
# Staged CKD diagnosis codes (N18.1-N18.5) plus end-stage renal disease (N18.6).
ckd_icd10_codes = ["N18.1", "N18.2", "N18.3", "N18.4", "N18.5", "N18.6"]    # N18.9 is "Unspecified"

# Additional codes that identify a CKD patient. Apart from the N18.3x subcodes,
# these do not pin down a single stage and are deliberately absent from stage_map.
related_codes = ["N18.9", "N18.30", "N18.31", "N18.32", "Z99.2", "I12.0", "I13.11", "I13.2"]
ckd_icd10_codes.extend(related_codes)

# ICD-10 code -> CKD stage label. Codes missing from this dict resolve to None
# wherever stage_map.get() is used.
# N18.30 / N18.31 / N18.32 are the ICD-10-CM subdivisions of N18.3 (stage 3
# unspecified / 3a / 3b), so they are labelled "Stage 3" as well; without these
# entries diagnoses recorded with the subcodes would carry no stage at all.
stage_map = {
    "N18.1": "Stage 1", "N18.2": "Stage 2", "N18.3": "Stage 3",
    "N18.30": "Stage 3", "N18.31": "Stage 3", "N18.32": "Stage 3",
    "N18.4": "Stage 4", "N18.5": "Stage 5", "N18.6": "ESRD",
}

=== Map ICD-10 codes to SNOMED concept_ids ===

In [None]:
# Concept rows from the ICD10CM vocabulary whose code is one of the CKD codes.
is_icd10cm = concepts["vocabulary_id"] == "ICD10CM"
is_ckd_code = concepts["concept_code"].isin(ckd_icd10_codes)
ckd_icd10_concepts = concepts[is_icd10cm & is_ckd_code]

# "Maps to" relationships whose source side (concept_id_1) is one of those ICD-10 concepts.
is_maps_to = concept_relationship["relationship_id"] == "Maps to"
from_ckd_icd10 = concept_relationship["concept_id_1"].isin(ckd_icd10_concepts["concept_id"])
mapped = concept_relationship[is_maps_to & from_ckd_icd10]

# Concept rows for the mapping targets (concept_id_2).
# NOTE(review): these are presumed to be SNOMED concepts; the vocabulary is not checked here.
is_mapping_target = concepts["concept_id"].isin(mapped["concept_id_2"])
ckd_snomed_concepts = concepts[is_mapping_target]

=== Identify CKD patients based on SNOMED concept_id ===

In [None]:
# Condition records whose concept is one of the mapped CKD concepts.
ckd_concept_ids = ckd_snomed_concepts["concept_id"].tolist()
has_ckd_concept = condition["condition_concept_id"].isin(ckd_concept_ids)
# .copy() so that columns added later do not write into a view of `condition`.
ckd_conditions = condition.loc[has_ckd_concept].copy()

# Distinct patients with at least one such record.
ckd_patient_ids = ckd_conditions["person_id"].unique()
print(f" Found {len(ckd_patient_ids)} unique CKD patients.")

=== Map SNOMED concept_id to CKD stage ===

In [87]:
# ICD-10 concept_id -> ICD-10 code, built once instead of scanning
# ckd_icd10_concepts for every mapping row. setdefault keeps the first code seen
# for a concept_id, matching the previous `.values[0]` lookup.
icd_code_by_concept_id = {}
for icd_concept_id, icd_code in zip(
    ckd_icd10_concepts["concept_id"], ckd_icd10_concepts["concept_code"]
):
    icd_code_by_concept_id.setdefault(icd_concept_id, icd_code)

# Target concept_id (concept_id_2) -> CKD stage label.
concept_stage_map = {}
for icd_concept_id, target_concept_id in zip(mapped["concept_id_1"], mapped["concept_id_2"]):
    stage = stage_map.get(icd_code_by_concept_id[icd_concept_id])
    if stage is None:
        # An unstaged code must not erase a stage already resolved for the same
        # target concept (this happens if several ICD-10 codes map to one target);
        # record None only when nothing is known about the target yet.
        concept_stage_map.setdefault(target_concept_id, None)
    else:
        concept_stage_map[target_concept_id] = stage


# Add the ckd_stage column to ckd_conditions; concepts without a stage stay null.
ckd_conditions["ckd_stage"] = ckd_conditions["condition_concept_id"].map(concept_stage_map)
# Parse start dates so they can be compared and aggregated as timestamps.
ckd_conditions["condition_start_DATE"] = pd.to_datetime(ckd_conditions["condition_start_DATE"])

=== Earliest diagnosis date per patient and stage ===

In [None]:
# Earliest recorded start date for each (patient, stage) pair.
# groupby drops rows whose ckd_stage is null by default, so unstaged records are excluded.
first_dates = ckd_conditions.groupby(["person_id", "ckd_stage"])["condition_start_DATE"].min()
stage_dates = first_dates.reset_index()


# Clinical ordering of the stages; the list position is used as the sort key.
stage_order = ["Stage 1", "Stage 2", "Stage 3", "Stage 4", "Stage 5", "ESRD"]
stage_dates["stage_order"] = stage_dates["ckd_stage"].apply(stage_order.index)
stage_dates = stage_dates.sort_values(["person_id", "stage_order"])

stage_dates

=== Pivot to wide format: one row per patient ===

In [None]:
# Wide format: one row per patient (person_id is the index), one column per CKD stage,
# each cell holding the earliest diagnosis date for that stage.
pivoted = stage_dates.pivot(index="person_id", columns="ckd_stage", values="condition_start_DATE")
# Put the columns in clinical order; stages that never occur become all-missing columns.
pivoted = pivoted.reindex(columns=stage_order)

# person_id is intentionally left as the index: the former bare `pivoted.reset_index()`
# discarded its result (reset_index returns a new frame), and keeping the index means
# to_csv writes person_id as the first column, which is the layout the transition-time
# cell reads back.
# NOTE(review): the next cell reads results/ckd_stage_diagnoses.csv; re-enable the line
# below whenever that file must be regenerated from this run.
#pivoted.to_csv("results/ckd_stage_diagnoses.csv")
pivoted

=== Determine inter-stage transition times for each patient ===

In [None]:
# Wide table of first-diagnosis dates: a person_id column followed by one column per stage.
ckd_stage_diagnoses = pd.read_csv("results/ckd_stage_diagnoses.csv")

# One output column per pair of adjacent stages.
transition_labels = ["1 to 2", "2 to 3", "3 to 4", "4 to 5", "5 to End stage"]

# Every column except person_id is a stage column, taken in file order.
stage_columns = [col for col in ckd_stage_diagnoses.columns if col != "person_id"]
if len(stage_columns) != len(transition_labels) + 1:
    raise ValueError(
        f"Expected {len(transition_labels) + 1} stage columns, found {len(stage_columns)}"
    )

# Parse each stage column once; missing diagnoses become NaT.
diagnosis_dates = ckd_stage_diagnoses[stage_columns].apply(pd.to_datetime)

# Build the result column by column instead of appending one row at a time.
stage_times = pd.DataFrame({"person_id": ckd_stage_diagnoses["person_id"]})
for label, earlier_stage, later_stage in zip(
    transition_labels, stage_columns[:-1], stage_columns[1:]
):
    # If either stage was never recorded the difference is NaT, i.e. no transition time.
    # A negative value means the later stage was recorded before the earlier one.
    elapsed = diagnosis_dates[later_stage] - diagnosis_dates[earlier_stage]
    # Nullable Int64 keeps whole-day counts as integers alongside missing values.
    stage_times[label] = elapsed.dt.days.astype("Int64")

# NOTE(review): the summary cell reads results/stage_times.csv; re-enable the line below
# whenever that file must be regenerated from this run.
#stage_times.to_csv("results/stage_times.csv", index=False)
stage_times

 === Finally, find the mean and median of our transition times ===

In [None]:
# Transition times in days, one column per stage transition; person_id is not a statistic.
stage_times = pd.read_csv("results/stage_times.csv").drop(columns="person_id")

transition_columns = stage_times.columns.to_list()  # person_id was already dropped above

# Blank out negative durations (later stage recorded before the earlier one);
# the blanked cells become NaN and are skipped by mean/median.
non_negative = stage_times[transition_columns].ge(0)
filtered_times = stage_times[transition_columns].where(non_negative)

# One row per statistic, one column per transition, rounded to two decimals.
summary_by_statistic = {
    "Raw Mean": stage_times.mean(),
    "Raw Median": stage_times.median(),
    "Filtered Mean": filtered_times.mean(),
    "Filtered Median": filtered_times.median(),
}
summary = pd.DataFrame(summary_by_statistic).transpose().round(2)

#summary.to_csv("results/summary.csv")
summary

In conclusion, we found 1290 unique CKD patients. The mean and median stage transition times (in days) can be found in the summary CSV (`results/summary.csv`).