# Reproduction of Tangri et al.'s Kidney Failure Risk Equation (KFRE)

## Import Libraries

In [1]:
########################### Import Requisite Libraries #########################
import pandas as pd
import numpy as np
import os
import sys
import pprint

################################################################################
# Add the parent directory to sys.path to access 'functions.py'
sys.path.append(os.path.join(os.pardir))
from python_scripts.functions import *
from python_scripts.kfre import *

## Read in the Data

In [2]:
# Project root: one level above the notebook's working directory
base_path = os.path.join(os.pardir)

# Input data and exported figures all live under the project root
data_path = os.path.join(base_path, "data")
image_path_png = os.path.join(base_path, "images", "png_images")
image_path_svg = os.path.join(base_path, "images", "svg_images")

# Run the project helper ensure_directory on every folder used by this notebook
for directory in (data_path, image_path_png, image_path_svg):
    ensure_directory(directory)

# Load the modeling frame and the EDA frame from their Parquet files
df = pd.read_parquet(os.path.join(data_path, "df.parquet"))
df_eda = pd.read_parquet(os.path.join(data_path, "df_eda.parquet"))

Directory exists: ..\data
Directory exists: ..\images\png_images
Directory exists: ..\images\svg_images


In [3]:
# Attach each patient's SEX label from the EDA frame to the modeling frame.
# "Patient_ID" in df is matched against df_eda's index, and the inner join
# keeps only patients present in both frames. Any SEX column already on df is
# dropped first so that re-running this cell does not raise pandas'
# "columns overlap but no suffix specified" ValueError.
df = df.drop(columns="SEX", errors="ignore").join(
    df_eda["SEX"], on="Patient_ID", how="inner"
)

In [4]:
# Work on an independent (deep) copy so KFRE-specific edits never touch `df`
df_kfre = df.copy()

In [5]:
# List the available column names before selecting the KFRE inputs
df_kfre.columns

Index(['Age', 'Diabetes (1=yes; 0=no)', 'Hypertension (1=yes; 0=no)',
       'Renal disease (DM=1, HTN=2, GN=3, ADPKD=4, Other=5)', 'eGFR-EPI',
       'uPCR', 'Calcium (mmol/L)', 'Phosphate (mmol/L)',
       'Bicarbonate (mmol/L)', 'Albumin (g/l)', 'Follow-up YEARS', 'RIP',
       'ESRD', 'SEX_Female', 'SEX_Male',
       'ETHNICITY_AFRICAN (BLACK OR BLACK BRITISH)',
       'ETHNICITY_ANY OTHER ASIAN BACKGROUND',
       'ETHNICITY_ANY OTHER BLACK BACKGROUND',
       'ETHNICITY_ANY OTHER ETHNIC GROUP',
       'ETHNICITY_ANY OTHER MIXED BACKGROUND',
       'ETHNICITY_ANY OTHER WHITE BACKGROUND',
       'ETHNICITY_BANGLADESHI (ASIAN OR ASIAN BRITISH)',
       'ETHNICITY_CARIBBEAN (BLACK OR BLACK BRITISH)',
       'ETHNICITY_CHINESE (OTHER ETHNIC GROUPS)',
       'ETHNICITY_INDIAN (ASIAN OR ASIAN BRITISH)', 'ETHNICITY_NOT STATED',
       'ETHNICITY_PAKISTANI (ASIAN OR ASIAN BRITISH)',
       'ETHNICITY_WHITE AND BLACK AFRICAN (MIXED)', 'ETHNICITY_WHITE BRITISH',
       'ETHNICITY_WHITE IRIS

In [6]:
# Preview the joined frame (pandas truncates the rendered rows and columns)
df

Unnamed: 0_level_0,Age,Diabetes (1=yes; 0=no),Hypertension (1=yes; 0=no),"Renal disease (DM=1, HTN=2, GN=3, ADPKD=4, Other=5)",eGFR-EPI,uPCR,Calcium (mmol/L),Phosphate (mmol/L),Bicarbonate (mmol/L),Albumin (g/l),...,Renal_Disease_APKD,Renal_Disease_DM,Renal_Disease_GN,Renal_Disease_HTN,Renal_Disease_Other,Calcium (mg/g),Phosphate (mg/g),Albumin (g/dL),uACR,SEX
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
568268916,87.24,1.0,1.0,1.0,19.0,291.72,2.78,0.88,27.2,37.0,...,0,1,0,0,0,11.12,2.728,3.7,102.435547,Male
659549633,56.88,0.0,1.0,5.0,15.0,3491.80,2.43,1.02,21.3,30.0,...,0,0,0,0,1,9.72,3.162,3.0,1762.001840,Female
406748956,66.53,0.0,1.0,3.0,17.0,1440.92,2.33,1.24,27.8,36.0,...,0,0,1,0,0,9.32,3.844,3.6,659.122070,Female
109228713,69.92,0.0,1.0,3.0,12.0,2210.00,2.29,1.80,20.7,39.0,...,0,0,1,0,0,9.16,5.580,3.9,1145.220631,Male
220533110,81.14,1.0,1.0,1.0,15.0,1918.28,2.45,1.39,26.2,43.0,...,0,1,0,0,0,9.80,4.309,4.3,980.918742,Female
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
708268897,68.45,0.0,1.0,3.0,23.0,6462.04,2.38,0.53,27.8,24.0,...,0,0,1,0,0,9.52,1.643,2.4,3491.181746,Female
019128787,32.26,0.0,1.0,5.0,16.0,5975.84,2.46,1.51,21.3,41.0,...,0,0,0,0,1,9.84,4.681,4.1,3200.622314,Female
371958631,72.34,1.0,1.0,1.0,20.0,5569.20,2.51,0.96,22.9,40.0,...,0,1,0,0,0,10.04,2.976,4.0,3462.727326,Male
271875856,73.10,1.0,1.0,1.0,13.0,9759.36,2.24,1.37,29.1,31.0,...,0,1,0,0,0,8.96,4.247,3.1,5977.151420,Female


In [7]:
# Keep only the columns used downstream: age, sex, eGFR, uACR, the diabetes
# and hypertension flags, calcium, phosphate and bicarbonate (mmol/L), and
# the ESRD column.
kfre_columns = [
    "Age",
    "SEX",
    "eGFR-EPI",
    "uACR",
    "Diabetes (1=yes; 0=no)",
    "Hypertension (1=yes; 0=no)",
    "Calcium (mmol/L)",
    "Phosphate (mmol/L)",
    "Bicarbonate (mmol/L)",
    "ESRD",
]

# The explicit .copy() makes df_kfre an independent frame, so the column
# assignments made in later cells do not trigger SettingWithCopyWarning.
df_kfre = df_kfre[kfre_columns].copy()

In [8]:
# Constant region label for every row; the column mapping passed to
# RiskPredictor below points its "region" key at this column
df_kfre["Region"] = "Non North American"

In [9]:
# Map the keys expected in RiskPredictor's column mapping to the matching
# column names in df_kfre
columns = {
    "age": "Age",
    "sex": "SEX",
    "eGFR": "eGFR-EPI",
    "uACR": "uACR",
    "region": "Region",
    "dm": "Diabetes (1=yes; 0=no)",
    "htn": "Hypertension (1=yes; 0=no)",
}

# Build the predictor on the KFRE subset prepared above (df_kfre, not df)
predictor = RiskPredictor(data=df_kfre, columns=columns)

# Predict risk for every combination of time horizon (2 or 5 years) and
# variable set (default vs. use_extra_vars=True)
pred_4var_2year = predictor.predict(years=2)
pred_4var_5year = predictor.predict(years=5)
pred_6var_2year = predictor.predict(years=2, use_extra_vars=True)
pred_6var_5year = predictor.predict(years=5, use_extra_vars=True)

# `risk` is the same 2-year, extra-variable prediction; reuse the result
# instead of running an identical second predict call
risk = pred_6var_2year

# Add the four predictions to df_kfre as new columns (a column assignment via
# .assign, not a join)
df_kfre = df_kfre.assign(
    pred_4var_2year=pred_4var_2year,
    pred_4var_5year=pred_4var_5year,
    pred_6var_2year=pred_6var_2year,
    pred_6var_5year=pred_6var_5year,
)

In [14]:
# Inspect the KFRE inputs alongside the four prediction columns
df_kfre

Unnamed: 0_level_0,Age,SEX,eGFR-EPI,uACR,Diabetes (1=yes; 0=no),Hypertension (1=yes; 0=no),Calcium (mmol/L),Phosphate (mmol/L),Bicarbonate (mmol/L),ESRD,Region,pred_4var_2year,pred_4var_5year,pred_6var_2year,pred_6var_5year
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
568268916,87.24,Male,19.0,102.435547,1.0,1.0,2.78,0.88,27.2,0,Non North American,0.067174,0.236054,0.063954,0.225795
659549633,56.88,Female,15.0,1762.001840,0.0,1.0,2.43,1.02,21.3,1,Non North American,0.449505,0.900884,0.481874,0.921615
406748956,66.53,Female,17.0,659.122070,0.0,1.0,2.33,1.24,27.8,0,Non North American,0.219609,0.617166,0.239008,0.652721
109228713,69.92,Male,12.0,1145.220631,0.0,1.0,2.29,1.80,20.7,1,Non North American,0.482796,0.922154,0.516281,0.939927
220533110,81.14,Female,15.0,980.918742,1.0,1.0,2.45,1.39,26.2,0,Non North American,0.235647,0.646744,0.225403,0.628056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
708268897,68.45,Female,23.0,3491.181746,0.0,1.0,2.38,0.53,27.8,0,Non North American,0.227777,0.632450,0.247777,0.667962
019128787,32.26,Female,16.0,3200.622314,0.0,1.0,2.46,1.51,21.3,1,Non North American,0.699341,0.990472,0.733873,0.994059
371958631,72.34,Male,20.0,3462.727326,1.0,1.0,2.51,0.96,22.9,0,Non North American,0.344607,0.805259,0.330743,0.788817
271875856,73.10,Female,13.0,5977.151420,1.0,1.0,2.24,1.37,29.1,0,Non North American,0.595622,0.969981,0.577071,0.964287


In [13]:
# Median of the 2-year predictions from the default (non-extra-variable) model
df_kfre["pred_4var_2year"].median()

0.23065730964986586