# Reproduction of Tangri et al.'s Kidney Failure Risk Equation (KFRE)

## Import Libraries

In [1]:
########################### Import Requisite Libraries #########################
import pandas as pd
import numpy as np
import os
import sys
import pprint

################################################################################
# Add the parent directory to sys.path so the 'python_scripts' package
# (which contains 'functions.py') can be imported
sys.path.append(os.path.join(os.pardir))
from python_scripts.functions import *

## Read in the Data

In [2]:
# Project root: one level above the current working directory
base_path = os.pardir

# Data folder plus the PNG and SVG image folders, all relative to the root
data_path = os.path.join(base_path, "data")
image_path_png = os.path.join(base_path, "images", "png_images")
image_path_svg = os.path.join(base_path, "images", "svg_images")

# Run the directory check on the data folder and on both image folders
for required_dir in (data_path, image_path_png, image_path_svg):
    ensure_directory(required_dir)

# Read the Parquet file into a DataFrame
df = pd.read_parquet(os.path.join(data_path, "df.parquet"))

Directory exists: ..\data
Directory exists: ..\images\png_images
Directory exists: ..\images\svg_images


In [3]:
# Work on an independent deep copy (the default for DataFrame.copy) so that
# changes made to df_kfre do not affect the originally loaded df
df_kfre = df.copy()

In [4]:
# Keep only the columns selected for the KFRE reproduction:
# age, eGFR, log uACR, diabetes, hypertension, calcium, phosphate,
# and bicarbonate
kfre_columns = [
    "Age",
    "eGFR-EPI",
    "log_uACR",
    "Diabetes (1=yes; 0=no)",
    "Hypertension (1=yes; 0=no)",
    "Calcium (mmol/L)",
    "Phosphate (mmol/L)",
    "Bicarbonate (mmol/L)",
]
df_kfre = df_kfre[kfre_columns]

In [5]:
# Sanity check: list the column names remaining after the subset
df_kfre.columns.tolist()

['Age',
 'eGFR-EPI',
 'log_uACR',
 'Diabetes (1=yes; 0=no)',
 'Hypertension (1=yes; 0=no)',
 'Calcium (mmol/L)',
 'Phosphate (mmol/L)',
 'Bicarbonate (mmol/L)']