In [1]:
import pandas as pd
import time
import pyprevent
import jupyter_black

jupyter_black.load(lab=True)

## Data Setup

In [2]:
test_patient = ("female", 40, 200, 50, 120, True, True, 25, 70, True, True)
test_list = [test_patient for _ in range(1_000_000)]
df = pd.DataFrame(
    test_list,
    columns=[
        "sex",
        "age",
        "total_cholesterol",
        "hdl_cholesterol",
        "systolic_bp",
        "has_diabetes",
        "current_smoker",
        "bmi",
        "egfr",
        "on_htn_meds",
        "on_cholesterol_meds",
    ],
)

In [3]:
df.iloc[0].to_dict()

{'sex': 'female',
 'age': 40,
 'total_cholesterol': 200,
 'hdl_cholesterol': 50,
 'systolic_bp': 120,
 'has_diabetes': True,
 'current_smoker': True,
 'bmi': 25,
 'egfr': 70,
 'on_htn_meds': True,
 'on_cholesterol_meds': True}

## Individual patient calculations

Taking the first row in the form of a dict, we have a test patient.

The keys of this dictionary are the input arguments for all of the functions.

Here, we can pass the dictionary as kwargs to return the risk. Of note this float represents the risk as a percentage.

In [4]:
pyprevent.calculate_10_yr_ascvd_risk(**df.iloc[0].to_dict())

4.723678963112583

(You can also pass in the arguments like a normal python function)

In [5]:
pyprevent.calculate_30_yr_ascvd_risk(
    sex="MALE",
    age=40,
    total_cholesterol=200,
    hdl_cholesterol=50,
    systolic_bp=120,
    has_diabetes=False,
    current_smoker=False,
    bmi=25,
    egfr=70,
    on_htn_meds=False,
    on_cholesterol_meds=False,
)

7.008061525002453

All of the inputs have constains, and will return an error if the values are outside of the acceptable range.

These ranges are set by the AHA PREVENT formulas to constain any extrapolation at extreme values.

In [6]:
pyprevent.calculate_30_yr_ascvd_risk(
    
    sex="unknown",

    
    age=40,
    total_cholesterol=200,
    hdl_cholesterol=50,
    systolic_bp=120,
    has_diabetes=False,
    current_smoker=False,
    bmi=25,
    egfr=70,
    on_htn_meds=False,
    on_cholesterol_meds=False,
)

ValueError: Sex must be either 'male' or 'female'.

In [7]:
pyprevent.calculate_30_yr_ascvd_risk(
    sex="male",
    
    age=10,
    
    total_cholesterol=200,
    hdl_cholesterol=50,
    systolic_bp=120,
    has_diabetes=False,
    current_smoker=False,
    bmi=25,
    egfr=70,
    on_htn_meds=False,
    on_cholesterol_meds=False,
)

ValueError: Age must be between 30 and 59

## Batch Calculations

There are also functions to batch apply this function.

In our test data set, we have a million patients.

In [8]:
print(len(df))

1000000


In [9]:
pyprevent.batch_calculate_10_yr_ascvd_risk(df)

[4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,
 4.723678963112583,


In [11]:
start = time.perf_counter()
pyprevent.batch_calculate_30_yr_ascvd_risk(df)
end = time.perf_counter()
duration = end - start
print(f"Time to run 1 million rows: {duration} seconds")

Time to run 1 million rows: 0.8204120000009425 seconds


## Data validation

Again, if a value is invalid, the batch methods will return a ValueError.

In [12]:
# Setting age to 100.0 near the end of the dataframe
df.loc[999_998, "age"] = 100.0
print(df.iloc[-2])
pyprevent.batch_calculate_30_yr_ascvd_risk(df)

sex                    female
age                       100
total_cholesterol         200
hdl_cholesterol            50
systolic_bp               120
has_diabetes             True
current_smoker           True
bmi                        25
egfr                       70
on_htn_meds              True
on_cholesterol_meds      True
Name: 999998, dtype: object


ValueError: Age must be between 30 and 59