In [1]:
from medmodels import MedRecord

## MedRecord Overviews

In [2]:
# Build the MedRecord used throughout this notebook from the library's
# advanced example dataset.
medrecord = MedRecord.from_advanced_example_dataset()

# Last expression of the cell: renders the per-group node summary
# (count, attributes, attribute type, basic statistics) shown below.
medrecord.overview_nodes()

-------------------------------------------------------------
Nodes Group Count Attribute   Type         Data              
-------------------------------------------------------------
diagnosis   206   description Unstructured 206 unique values 
drug        185   description Unstructured 184 unique values 
event       1     -           -            -                 
patient     600   age         Continuous   min: 0            
                                           max: 102          
                                           mean: 40.98       
                  gender      Categorical  Categories: F, M  
procedure   96    description Unstructured 96 unique values  
-------------------------------------------------------------

In [3]:
# Render the per-group edge summary (count, attributes, attribute type,
# basic statistics) shown below.
medrecord.overview_edges()

------------------------------------------------------------------------------
Edges Group       Count Attribute        Type       Data                      
------------------------------------------------------------------------------
patient_diagnosis 5741  duration_days    Continuous min: 0.00                 
                                                    max: 16926.00             
                                                    mean: 223.92              
                        time             Temporal   min: 1921-02-02 00:00:00  
                                                    max: 2025-01-14 00:00:00  
                                                    mean: 2012-01-21 10:32:41 
patient_drug      10373 cost             Continuous min: 0.01                 
                                                    max: 172813.04            
                                                    mean: 719.73              
                        quantity         Continuous 

## MedRecord Querying

In [4]:
from medmodels.medrecord.querying import NodeOperand

def query_young_females_with_insulin(node: NodeOperand) -> None:
    """Constrain ``node`` to young female patients linked to an insulin description.

    The operand is narrowed by four conditions, applied in this order:

    - it belongs to the ``"patient"`` group,
    - its ``gender`` attribute equals ``"F"``,
    - its ``age`` attribute is less than or equal to 50,
    - the ``description`` attribute of its neighbors contains ``"insulin"``.

    Args:
        node: Node operand handed in by the query engine. The conditions are
            registered on it directly; nothing is returned.
    """
    # The node group is called "patient" (singular) in this dataset, as listed
    # by ``overview_nodes()``; "patients" is not one of the listed groups.
    node.in_group("patient")

    gender = node.attribute("gender")
    gender.equal_to("F")

    node.attribute("age").less_than_or_equal_to(50)

    description_neighbors = node.neighbors().attribute("description")
    description_neighbors.contains("insulin")


# Index the MedRecord's nodes with the query function; the cell displays
# whatever the lookup returns (rendered below).
medrecord.node[query_young_females_with_insulin]

{}

## MedRecord Comparison

In [5]:
from medmodels.statistic_evaluations.evaluate_compare.evaluate import CohortEvaluator

def query_insulin_treated(node: NodeOperand) -> None:
    """Require the ``description`` attribute of ``node``'s neighbors to contain "insulin"."""
    node.neighbors().attribute("description").contains("insulin")

def query_had_caries(node: NodeOperand) -> None:
    """Require the ``description`` attribute of ``node``'s neighbors to contain "caries"."""
    node.neighbors().attribute("description").contains("caries")


# One CohortEvaluator per cohort query; both use "patient" as the patient group.
insulin = CohortEvaluator(
    medrecord,
    name="Insulin",
    patient_group="patient",
    cohort_query=query_insulin_treated,
)
caries = CohortEvaluator(
    medrecord,
    name="Caries",
    patient_group="patient",
    cohort_query=query_had_caries,
)

In [28]:
import pandas as pd
# Show floats with two decimals in the DataFrames rendered below.
pd.set_option("display.precision", 2)

In [29]:
from medmodels.statistic_evaluations.evaluate_compare.compare import CohortComparer

# Compare the "age" attribute across the two cohorts and show it as a table.
age_comparison = CohortComparer.compare_cohort_attribute(
    cohorts=[insulin, caries],
    attribute="age",
)
pd.DataFrame(age_comparison)

Unnamed: 0,Insulin,Caries
type,Continuous,Continuous
min,44,8
max,98,98
mean,67.81,47.24
median,65.5,44.0
Q1,58.75,26.0
Q3,74.25,66.0


In [32]:
# Compare the "gender" attribute across the two cohorts and show it as a table.
gender_comparison = CohortComparer.compare_cohort_attribute(
    cohorts=[insulin, caries],
    attribute="gender",
)
pd.DataFrame(gender_comparison)

Unnamed: 0,Insulin,Caries
type,Categorical,Categorical
count,2,2
top,M,M
freq,65.62 %,51.81 %


In [34]:
# Raise the display precision to eight decimals for the table rendered next.
pd.set_option("display.precision", 8)

In [36]:
# Statistical test for a difference in "age" between the two cohorts.
# Stored under its own name so the descriptive `age_comparison` computed in an
# earlier cell is not silently replaced by a different kind of result.
age_difference_test = {
    "Age Comparison": CohortComparer.test_difference_attribute(
        cohorts=[insulin, caries],
        attribute="age",
    )
}
pd.DataFrame(age_difference_test)

Unnamed: 0,Age Comparison
test,Mann-Whitney U Test
Hypothesis,The distributions of both populations are equal.
p_value,0.00000012
not_reject,False


## Calculate TreatmentEffect

In [9]:
from medmodels.treatment_effect import TreatmentEffect

# Keywords used twice below: as the text searched for in node descriptions
# and as the names of the groups added to the MedRecord.
treatment = "insulin"
outcome = "diabetes"

def query_insulin_nodes(node: NodeOperand) -> None:
    """Apply ``contains(treatment)`` to the ``description`` attribute of ``node``.

    Reads the module-level ``treatment`` keyword at call time.
    """
    node.attribute("description").contains(treatment)

def query_diabetes_nodes(node: NodeOperand) -> None:
    """Apply ``contains(outcome)`` to the ``description`` attribute of ``node``.

    Reads the module-level ``outcome`` keyword at call time.
    """
    node.attribute("description").contains(outcome)

# Add one group per keyword, populated by the matching query; the group names
# are later passed to the TreatmentEffect builder as treatment and outcome.
# NOTE(review): this mutates `medrecord`; re-running the cell on the same
# object may fail if add_group rejects an already existing group — confirm.
medrecord.add_group(group=treatment, nodes=query_insulin_nodes)
medrecord.add_group(group=outcome, nodes=query_diabetes_nodes)

In [10]:
# Configure the treatment-effect analysis with nearest-neighbors matching.
# Each builder option is set exactly once (the grace period used to be passed
# twice with the same value).
treatment_effect = (
    TreatmentEffect.builder()
    .with_treatment(treatment)
    .with_outcome(outcome)
    .with_patients_group("patient")
    .with_time_attribute("time")
    .with_grace_period(days=30)
    .with_follow_up_period(days=365)
    .with_nearest_neighbors_matching(number_of_neighbors=2)
    .build()
)

In [11]:
# Subject counts per group (treated/control) and outcome (true/false);
# printed so the plain-text table shown below is rendered.
contingency_table = treatment_effect.estimate.subject_counts(medrecord)
print(contingency_table)

-----------------------------------
                   Outcome   
Group           True     False   
-----------------------------------
Treated         0        64      
Control         23       105     
-----------------------------------


In [38]:
# Back to two decimals for the report tables rendered below.
pd.set_option("display.precision", 2)

In [39]:
# Full report of the configured treatment effect, shown as a one-column table.
report = {
    "Insulin Treatment Effect": treatment_effect.report.full_report(medrecord),
}
pd.DataFrame(report)

Unnamed: 0,Insulin Treatment Effect
relative_risk,0.0
odds_ratio,0.0
confounding_bias,1.27
absolute_risk_reduction,0.21
number_needed_to_treat,4.27
hazard_ratio,0.0


### Using Propensity Matching

In [13]:
# Same analysis as before, but with propensity matching on age and gender.
# Each builder option is set exactly once (the grace period used to be passed
# twice with the same value).
# NOTE: this rebinds `treatment_effect`; cells below use this configuration.
treatment_effect = (
    TreatmentEffect.builder()
    .with_treatment(treatment)
    .with_outcome(outcome)
    .with_patients_group("patient")
    .with_time_attribute("time")
    .with_grace_period(days=30)
    .with_follow_up_period(days=365)
    .with_propensity_matching(
        essential_covariates=["age", "gender"],
        one_hot_covariates=["gender"],
        number_of_neighbors=2,
    )
    .build()
)

# Subject counts per group (treated/control) and outcome (true/false) for the
# configuration built just above; printed to render the plain-text table.
contingency_table = treatment_effect.estimate.subject_counts(medrecord)
print(contingency_table)

-----------------------------------
                   Outcome   
Group           True     False   
-----------------------------------
Treated         0        64      
Control         26       102     
-----------------------------------


In [40]:
# Full report for the current `treatment_effect`, shown as a one-column table.
report = {
    "Insulin Treatment Effect": treatment_effect.report.full_report(medrecord),
}
pd.DataFrame(report)

Unnamed: 0,Insulin Treatment Effect
relative_risk,0.0
odds_ratio,0.0
confounding_bias,1.28
absolute_risk_reduction,0.2
number_needed_to_treat,5.12
hazard_ratio,0.0


## Synthesis with HALO

In [None]:
# HALO_Hyperparameters is used below and therefore has to be imported as well.
# NOTE(review): it is assumed to be exported from the same module as
# HALO_Model — confirm the import path against the installed medmodels version.
from medmodels.data_synthesis.halo import HALO_Hyperparameters, HALO_Model

# Settings passed to the model builder below.
hyperparameters = HALO_Hyperparameters(
    minimum_occurrences_concept=20,
    time_interval_days=7,
    number_steps_discretization=10,
)

# Build the model from the MedRecord and the hyperparameters, then train it.
model = (
    HALO_Model.builder()
    .with_medrecord(medrecord)
    .with_hyperparameters(hyperparameters)
    .train()
)

# Generate a synthetic MedRecord with 1000 patients from the trained model.
synthetic_medrecord = model.generate_synthetic_medrecord(number_of_patients=1000)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1339273385.py, line 18)