# core

> Contains functions to read and parse information from the Chia dataset

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#| export
import pandas as pd
import numpy as np

from pathlib import Path
from typing import List

In [None]:
#| hide
!cat data/annotation.conf

[entities]
!CONCEPTS
	Scope
	Person
	Condition
	Drug
	Observation
	Measurement
	Procedure
	Device
	Visit
!ANNOTATION
	Negation
	Qualifier
	Temporal
	Value
	Multiplier
	Reference_point
	Line
	Mood
!ERROR
	Non-query-able
	Post-eligibility
	Pregnancy_considerations
	Competing_trial
	Informed_consent
	Intoxication_considerations
	Non-representable

[events]

[relations]
h-OR   Arg1:<ENTITY>, Arg2:<ENTITY>, <REL-TYPE>:symmetric-transitive
v-AND Arg1:<ANY>, Arg2:<ANY>
v-OR Arg1:<ANY>, Arg2:<ANY>
multi Arg1:<ANY>, Arg2:<ANY>
<OVERLAP>	Arg1:<ANY>, Arg2:<ANY>, <OVL-TYPE>:<ANY>

[attributes]
Optional   Arg:<ANY>
 

In [None]:
#| export
def load_eligibility_criteria() -> pd.DataFrame:
    """Load the criteria text files under ``data/`` into a single DataFrame.

    Every ``data/*_inc.txt`` and ``data/*_exc.txt`` file yields one row with:

    * ``ct_no``    - the file name with the ``_inc.txt`` / ``_exc.txt`` suffix removed
    * ``criteria`` - the file content with its lines joined by single spaces
    * ``mode``     - ``"inclusion"`` for ``_inc`` files, ``"exclusion"`` for ``_exc`` files
    * one column per key of ``ent_map`` holding the list returned by
      ``extract_entities`` for that entity type, or ``None`` when the list is empty

    Returns
    -------
    pd.DataFrame
        One row per criteria file; an empty frame when no file matches.
    """
    # Output column name -> entity type label looked up in the .ann files
    # (labels follow the [entities] section of data/annotation.conf).
    ent_map = {
        "drugs": "Drug",
        "persons": "Person",
        "procedures": "Procedure",  # was misspelled "Proceure", so it never matched
        "conditions": "Condition",
        "devices": "Device",
        "visits": "Visit",
        "scopes": "Scope",
        "observations": "Observation",
        "measurements": "Measurement",
    }

    records = []

    for mode in ["_inc", "_exc"]:
        suffix = f"{mode}.txt"

        # sorted() makes the row order reproducible; glob order is unspecified.
        for path in sorted(Path("data").glob(f"*{suffix}")):
            # Drop the suffix by length. str.lstrip/str.rstrip remove *character
            # sets*, not a prefix/suffix, and would eat into ids that begin or
            # end with any of those characters.
            clinical_trial_no = path.name[: -len(suffix)]

            with open(path, "rt") as fh:
                criteria = " ".join(fh.read().splitlines())

            rec = {
                "ct_no": clinical_trial_no,
                "criteria": criteria,
                "mode": "inclusion" if mode == "_inc" else "exclusion",
            }

            for column, ent_type in ent_map.items():
                ents = extract_entities(clinical_trial_no, mode, ent_type)
                rec[column] = ents if ents else None

            records.append(rec)

    return pd.DataFrame(records)

In [None]:
#| export
def extract_entities(ct: str, mode: str, e: str) -> List:
    """Return the text of every entity of type `e` annotated for one criteria file.

    Reads ``data/{ct}{mode}.ann`` and parses each line as a brat standoff
    text-bound annotation: ``ID<TAB>TYPE START END[;START END...]<TAB>TEXT``.

    Parameters
    ----------
    ct : str
        File name stem preceding `mode` (the ``ct_no`` of the criteria file).
    mode : str
        File name suffix before ``.ann``; callers in this file pass
        ``"_inc"`` or ``"_exc"``.
    e : str
        Entity type label to keep, compared exactly against the annotation type.

    Returns
    -------
    List
        Entity texts in file order, internal whitespace collapsed to single
        spaces. Empty when nothing matches.

    Raises
    ------
    FileNotFoundError
        If the ``.ann`` file does not exist.
    """
    entities = []

    with open(f"data/{ct}{mode}.ann", "rt") as f:
        rows = f.read().splitlines()

    for row in rows:
        # -> [ID, TYPE, "<offsets>\t<text>"]; shorter lines carry no entity text.
        fields = row.split(None, 2)
        if len(fields) < 3:
            continue

        ann_id, ann_type, rest = fields

        # Only text-bound annotations ("T..." ids) are entities, and the type
        # must match exactly. A substring test over the whole row would also
        # pick up rows whose *text* merely contains the label.
        if not ann_id.startswith("T") or ann_type != e:
            continue

        _offsets, tab, text = rest.partition("\t")
        if not tab:
            # No tab separator: fall back to skipping the two offset tokens.
            text = " ".join(rest.split()[2:])

        # Taking the text after the tab keeps discontinuous spans
        # ("12 25;30 40") from leaking an offset into the entity text.
        entities.append(" ".join(text.split()))

    return entities

In [None]:
#| hide
# Smoke test: load the whole dataset and check the frame dimensions.
df = load_eligibility_criteria()
nrow = len(df)
ncol = len(df.columns)

# 12 columns = ct_no, criteria, mode + the 9 entity columns.
test_eq((nrow, ncol), (2000, 12))

In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the
# module named by `#| default_exp core` - confirm against the nbdev settings).
import nbdev; nbdev.nbdev_export()