In [1]:
import os
import re

from database import Database

In [2]:
# Connection settings passed to `Database(**credentials)`.
# Every value can be overridden with an environment variable so that secrets do not
# have to be typed into (or committed with) the notebook; the fallbacks are the values
# that were previously hard-coded here, so behaviour is unchanged when nothing is set.
credentials = {
    "host": os.environ.get("AACT_HOST", ""),
    "database": os.environ.get("AACT_DATABASE", "aact"),
    "user": os.environ.get("AACT_USER", "xc383@drexel.edu"),
    "password": os.environ.get("AACT_PASSWORD", ""),
}

In [3]:
# Smoke-test the connection: open it, run a trivial query, then close it.
db = Database(**credentials)
db.connect()

try:
    print("Querying database")
    df = db.execute(sql="select current_timestamp", expecting_return=True)
    print(df)
finally:
    # Always release the connection, even when the query above raises.
    # NOTE(review): later cells call `db.execute...` after this disconnect —
    # confirm that `Database` reconnects on demand.
    print("Disconnecting from database")
    db.disconnect()

Querying database
                 current_timestamp
0 2023-11-06 16:23:43.322374+00:00
Disconnecting from database


In [4]:
# Start iterating over the (nct_id, criteria) rows of the eligibilities table.
# NOTE(review): presumably `execute_yield` returns a lazy iterator of row dicts — confirm in `database.py`.
x = db.execute_yield("select nct_id, criteria from eligibilities;")

In [10]:
# Pull the next row from the iterator `x`; re-running this cell advances it by one more row.
next(x)

{'nct_id': 'NCT01301781',
 'criteria': 'Inclusion Criteria:\n\nConstipated, defined by ROME III definition\nSubject has < 3 satisfactory BMs during the run-in period\n\nExclusion Criteria:\n\nSubjects with known or suspected ileus, gastrointestinal obstruction, gastric retention, bowel perforation, toxic colitis, toxic megacolon\nSubjects taking laxatives or prokinetic agents that refuse to discontinue these treatments.\nSubjects who are allergic to any BLI801 component\nSubjects taking narcotic analgesics or other medications known to cause constipation.\nSubjects who, in the opinion of the Investigator, should not be included in the study for any reason, including inability to follow study procedures\nSubjects who have participated in an investigational clinical, surgical, drug, or device study within the past 30 days\nSubjects with an active history of drug or alcohol abuse'}

In [5]:
# Fetch the (nct_id, criteria) rows in a single call; this rebinds `df`, which
# previously held the timestamp query result.
df = db.execute("select nct_id, criteria from eligibilities;", expecting_return=True)

In [6]:
# Pick one sample record to develop the parsing against: row position 5, column position 1
# (presumably `criteria`, the second column selected by the query — confirm).
text = df.iloc[5, 1]
print(text)

Inclusion Criteria:

Constipated, defined by ROME III definition
Subject has < 3 satisfactory BMs during the run-in period

Exclusion Criteria:

Subjects with known or suspected ileus, gastrointestinal obstruction, gastric retention, bowel perforation, toxic colitis, toxic megacolon
Subjects taking laxatives or prokinetic agents that refuse to discontinue these treatments.
Subjects who are allergic to any BLI801 component
Subjects taking narcotic analgesics or other medications known to cause constipation.
Subjects who, in the opinion of the Investigator, should not be included in the study for any reason, including inability to follow study procedures
Subjects who have participated in an investigational clinical, surgical, drug, or device study within the past 30 days
Subjects with an active history of drug or alcohol abuse


In [6]:
# Shared tail of both section regexes: "clusion", whitespace, "criteria"/"Criteria",
# an optional non-space character (e.g. ":"), whitespace, then capture group 1.
# Group 1 is greedy, so it runs up to the LAST occurrence of the trailing keyword.
base_pattern = r"clusion\s+[Cc]riteria\S?\s+([\S\s]+)"
# Group 1 = text after an inclusion header and before a following "exclusion".
inclusion_pattern = f"[Ii]n{base_pattern}[Ee]xclusion"
# Group 1 = text after an exclusion header and before a following "inclusion".
exclusion_pattern = f"[Ee]x{base_pattern}[Ii]nclusion"

In [14]:
def breakdown_criteria_text(text: str) -> dict[str, list[str]]:
    """Split raw eligibility text into its inclusion and exclusion sections.

    The text is cut at every "<word> criteria" header (an optional single
    non-word character such as ":" after "criteria" is consumed with the
    header). The word in front of "criteria" decides which section the text
    following the header belongs to; headers whose word is neither
    "inclusion" nor "exclusion" (case-insensitive) are ignored, as is any
    text before the first header.

    Args:
        text: Raw criteria text. ``None`` or an empty string yields empty sections.

    Returns:
        ``{"inclusion": [...], "exclusion": [...]}`` where each list holds the
        raw text of every matching section, in order of appearance.
    """
    doc = {
        "inclusion": [],
        "exclusion": []
    }
    if not text:
        return doc

    header_pattern = re.compile(r"(\w+)\s+[Cc]riteria\W?")

    # With one capture group, split() returns
    # [before, word1, body1, word2, body2, ...]: header words sit at odd
    # indices and each is followed by the text that belongs to it. Pairing
    # them avoids manual index arithmetic, which could run past the end of
    # the list when a trailing body happened to read "inclusion"/"exclusion".
    parts = header_pattern.split(text)
    for header_word, body in zip(parts[1::2], parts[2::2]):
        section = header_word.lower()
        if section in doc:
            doc[section].append(body)

    return doc

In [8]:
def tolines(text: str) -> list[str]:
    """Break ``text`` into lines, treating each run of newlines as one separator.

    Newlines at the very start or end of ``text`` produce an empty string at
    the corresponding end of the result.
    """
    return re.compile(r"\n+").split(text)

In [34]:
def check_valid(section) -> bool:
    """Report whether a criteria section should be processed.

    Placeholder: no validation is implemented yet, so every section is accepted.
    """
    return True

In [30]:
def filter_criterea(criteria, min_length=10):
    """Drop lines that are too short to be a real criterion.

    The list is filtered in place and the same list object is returned, so
    callers holding a reference to ``criteria`` see the filtered contents.

    Args:
        criteria: List of text lines; modified in place.
        min_length: Minimum number of characters a line needs to be kept.
            Lines with fewer characters are removed. Defaults to 10.

    Returns:
        The same ``criteria`` list, now containing only the kept lines in
        their original order.
    """
    # Slice assignment rewrites the list in a single pass while keeping its
    # identity, instead of shifting the tail on every individual pop().
    criteria[:] = [line for line in criteria if len(line) >= min_length]
    return criteria

In [37]:
def process_criteria(idx, text):
    """Flatten one record's criteria text into a list of criterion rows.

    The text is split into inclusion and exclusion sections; each section that
    passes ``check_valid`` is broken into lines and filtered.

    Args:
        idx: Identifier copied into every returned row.
        text: Raw criteria text of the record.

    Returns:
        A list of ``(idx, section, position, line)`` tuples, inclusion rows
        first, where ``position`` counts from 0 within each section.
    """
    sections = breakdown_criteria_text(text)
    rows = []

    for label in ("inclusion", "exclusion"):
        section_text = "\n".join(sections[label])
        if not check_valid(section_text):
            continue
        kept_lines = filter_criterea(tolines(section_text))
        rows.extend(
            (idx, label, position, line)
            for position, line in enumerate(kept_lines)
        )

    return rows

In [38]:
# Try the pipeline on the sample `text`, using 0 as a placeholder record id.
# NOTE(review): the saved output below belongs to a different record than the `text`
# printed earlier — `text` was presumably rebound in a cell that is no longer present.
process_criteria(0, text)

[(0,
  'inclusion',
  0,
  'Patients aged 18 years or over who have attended the MND clinic at the Royal Hallamshire Hospital, Sheffield.'),
 (0,
  'inclusion',
  1,
  'Patients with amyotrophic lateral sclerosis diagnosed by a consultant neurologist with symptom onset within the last three years.'),
 (0,
  'inclusion',
  2,
  'Patients with amyotrophic lateral sclerosis, primary muscular atrophy or progressive lateral sclerosis diagnosed by a consultant neurologist with a deterioration in their condition as evidenced by a deterioration in the ALS functional rating score (ALSFRS-R) by at least two points during the previous 18 months.'),
 (0, 'inclusion', 3, 'Live within 120 minute drive from Sheffield'),
 (0, 'inclusion', 4, 'Age 18 years or older'),
 (0,
  'inclusion',
  5,
  'Person identified by the patient as the major provider of informal care (emotional and/or practical support) to the patient and provides more than one hour per week of unpaid care'),
 (0,
  'inclusion',
  6,
  

In [40]:
# NOTE(review): `inclusions` is not assigned in any visible cell — presumably left over
# from a deleted or out-of-order cell; define it explicitly so Restart & Run All works.
inclusions

['Female subjects 18 years of age or older with FIGO Stages III-IV epithelial ovarian, primary peritoneal or fallopian tube cancer with an indication for first-line treatment with paclitaxel and carboplatin x 6 cycles (Subjects with pseudomyxoma, mesothelioma, adenocarcinoma with an unknown primary tumour, carcinosarcoma, sarcoma, mucinous or neuroendocrine histology are excluded',
 'Subjects with FIGO Stage IIIA or IIIB disease must have undergone PDS for ovarian, primary peritoneal or fallopian tube cancer within 12 weeks prior to randomization',
 'Subjects with FIGO Stage IIIC or IV disease must either:',
 'Undergo PDS for epithelial ovarian, primary peritoneal or fallopian tube cancer within 12 weeks prior to randomization or',
 'Plan to have IDS following 3 cycles of paclitaxel and carboplatin plus AMG 386 or AMG 386 placebo for biopsy proven epithelial ovarian, primary peritoneal or fallopian tube cancer',
 'ECOG performance status of 0 or 1',
 'Adequate bone marrow, renal and he