In [2]:
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams
from pydantic import BaseModel
from typing import List, Optional
from enum import Enum

class SexEnum(str, Enum):
    # Sexes a record may be open to. Members are also plain ``str`` values
    # (str mixin), and every member's value equals its name.
    # Kept as a comment rather than a docstring so the class ``__doc__`` (and
    # anything derived from it) is unchanged.
    MALE, FEMALE, ALL = "MALE", "FEMALE", "ALL"

class AgeGroupEnum(str, Enum):
    # Age-group labels. Members are also plain ``str`` values (str mixin),
    # and every member's value equals its name.
    # Kept as a comment rather than a docstring so the class ``__doc__`` (and
    # anything derived from it) is unchanged.
    CHILD, ADULT, OLDER_ADULT = "CHILD", "ADULT", "OLDER_ADULT"

class Age(BaseModel):
    # Age eligibility: two optional integer bounds plus a required list of
    # age-group labels.
    # NOTE(review): documented with comments instead of a docstring on purpose;
    # pydantic presumably copies a model docstring into the generated JSON
    # schema ("description"), which would change what guided decoding receives.
    # NOTE(review): the sample input in this notebook states ages in years;
    # the model itself does not encode a unit -- confirm with the data source.
    Min: Optional[int] = None  # lower bound; may be absent or null
    Max: Optional[int] = None  # upper bound; may be absent or null
    AgeGroup: List[AgeGroupEnum]  # required; each entry must be an AgeGroupEnum value

class EC(BaseModel):
    # Structured eligibility criteria. The JSON schema generated from this
    # model is what the notebook passes to guided decoding, and extract_ec()
    # validates parsed model output against it.
    # All five fields are required (none has a default).
    InclusionCriteria: List[str]  # inclusion criteria, one string per item
    ExclusionCriteria: List[str]  # exclusion criteria, one string per item
    Sex: SexEnum  # one of MALE / FEMALE / ALL
    Age: Age  # nested Age model; the field shares its name with the model type
    AcceptHealthyVolunteers: bool  # JSON true/false

# --- Engine and sampling configuration --------------------------------------
# Tunables are named here instead of being buried in the calls below.
MAX_MODEL_LEN = 50000  # context length requested from the engine
MAX_NEW_TOKENS = 4096  # upper bound on generated tokens per request
# Fixed per-request sampling seed. Without it, re-running a cell on the same
# prompt can produce different text (the recorded runs in this notebook differ
# in capitalisation), which makes the notebook non-reproducible.
SEED = 0

# JSON schema derived from the EC pydantic model; generation is constrained
# to it.
json_schema = EC.model_json_schema()
model_id = "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16"

# The backend is chosen per request here; the engine log shows its own default
# guided-decoding backend is 'outlines'.
guided_decoding_params = GuidedDecodingParams(json=json_schema, backend="lm-format-enforcer")
sampling_params = SamplingParams(
    guided_decoding=guided_decoding_params,
    max_tokens=MAX_NEW_TOKENS,
    seed=SEED,
)

# Loads the model weights and allocates the KV cache (see the log output
# below); this is the expensive step of the notebook.
llm = LLM(model=model_id, max_model_len=MAX_MODEL_LEN)




INFO 12-30 23:49:28 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
INFO 12-30 23:49:28 config.py:1136] Chunked prefill is enabled with max_num_batched_tokens=512.
INFO 12-30 23:49:28 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16', speculative_config=None, tokenizer='neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=50000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=compressed-tensors, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=O

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


INFO 12-30 23:49:32 model_runner.py:1077] Loading model weights took 8.4927 GB
INFO 12-30 23:49:33 worker.py:232] Memory profiling results: total_gpu_memory=23.68GiB initial_memory_usage=8.97GiB peak_torch_memory=9.67GiB memory_usage_post_profile=9.00GiB non_torch_memory=0.50GiB kv_cache_size=11.15GiB gpu_memory_utilization=0.90
INFO 12-30 23:49:33 gpu_executor.py:113] # GPU blocks: 5706, # CPU blocks: 2048
INFO 12-30 23:49:33 gpu_executor.py:117] Maximum concurrency for 50000 tokens per request: 1.83x
INFO 12-30 23:49:36 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 12-30 23:49:36 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 1

In [None]:
# Instruction placed in front of the raw eligibility text.
# Fixed typo: "recwrite" -> "rewrite", so the instruction reads as intended.
base_prompt = """Using the following eligibility criteria details, generate a structured JSON output that adheres to the schema provided. Do not rewrite the criteria. Just recite it in a structured JSON format."""

# Sample raw eligibility-criteria record used throughout the notebook.
raw_ec = "#Eligibility Criteria: Inclusion Criteria: * Diagnosis of CLL as per National Cancer Institute Working Group Guidelines * Patients undergoing routine blood draws as part of their ongoing follow up for CLL * >= 18 years * Ability to provide consent in English * Patient must have measurable disease as defined by an absolute lymphocyte count greater than 5,000/mm3 or have archived lymph node or bone marrow with CLL involvement. Exclusion Criteria: * Patients who have received cytotoxic drug, oral or intravenous steroid or targeted antibody therapy for their CLL, * other hematologic malignancy or other disease process within the past 6 months are excluded. ##Sex : ALL ##Ages : - Minimum Age : 18 Years - Age Group (Child: birth-17, Adult: 18-64, Older Adult: 65+) : ADULT, OLDER_ADULT ##Accepts Healthy Volunteers: No"

# Instruction and record are joined with a newline; the age-group legend
# "(Child: ..., Older Adult: 65+)" is removed from the prompt before generation.
outputs = llm.generate(
    prompts="\n".join([base_prompt, raw_ec]).replace("(Child: birth-17, Adult: 18-64, Older Adult: 65+)", ""),
    sampling_params=sampling_params,
)

# Text of the first completion for the first (only) prompt.
print(outputs[0].outputs[0].text)

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.93s/it, est. speed input: 68.37 toks/s, output: 72.13 toks/s]


'\n{\n  "InclusionCriteria": [\n    "Diagnosis of CLL as per National Cancer Institute Working Group Guidelines",\n    "Patients undergoing routine blood draws as part of their ongoing follow up for CLL",\n    ">= 18 years",\n    "Ability to provide consent in English",\n    "Patient must have measurable disease as defined by an absolute lymphocyte count greater than 5,000/mm3 or have archived lymph node or bone marrow with CLL involvement"\n  ],\n  "ExclusionCriteria": [\n    "Patients who have received cytotoxic drug, oral or intravenous steroid or targeted antibody therapy for their CLL",\n    "other hematologic malignancy or other disease process within the past 6 months are excluded"\n  ],\n  "Sex": "ALL",\n  "Age": {\n    "Min": 18,\n    "Max": null,\n    "AgeGroup": [\n      "ADULT",\n      "OLDER_ADULT"\n    ]\n  },\n  "AcceptHealthyVolunteers": false\n} \n  \r\n\r\n\r\n  '

In [8]:
import json
def extract_ec(raw_ec: str, llm) -> dict:
    """Turn raw eligibility-criteria text into a validated JSON dict.

    Builds a prompt from a fixed instruction plus ``raw_ec``, generates with
    the notebook-level ``sampling_params``, parses the first completion as
    JSON and validates the result against the ``EC`` model.

    Args:
        raw_ec: Raw eligibility-criteria text for one record.
        llm: Object exposing ``generate(prompts=..., sampling_params=...)``;
            in this notebook, the vLLM ``LLM`` instance.

    Returns:
        The parsed JSON object as a plain ``dict``, exactly as produced by
        ``json.loads``. It is *not* an ``EC`` instance: ``EC`` is used only
        to validate, so defaults declared on the model are not filled in.
        (The previous ``-> EC`` annotation did not match this.)

    Raises:
        json.JSONDecodeError: If the generated text is not valid JSON.
        pydantic.ValidationError: If the parsed object does not satisfy ``EC``.
    """
    # Fixed typo: "recwrite" -> "rewrite".
    base_prompt = """Using the following eligibility criteria details, generate a structured JSON output that adheres to the schema provided. Do not rewrite the criteria. Just recite it in a structured JSON format."""

    # Instruction and record are joined with a newline; the age-group legend
    # is removed from the prompt before generation.
    prompt = "\n".join([base_prompt, raw_ec]).replace("(Child: birth-17, Adult: 18-64, Older Adult: 65+)", "")
    outputs = llm.generate(
        prompts=prompt,
        sampling_params=sampling_params,
    )

    # json.loads tolerates the leading/trailing whitespace seen around the
    # generated object.
    return_json = json.loads(outputs[0].outputs[0].text)

    # Validation only: raises on a schema mismatch. The validated model is
    # deliberately discarded so callers keep receiving the raw dict.
    EC.model_validate(return_json)
    return return_json
    
# Run the extraction on the sample record; as the cell's last expression the
# returned dict is displayed as the cell output.
extract_ec(raw_ec=raw_ec, llm=llm)

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it, est. speed input: 70.31 toks/s, output: 71.37 toks/s]


{'InclusionCriteria': ['Diagnosis of CLL as per National Cancer Institute Working Group Guidelines',
  'Patients undergoing routine blood draws as part of their ongoing follow up for CLL',
  '>= 18 years',
  'Ability to provide consent in English',
  'Patient must have measurable disease as defined by an absolute lymphocyte count greater than 5,000/mm3 or have archived lymph node or bone marrow with CLL involvement'],
 'ExclusionCriteria': ['Patients who have received cytotoxic drug, oral or intravenous steroid or targeted antibody therapy for their CLL',
  'Other hematologic malignancy or other disease process within the past 6 months are excluded'],
 'Sex': 'ALL',
 'Age': {'Min': 18, 'AgeGroup': ['ADULT', 'OLDER_ADULT']},
 'AcceptHealthyVolunteers': False}

In [5]:
# Print the text of the first completion held in the notebook-level `outputs`
# (assigned by the standalone llm.generate cell; the `outputs` inside
# extract_ec is local to that function and does not affect it).
# NOTE(review): this repeats the print at the end of the generation cell, and
# its execution count (5) is lower than the extract_ec cell's (8), so the cells
# were run out of order -- consider removing this cell.
print(outputs[0].outputs[0].text)


{
  "InclusionCriteria": [
    "Diagnosis of CLL as per National Cancer Institute Working Group Guidelines",
    "Patients undergoing routine blood draws as part of their ongoing follow up for CLL",
    ">= 18 years",
    "Ability to provide consent in English",
    "Patient must have measurable disease as defined by an absolute lymphocyte count greater than 5,000/mm3 or have archived lymph node or bone marrow with CLL involvement"
  ],
  "ExclusionCriteria": [
    "Patients who have received cytotoxic drug, oral or intravenous steroid or targeted antibody therapy for their CLL",
    "other hematologic malignancy or other disease process within the past 6 months are excluded"
  ],
  "Sex": "ALL",
  "Age": {
    "Min": 18,
    "Max": null,
    "AgeGroup": [
      "ADULT",
      "OLDER_ADULT"
    ]
  },
  "AcceptHealthyVolunteers": false
} 
  


  
