In [14]:
import os
import sys

# The notebook is run from a sub-directory of the repository, so the parent of
# the working directory is added to the import path to make `src.*` importable.
notebook_dir = os.getcwd()
path_root = os.path.dirname(notebook_dir)

# Guarded so that re-running this cell does not append duplicate entries.
if path_root not in sys.path:
    sys.path.append(path_root)

# Creating Prompt Templates

In [2]:
from src.config.prompt_template_handler import (
    save_extraction_template_to_json,
    save_generate_template_to_json,
    load_and_validate_extraction_prompt_template,
    load_and_validate_generate_prompt_template,
)

## Saving a Prompt Template: Example for Llama 2

You will need to make sure that your generate template has a **{data}** placeholder defined.

In [None]:
# Where the generate prompt template is stored as JSON (relative to the
# notebook's working directory).
generate_template_path = "../src/config/templates/generate/llama2_template.json"

# Llama 2 chat-style prompt. It must keep the {data} placeholder.
# NOTE(review): the prompt text contains typos ("asssessed", "a recommendations").
# They are left untouched here because editing them changes what is sent to the
# model - confirm before correcting.
generate_template = """[INST]
<<SYS>>
You are a medical student answering an exam question about writing clinical notes for patients.
<</SYS>>

Keep in mind that your answer will be asssessed based on incorporating all the provided information and the quality of prose.

1. Use prose to write an example clinical note for this patient's doctor.
2. Use less than three sentences.
3. Do not provide a recommendations.
4. Use the following information:

{data}
[/INST]
"""

save_generate_template_to_json(
    file_path=generate_template_path,
    template_str=generate_template,
)

In [None]:
# Round-trip check: read the saved generate template back through the project
# loader (which, going by its name, also validates it - TODO confirm what is
# checked) and print the result for inspection.
loaded_generate_template = load_and_validate_generate_prompt_template(
    filename=generate_template_path
)
print(loaded_generate_template)

## Saving a UniversalNER prompt Example for Extraction

You will need to ensure your template has **{input_text}** and **{entity_name}** as prompt template placeholders.

In [3]:
# Where the extraction prompt template is stored as JSON (relative to the
# notebook's working directory).
extraction_template_path = "../src/config/templates/extraction/universal_ner_template.json"

# UniversalNER-style conversation prompt. It must keep both the {input_text}
# and {entity_name} placeholders. The leading indentation is part of the string.
universalner_prompt_template = """
    USER: Text: {input_text}
    ASSISTANT: I’ve read this text.
    USER: What describes {entity_name} in the text?
    ASSISTANT: (model's predictions in JSON format)
    """

save_extraction_template_to_json(
    file_path=extraction_template_path,
    template_str=universalner_prompt_template,
)

Template saved to '../src/config/templates/extraction/universal_ner_template.json'


In [None]:
# Round-trip check: read the saved extraction template back through the project
# loader (which, going by its name, also validates it - TODO confirm what is
# checked) and print the result for inspection.
loaded_extraction_template = load_and_validate_extraction_prompt_template(
    filename=extraction_template_path
)
print(loaded_extraction_template)

# Privacy Fingerprint Experiment Pipeline

You will need to load the ExperimentalConfigHandler.


In [2]:
from src.config.experimental_config_handler import ExperimentalConfigHandler

**default_config_path** lets the user point to the default experimental config values. Currently the pipeline copies this default experimental config into the experiment folder; if a copy already exists there, the pipeline uses only the experimental config defined in that folder.

In [None]:
# Location of the default experimental config YAML that the pipeline copies into
# the experiment folder. If a copy already exists in that folder, the existing
# copy is used instead of this file.
default_config_path = "../config/experimental_config.yaml"

Then you need to define your overrides:

* **iter_overrides**: Overrides for components that you want to iterate across, meaning each value is compared to every other value that is defined.

You will **NEED** to define your experiment name as this defines where your experiment folder should sit.

* **combine_overrides**: Use these when you want to look at specific combinations of values without necessarily iterating across all values. For example, a specific LLM model might require a different prompt template.



In [16]:
# Define your iter overrides: each listed value is run against every value
# defined for the other keys.
iter_overrides = {
    # Required: the experiment name decides which folder under the outputs
    # directory this experiment reads from and writes to.
    "outputs.experiment_name": "experiment_0001_14_05_24",
    # "synthea.population_num": ["2", "3", "4", "5"],
    # Extraction back-ends to iterate across.
    "extraction.server_model_type": ["gliner", "ollama", "local"],
}

# Define your combine overrides: values that belong together (for example a
# model and the prompt template it needs) rather than being crossed with
# everything else. Presumably the lists are paired up by position - TODO confirm.
# Every example below is commented out, so this dict is currently empty.
combine_overrides = {
    # "extraction.gliner_features.gliner_model": ["urchade/gliner_medium-v2.1", "urchade/gliner_base"],
    # "extraction.ollama_features.ollama_ner_model": ["ollama1", "ollama2"],
    # "extraction.ollama_features.prompt_template_path": ["universalnerpath1", "universalnerpath2"],
    # "generate.llm_model_features.llm_model_name": ["llama2", "llama3"],
    # "generate.llm_model_features.prompt_template_path": ["llama2_template.json", "llama3_template.json"]
}

This initialises the experimental config handler and then prints each component's configurations so that users can inspect how they have been defined.

In [17]:
# Build the handler from the default config plus both sets of overrides.
config_handler = ExperimentalConfigHandler(
    default_config_path=default_config_path,
    iter_overrides=iter_overrides,
    combine_overrides=combine_overrides,
)

# (component name, header to print) in pipeline order. Every header after the
# first starts with a newline so the sections are separated by a blank line.
component_headers = (
    ("synthea", "---- SyntheaConfig ----"),
    ("generate", "\n---- GenerateConfig ----"),
    ("extraction", "\n---- ExtractionConfig ----"),
)

# Print every resolved configuration, one per line, grouped by component.
for component_name, header in component_headers:
    print(header)
    component_configs = config_handler.load_component_experimental_config(
        component_name
    )
    for config in component_configs:
        print(config)

Configuration file already exists at ../outputs/experiment_0001_14_05_24/experimental_config.yaml
---- SyntheaConfig ----
population_num='50' county='West Yorkshire' path_output='../outputs/experiment_0001_14_05_24/synthea/synthea_0.json'

---- GenerateConfig ----
llm_model_features=GenerateModelFeaturesConfig(llm_model_name='llama2', prompt_template_path='llama2_template.json') synthea_path='../outputs/experiment_0001_14_05_24/synthea/synthea_0.json' path_output='../outputs/experiment_0001_14_05_24/generate/generate_0.json'

---- ExtractionConfig ----
server_model_type='gliner' gliner_features=GlinerFeaturesConfig(gliner_model='urchade/gliner_medium-v2.1') local_features=LocalFeaturesConfig(hf_repo_id=None, hf_filename=None, prompt_template_path=None) ollama_features=OllamaFeaturesConfig(ollama_ner_model=None, prompt_template_path=None) entity_list=['person', 'date of bith', 'nhs number', 'diagnosis'] llm_path='../outputs/experiment_0001_14_05_24/generate/generate_0.json' path_output=

## 1. GenerateSynthea: Generating Synthetic Patient Data using Synthea 

This extracts every Synthea configuration that has been defined, runs each one through the pipeline, and saves the data to the ./outputs/experiment_name folder.

In [4]:
# Run every synthea configuration held by the handler; per the run log, each
# run reports its config, including the path_output its data is written to.
config_handler.run_component_experiment_config(component_type="synthea")

synthea run 0 with config population_num='50' county='West Yorkshire' path_output='../outputs/experiment_0001_14_05_24/synthea/synthea_0.json'
[{'NHS_NUMBER': '1136099875', 'DATE_OF_BIRTH': '1992-11-26', 'GIVEN_NAME': 'Erick', 'FAMILY_NAME': 'Cummings', 'DIAGNOSIS': 'Viral sinusitis (disorder)'}, {'NHS_NUMBER': '7299667363', 'DATE_OF_BIRTH': '1980-06-26', 'GIVEN_NAME': 'Leif', 'FAMILY_NAME': 'Veum', 'DIAGNOSIS': 'Acute bacterial sinusitis (disorder)'}, {'NHS_NUMBER': '0819789453', 'DATE_OF_BIRTH': '1975-05-13', 'GIVEN_NAME': 'Tanner', 'FAMILY_NAME': 'Armstrong', 'DIAGNOSIS': 'Appendicitis'}, {'NHS_NUMBER': '6247446829', 'DATE_OF_BIRTH': '1975-04-16', 'GIVEN_NAME': 'Moshe', 'FAMILY_NAME': 'Strosin', 'DIAGNOSIS': 'Acute viral pharyngitis (disorder)'}, {'NHS_NUMBER': '3593133113', 'DATE_OF_BIRTH': '1972-09-26', 'GIVEN_NAME': 'Coralee', 'FAMILY_NAME': 'Beer', 'DIAGNOSIS': 'Normal pregnancy'}, {'NHS_NUMBER': '0800603753', 'DATE_OF_BIRTH': '1967-12-09', 'GIVEN_NAME': 'Stanton', 'FAMILY_NAME'

## 2. GenerateLLM: Generating Synthetic Patient Medical Notes 

This extracts every generate configuration that has been defined, runs each one through the pipeline, and saves the data to the ./outputs/experiment_name folder.

In [5]:
# Run every generate configuration held by the handler; per the run log, each
# run reads the synthea output named by its synthea_path and reports its own
# path_output.
config_handler.run_component_experiment_config(component_type="generate")

generate run 0 with config llm_model_features=GenerateModelFeaturesConfig(llm_model_name='llama2', prompt_template_path='llama2_template.json') synthea_path='../outputs/experiment_0001_14_05_24/synthea/synthea_0.json' path_output='../outputs/experiment_0001_14_05_24/generate/generate_0.json'
['Clinical Note:\nPatient Name: Erick Cummings\nNHS Number: 1136099875\nDate of Birth: November 26, 1992\n\nPresentation: Mr. Cummings presented to the clinic with a 3-day history of nasal congestion, sneezing, and a mild fever. He reported difficulty breathing through his nose and experienced some facial pain. His medical history includes allergies and a previous viral sinusitis diagnosis.\n\nDiagnosis: Viral sinusitis (disorder)\n\nPlan: Recommended treatment includes nasal decongestants, antihistamines, and saline nasal sprays to help manage symptoms. Follow-up appointment scheduled for one week to monitor progress.', 'Clinical Note:\nPatient Leif Veum, NHS number 7299667363, presents with acute

## 3. Extraction: Re-extracting Entities from the Patient Medical Notes

This extracts every extraction configuration that has been defined, runs each one through the pipeline, and saves the data to the ./outputs/experiment_name folder.

In [6]:
# Run every extraction configuration held by the handler; per the run log, each
# run reads the generated notes named by its llm_path and reports its own
# path_output.
config_handler.run_component_experiment_config(component_type="extraction")

extraction run 0 with config server_model_type='gliner' gliner_features=GlinerFeaturesConfig(gliner_model='urchade/gliner_medium-v2.1') local_features=LocalFeaturesConfig(hf_repo_id=None, hf_filename=None, prompt_template_path=None) ollama_features=OllamaFeaturesConfig(ollama_ner_model=None, prompt_template_path=None) entity_list=['person', 'date of bith', 'nhs number', 'diagnosis'] llm_path='../outputs/experiment_0001_14_05_24/generate/generate_0.json' path_output='../outputs/experiment_0001_14_05_24/extraction/extraction_0.json'
[{'Entities': [{'start': 29, 'end': 43, 'text': 'Erick Cummings', 'label': 'person', 'score': 0.9461807012557983}, {'start': 44, 'end': 54, 'text': 'NHS Number', 'label': 'nhs number', 'score': 0.7090232372283936}, {'start': 56, 'end': 66, 'text': '1136099875', 'label': 'nhs number', 'score': 0.8978695869445801}, {'start': 82, 'end': 99, 'text': 'November 26, 1992', 'label': 'date of bith', 'score': 0.9462364912033081}, {'start': 360, 'end': 385, 'text': 'vir

## Visualising the Experiment Workflow

This method on the config handler lets a user inspect their workflow's data, giving them an idea of which configuration feeds into which output.

In [13]:
# Show the experiment's workflow so you can see which configuration produced
# which output file (rendering details live in the handler - not visible here).
config_handler.load_pipeline_visualisation()

## Reload the Data

By using the above workflow, you can then specify which data you would like to reload back into the notebook.

In [10]:
# Reload a saved output by name; "generate_0" matches the generate_0.json file
# written by the generate step. Left as the cell's last expression so the
# notebook displays the loaded data.
config_handler.load_specified_data_file(filename="generate_0")

['Clinical Note:\nPatient Name: Erick Cummings\nNHS Number: 1136099875\nDate of Birth: November 26, 1992\n\nPresentation: Mr. Cummings presented to the clinic with a 3-day history of nasal congestion, sneezing, and a mild fever. He reported difficulty breathing through his nose and experienced some facial pain. His medical history includes allergies and a previous viral sinusitis diagnosis.\n\nDiagnosis: Viral sinusitis (disorder)\n\nPlan: Recommended treatment includes nasal decongestants, antihistamines, and saline nasal sprays to help manage symptoms. Follow-up appointment scheduled for one week to monitor progress.',
 'Clinical Note:\nPatient Leif Veum, NHS number 7299667363, presents with acute bacterial sinusitis. Born on June 26th, 1980, he is a 42-year-old male. He is currently experiencing symptoms of nasal congestion, facial pain, and yellow discharge from the nose.',
 "Clinical Note:\nPatient Name: Tanner Armstrong\nNHS Number: 0819789453\nDate of Birth: May 13, 1975\n\nPres