In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os

# Provide a fallback cache directory for llmscope without clobbering one the
# user has already exported: `setdefault` only writes the value when
# LLMSCOPE_CACHE_DIR is not set, so the notebook also runs on machines where
# the hard-coded path below does not exist.
# NOTE(review): this presumably has to execute before any `llmscope` import
# for the setting to take effect — confirm against the library.
os.environ.setdefault("LLMSCOPE_CACHE_DIR", "/vol/bitbucket/ad5518/llmscope_cache")

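# Pipeline Test

End-to-end smoke test of an `llmscope` `SimplePipeline`: load the ACI-Bench training split, keep the first 10 examples, prompt `meta-llama/Llama-3.1-8B-Instruct` to write a clinical note for each dialogue, and score the generated notes with `BleuEvaluator`.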


## Dataset


In [None]:
from llmscope.datasets.managers import AciBenchDatasetManager

# Dataset manager restricted to the "train" split.
aci_dataset_manager = AciBenchDatasetManager(splits=["train"])

## Task Preprocessor


In [None]:
from datasets import Dataset

from llmscope.columns import ColumnConfig

# Column configuration built with default arguments; the task preprocessor
# reads `column_config.references.column_name` from it.
column_config = ColumnConfig()


def aci_task_preprocessor(dataset: Dataset, max_samples: int = 10) -> Dataset:
    """Prepare an ACI-Bench dataset for a quick pipeline run.

    Renames the ``note`` column to the reference column name taken from
    ``column_config`` and keeps only the leading rows of the dataset.

    Args:
        dataset: Dataset containing a ``note`` column.
        max_samples: Upper bound on the number of rows kept (default 10).
            The bound is clamped to the dataset size, so a dataset with
            fewer rows is returned in full instead of raising an
            out-of-range index error.

    Returns:
        The renamed dataset truncated to at most ``max_samples`` rows.
    """
    dataset = dataset.rename_column("note", column_config.references.column_name)
    # Clamp the slice: selecting range(10) fails on datasets with < 10 rows.
    num_rows = min(max_samples, len(dataset))
    return dataset.select(range(num_rows))

## Prompt Formatter


In [None]:
def aci_prompt_formatter(dialogue, **kwargs):
    """Build the chat messages that ask the model for a clinical note.

    Args:
        dialogue: Doctor-patient conversation; interpolated into the user
            message under the ``**Conversation:**`` heading.
        **kwargs: Accepted and ignored.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system message first, then the user message containing the note
        format, the conversation, and a trailing ``**Note:**`` cue.
    """
    # (section title, bracketed hint) pairs rendered below as a numbered list.
    note_sections = (
        ("CHIEF COMPLAINT", "Brief description of the main reason for the visit"),
        (
            "HISTORY OF PRESENT ILLNESS",
            "Summary of the patient's current health status and any changes since the last visit",
        ),
        ("REVIEW OF SYSTEMS", "List of symptoms reported by the patient"),
        ("PHYSICAL EXAMINATION", "Findings from the physical examination"),
        ("RESULTS", "Relevant test results"),
        (
            "ASSESSMENT AND PLAN",
            "Doctor's assessment and plan for treatment or further testing",
        ),
    )
    numbered_format = "\n".join(
        f"{position}. **{title}**: [{hint}]"
        for position, (title, hint) in enumerate(note_sections, start=1)
    )

    task_intro = "Your task is to generate a clinical note based on a conversation between a doctor and a patient. Use the following format for the clinical note:"
    user_content = (
        f"{task_intro}\n\n"
        f"{numbered_format}\n\n"
        f"**Conversation:**\n{dialogue}\n\n"
        "**Note:**\n"
    )
    system_content = "You are an expert clinical assistant specialising in the creation of medically accurate summaries from a dialogue between the doctor and patient."

    return [
        dict(role="system", content=system_content),
        dict(role="user", content=user_content),
    ]

## LLM Manager


In [None]:
from llmscope.llms.managers import VLlmManager

# LLM manager configured with the model name "meta-llama/Llama-3.1-8B-Instruct".
llama_llm_manager = VLlmManager(name="meta-llama/Llama-3.1-8B-Instruct")

## Evaluator


In [None]:
from llmscope.evaluation.evaluators import BleuEvaluator

# Evaluator instance, built with default arguments, that is handed to the
# pipeline as its `evaluator`.
bleu_evaluator = BleuEvaluator()

## Pipeline


In [None]:
from llmscope.pipelines import SimplePipeline

# Wire the components defined in the cells above into a single pipeline.
aci_pipeline = SimplePipeline(
    # Data side: where examples come from and how they are prepared.
    dataset_manager=aci_dataset_manager,
    task_preprocessor=aci_task_preprocessor,
    # Model side: how prompts are built and which LLM answers them.
    prompt_formatter=aci_prompt_formatter,
    llm_manager=llama_llm_manager,
    # Scoring side.
    evaluator=bleu_evaluator,
)

In [None]:
# Run the pipeline and keep its return value for inspection in the next cell.
outputs = aci_pipeline.run()

In [None]:
# Bare last expression: rendered as this cell's output.
outputs