In [None]:
import pandas as pd
import sys
# Add the parent directory to the module search path so the project-local
# `src` and `utils` packages imported just below can be resolved. This line
# must stay ahead of those imports.
# NOTE(review): the entry is a relative path, so it presumably relies on the
# kernel's working directory being this notebook's folder — confirm.
sys.path.append('../')
from src.llmwrapper import LLMWrapper
from src.labelprocessor import LabelProcessor
from utils.loader import load_data
from utils.structs import IntervalHistoryOutput
from environs import Env
# Read settings from the .env file one directory up, then fetch STUDY_PATH,
# which the data-loading cell uses as the root folder of the study CSVs.
env = Env()
env.read_env('../.env')
study_path = env("STUDY_PATH")

In [None]:
# Load data
# NOTE(review): `type` shadows the Python builtin of the same name for the rest
# of the kernel session. It is kept as-is because the LLMWrapper cell reads it
# (dataset_name, output_dir); renaming requires updating that cell too.
type = "GI"
studies_folder = f"{study_path}/{type}/CSV/"
# load_data returns two objects, unpacked here as j_df and k_df.
j_df, k_df = load_data(studies_folder)

# Process labels and build the ground-truth frame
# NOTE(review): the frames are passed as (k_df, j_df), the reverse of the
# unpacking order above — confirm this matches LabelProcessor's signature.
processor = LabelProcessor(k_df, j_df)
result_df = processor.process()
gt_df = processor.generate_gt()  # Ground truth DataFrame

In [3]:
# Number of leading ground-truth rows to keep (presumably to keep the LLM run
# short while iterating). Set to None to keep every row: `iloc[:None]` selects
# the whole frame.
# Because gt_df is overwritten here, re-run the cell that calls
# processor.generate_gt() before enlarging this value, otherwise the rows that
# were already dropped are not restored.
GT_SAMPLE_ROWS = 20
gt_df = gt_df.iloc[:GT_SAMPLE_ROWS].copy(deep=True)

In [4]:
# Instruction text handed to LLMWrapper as `prompt`. It asks the model to locate
# the HPI / Interval-history section of a medical note and answer with a JSON
# dictionary holding `start_string` (first 5 words of the section) and
# `end_string` (last 5 words of the section).
# The line breaks and trailing spaces inside the triple-quoted literal are part
# of the prompt text, so do not reflow it casually.
# NOTE(review): the note text is presumably appended after the final
# "Here is the medical note: " by LLMWrapper — confirm.
main_prompt = """You are a helpful assistant with a strong clinical background in oncology. 
You know that medical notes are generally organized in sections, and your task is to find 
the part of the note corresponding to the section containing the History of present illness and 
the Interval history. You should organize this information in a json file that will contain a 
dictionary with two keys: start_string, and end_string. start_string 
should contain the 5 first words of the HPI_Interval_Hx section, and end_string should 
contain the last 5 words of the HPI_Interval_Hx section. Here is the medical note: """

In [None]:
# Settings for the locally stored GGUF model; passed to LLMWrapper through its
# `model_params` argument.
# NOTE(review): `ngl` presumably means "number of GPU layers", with 1000 acting
# as "offload everything" — confirm against LLMWrapper.
model_params_local = dict(
    model_path='../models/llama-3.2-3b-instruct-q8_0.gguf',
    ngl=1000,
    temp=0,
)

# Grammar definition passed to LLMWrapper through its `grammar_file` argument.
grammar_file = '../utils/HPI_grammar.json'

In [6]:
# Build the wrapper that will run the prompt against the local model. Every
# argument comes from an earlier cell: the prompt text, the model settings,
# the grammar file path, the dataset label (`type`) and the LabelProcessor
# instance created when the data was loaded.
# NOTE(review): whether LLMWrapper creates `output_dir` when it is missing is
# not visible from this notebook — confirm before a first run.
llm_wrapper = LLMWrapper(
    prompt=main_prompt,
    llm_type='local',  # the alternative value is 'api', for API-based LLMs
    model_params=model_params_local,
    context_window=8192,
    dataset_name=type,
    output_dir=f'../outputs/{type}/',  # one output folder per dataset label
    grammar_file=grammar_file,
    processor=processor  # same LabelProcessor instance that produced gt_df
)

In [None]:
# Run the wrapper over the ground-truth frame prepared above and keep whatever
# it returns.
# NOTE(review): presumably this is the slow step (one model call per row) and
# also writes results under the wrapper's output_dir — confirm.
updated_df = llm_wrapper.generate_llm_outputs(gt_df)