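**Note:** you need to restart the kernel to register any changes you make to the imported modules (`approach1` and `approach2`). Alternatively, enable IPython's `autoreload` extension so edits are picked up automatically.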

In [1]:
import json

def get_dataset(dataset_string):
    """Load a JSON dataset from disk and return its parsed content.

    Args:
        dataset_string: Path to the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Pin the encoding: without it, open() falls back to the platform locale
    # (e.g. cp1252 on Windows) and non-ASCII text in the records is mis-decoded.
    with open(dataset_string, 'r', encoding='utf-8') as file:
        return json.load(file)

### Sample code to test the different approaches with different models and different iterations of the synthetic dataset

In [2]:

from approaches.approach1 import approach1
from approaches.approach2 import approach2
from llms.llm_interaction import GroqClient

# Load the synthetic dataset and build the Groq-backed client used by both runs.
patient_records2 = get_dataset('datasets/patient_records2.json')
llm_client = GroqClient(model="llama-3.3-70b-versatile")

# Run each approach in turn on the first five records with the same client.
# Each call returns two values, unpacked as `pred, true` (presumably the
# predictions and ground-truth labels — confirm in the approach modules).
# Note: the names are reassigned on every iteration, so after the loop they
# hold the results of the last approach (approach2) only.
for run_approach in (approach1, approach2):
    pred, true = run_approach(patient_records2[0:5], llm_client)

Groq client initialized with model: llama-3.3-70b-versatile


Processing records: 100%|██████████| 5/5 [00:02<00:00,  2.44record/s]


accuracy of model over 5 generated records: 0.8
precision of model over 5 generated records: 0.75
recall of model over 5 generated records: 1.0
f1 of model over 5 generated records: 0.8571428571428571


Processing records:  60%|██████    | 3/5 [00:02<00:01,  1.20record/s]


KeyboardInterrupt: 

### Experimenting with prompt templates and different models

In [4]:
from llms.llm_interaction import OpenAIClient, AnthropicClient

# One client per provider being compared.
gpt4o = OpenAIClient(model="gpt-4o")
sonnet = AnthropicClient(model="claude-3-5-sonnet-20240620")
versatile = GroqClient(model="llama-3.3-70b-versatile")

# Custom prompts passed to approach1 through its keyword arguments; the
# template contains the `{regex}` and `{record_text}` placeholders.
prompt_template = "Check if this semantic regex - {regex} - exists in the following patient record - {record_text}."
system_prompt = "You are a helpful AI assistant that only answers True or False based on patient data and provided semantic regex matching."

# Evaluate the same five records and prompts with every client, printing a
# separator line between consecutive runs. `pred, true` are reassigned each
# time, so they end up holding the results of the last client (versatile).
for run_index, prompt_client in enumerate((gpt4o, sonnet, versatile)):
    if run_index > 0:
        print("="*100)
    pred, true = approach1(
        patient_records2[0:5],
        prompt_client,
        prompt_template=prompt_template,
        system_prompt=system_prompt,
    )


ValueError: OPENAI_API_KEY not found in environment variables

## Naive approach 1 (no semantic regex, just a natural-language query)

In [3]:
from approach1 import approach1_naive
from llms.llm_interaction import OpenAIClient, GroqClient

# Clients for the two providers compared in this cell.
gpt4o = OpenAIClient(model="gpt-4o")
versatile = GroqClient(model="llama-3.3-70b-versatile")

# Dataset variant used by the naive approach (per the file name, presumably
# the same records paired with natural-language queries — confirm).
patient_records2_nl_query = get_dataset('datasets/patient_records2_nl_query.json')

# Run the naive approach on the first five records with each client in turn.
# `pred, true` are reassigned per run, so they end up holding the results of
# the last client (versatile).
# Disabled run kept from the original cell (note it targets patient_records2):
# pred, true = approach1_naive(patient_records2[0:5], sonnet)
for naive_client in (gpt4o, versatile):
    pred, true = approach1_naive(patient_records2_nl_query[0:5], naive_client)

OpenAI client initialized with model: gpt-4o
Groq client initialized with model: llama-3.3-70b-versatile


Processing records: 100%|██████████| 5/5 [00:03<00:00,  1.43record/s]


accuracy of model over 5 generated records: 1.0
precision of model over 5 generated records: 1.0
recall of model over 5 generated records: 1.0
f1 of model over 5 generated records: 1.0


Processing records: 100%|██████████| 5/5 [00:01<00:00,  2.64record/s]

accuracy of model over 5 generated records: 1.0
precision of model over 5 generated records: 1.0
recall of model over 5 generated records: 1.0
f1 of model over 5 generated records: 1.0



