# Exploring FHIR Implementation Guides (IGs) + LLMs

In this notebook, we aim to explore how much LLMs understand about FHIR Implementation Guides (IGs) and investigate ways to upload IG content for deeper analysis.

### Import relevant libraries

Make sure you have langchain-community and beautifulsoup4 installed

In [1]:
# %pip install -U langchain-community bs4

In [2]:
# %pip install httpx

In [3]:
# %pip install -U json_repair

In [3]:
import os
import google.generativeai as gemini
from anthropic import Anthropic
from openai import OpenAI
import io, threading, time, re, json
import pandas as pd
from json_repair import repair_json
from langchain_community.document_loaders import BSHTMLLoader
import shutil
from dotenv import load_dotenv
import httpx
from collections import defaultdict

### Read in US Core IG JSON files

NOTE: Be sure that you have downloaded the US Core IG package from https://www.hl7.org/fhir/us/core/package.tgz and extracted it, so that a `package` folder sits in your current directory.

In [4]:
# Folder that is scanned for the IG's .json files.
source_folder = 'package'
# 'full-ig/site'  (earlier value, presumably from the archived HTML workflow)
# Folder that receives a copy of every .json file found in source_folder.
destination_folder = 'package/json_only'
# 'full-ig/html_only'  (earlier value, presumably from the archived HTML workflow)

In [5]:
# Create the destination folder if it doesn't exist.
# exist_ok=True keeps the cell safe to re-run and avoids the gap between a
# separate os.path.exists() check and the actual creation.
os.makedirs(destination_folder, exist_ok=True)

In [6]:
# Names of the .json files found in source_folder; populated by the next cell.
# html_files = []
json_files = []

In [7]:
# Collect every regular file in source_folder whose name ends with '.json'.
# The list is rebuilt (not appended to) so that re-running this cell does not
# record the same file twice, and it is sorted so the order does not depend on
# the filesystem's listing order.
json_files = sorted(
    file_name
    for file_name in os.listdir(source_folder)
    if file_name.endswith('.json')
    and os.path.isfile(os.path.join(source_folder, file_name))
)

# Copy (not move) each JSON file into the destination folder; the originals
# stay where they are.
for file_name in json_files:
    shutil.copy(os.path.join(source_folder, file_name), destination_folder)

### Loading HTML with BeautifulSoup4

This is archived code to load HTML files with BeautifulSoup4. We are now using JSON files so this step is skipped. 


TODO: Rewrite HTML loader as a function for potential future use.

In [9]:
# html_only_folder = 'full-ig/html_only'
# Folder holding the JSON-only copy of the IG package (same path as destination_folder above).
json_only_folder = 'package/json_only'

In [10]:
# Create a new folder named "plain_text" inside the current directory
# processed_files_path = os.path.join(html_only_folder, 'plain_txt')

# Create the destination folder if it doesn't exist
# if not os.path.exists(processed_files_path):
#     os.makedirs(processed_files_path)

In [11]:
# List to store the files processed
# processed_files = []

In [12]:
# # Loop through the files in the HTML folder
# for file_name in os.listdir(html_only_folder):
#     # Full path to the .html file
#     html_file_path = os.path.join(html_only_folder, file_name)
    
#     # Check if it's a file (not a directory)
#     if os.path.isfile(html_file_path):
#         # Use BSHTMLLoader to load the HTML content
#         loader = BSHTMLLoader(html_file_path, bs_kwargs={'features': 'html.parser'})
#         data = loader.load()
#         # Extract the plain text from the loaded data
#         plain_text = '\n'.join([doc.page_content for doc in data])
        
#         # Create the output file path with .txt extension
#         txt_file_name = file_name.replace('.html', '.txt')
#         txt_file_path = os.path.join(processed_files_path, txt_file_name)
        
#         # Write the extracted plain text to the new .txt file
#         with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
#             txt_file.write(plain_text)
        
#         # Append to processed files list
#         processed_files.append(txt_file_name)

### Grouping files

In [8]:
def group_files_by_base_name(directory_path, delimiter='-'):
    """
    Bucket the JSON files of a directory by the text before the first delimiter.

    Args:
    directory_path (str): Directory whose entries are scanned (not recursive).
    delimiter (str): Separator that ends the base name (default is '-').

    Returns:
    defaultdict: Maps each base name to the list of '.json' file names that
    start with it. Names that do not contain the delimiter are left out.
    """
    buckets = defaultdict(list)

    for entry in os.listdir(directory_path):
        # Ignore anything that is not a .json file or has nothing to split on.
        if not entry.endswith('.json') or delimiter not in entry:
            continue

        # Everything before the first delimiter is the group key.
        prefix, _, _ = entry.partition(delimiter)
        buckets[prefix].append(entry)

    return buckets


In [9]:
# Directory whose JSON files are grouped by base name in the following cells.
directory_path = 'package/json_only'
# 'full-ig/html_only/plain_txt'

In [10]:
# Map each base name (text before the first '-') to the JSON files that share it.
grouped_files = group_files_by_base_name(directory_path)

In [11]:
# Report how many files fall under each base name (the per-file listing is left disabled).
for base_name, files in grouped_files.items():
    print(f"Base name: {base_name} (Total files: {len(files)})")
    # for file in files:
    #     print(f"  - {file}")

Base name: StructureDefinition (Total files: 63)
Base name: ValueSet (Total files: 29)
Base name: SearchParameter (Total files: 110)
Base name: CapabilityStatement (Total files: 2)
Base name: CodeSystem (Total files: 5)
Base name: OperationDefinition (Total files: 1)
Base name: ImplementationGuide (Total files: 1)


In [12]:
def copy_files_to_folders(directory_path, grouped_files):
    """
    Copy each group of files into a sub-folder named after the group's base name.

    Every non-empty group gets a folder, including groups with a single file.
    The files are copied, not moved: the originals stay in ``directory_path``.

    Args:
    directory_path (str): Path to the directory containing files.
    grouped_files (dict): Dictionary of grouped files by base name
        (base name -> list of file names located in directory_path).
    """
    for base_name, files in grouped_files.items():
        if not files:  # Nothing to copy, so do not create an empty folder
            continue

        # Folder for this base name, inside the same directory
        base_folder = os.path.join(directory_path, base_name)
        if os.path.isdir(base_folder):
            # Report accurately on re-runs instead of claiming a creation
            print(f"Folder already exists: {base_folder}")
        else:
            os.makedirs(base_folder, exist_ok=True)
            print(f"Created folder: {base_folder}")

        # Copy each file in the group to its folder
        for file in files:
            source_file = os.path.join(directory_path, file)
            destination_file = os.path.join(base_folder, file)
            shutil.copy(source_file, destination_file)

In [13]:
# Copy every grouped file into a sub-folder of directory_path named after its base name.
copy_files_to_folders(directory_path, grouped_files)

Created folder: package/json_only/StructureDefinition
Created folder: package/json_only/ValueSet
Created folder: package/json_only/SearchParameter
Created folder: package/json_only/CapabilityStatement
Created folder: package/json_only/CodeSystem
Created folder: package/json_only/OperationDefinition
Created folder: package/json_only/ImplementationGuide


These file categories match the ones that we had identified to keep.

## Preparing files

In this step, we're preparing our loaded files by combining the JSON files in each directory into a single JSON file, structured with the top-level items "resourceType", "total", and "entry".

In [14]:
def combine_json_files(folder_path):
    """
    Load every JSON file in a folder and collect the parsed documents in a list.

    Files are read in sorted name order so the result is the same on every run
    and platform (os.listdir returns entries in arbitrary order). Files are
    opened as UTF-8 rather than with the platform's default encoding. A file
    that cannot be read or parsed is reported on stdout and skipped; it does
    not abort the run.

    Args:
        folder_path (str): Path to the folder containing JSON files

    Returns:
        list: Parsed content of each readable '.json' file, ordered by file name
    """
    combined_json = []

    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                combined_json.append(json.load(file))
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from {filename}: {e}")
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not stop the batch
            print(f"Error processing {filename}: {e}")

    return combined_json

def create_consolidated_jsons(base_directory='package/json_only'):
    """
    Write one '<subdir>_combined.json' file per sub-folder of base_directory.

    Each output file is placed directly in base_directory and holds an object
    with the keys "resourceType" (the sub-folder name), "total" (number of
    documents) and "entry" (the documents returned by combine_json_files).
    Sub-folders that yield no documents produce no output file. Write errors
    are printed and do not stop the remaining folders.

    Args:
        base_directory (str): Base directory containing the categorized folders
    """
    # Snapshot the sub-folder names before any output file is written
    subfolder_names = []
    for entry in os.listdir(base_directory):
        if os.path.isdir(os.path.join(base_directory, entry)):
            subfolder_names.append(entry)

    for name in subfolder_names:
        print(f"Processing {name}...")
        entries = combine_json_files(os.path.join(base_directory, name))

        # Nothing was loaded for this folder: skip it rather than write an empty file
        if not entries:
            continue

        output_filename = f"{name}_combined.json"
        payload = {
            "resourceType": name,
            "total": len(entries),
            "entry": entries,
        }

        try:
            with open(os.path.join(base_directory, output_filename), 'w') as outfile:
                json.dump(payload, outfile, indent=2)
            print(f"Created {output_filename} with {len(entries)} entries")
        except Exception as e:
            print(f"Error writing {output_filename}: {e}")

In [15]:
# Create the consolidated JSON files
# Uses the default base directory 'package/json_only'.
create_consolidated_jsons()

Processing StructureDefinition...
Created StructureDefinition_combined.json with 63 entries
Processing CapabilityStatement...
Created CapabilityStatement_combined.json with 2 entries
Processing CodeSystem...
Created CodeSystem_combined.json with 5 entries
Processing ValueSet...
Created ValueSet_combined.json with 29 entries
Processing SearchParameter...
Created SearchParameter_combined.json with 110 entries
Processing ImplementationGuide...
Created ImplementationGuide_combined.json with 1 entries
Processing OperationDefinition...
Created OperationDefinition_combined.json with 1 entries


Now we need to identify narrative files and use that content to provide context to the LLMs.

Archived functions/code to combine txt files

In [19]:

# def combine_txt_files(directory_path):
#     """Combines all .txt files in the specified directory into a single string."""
#     combined_text = []
#     #iterate through txt files in directory
#     for filename in os.listdir(directory_path):
#         if filename.endswith('.txt'):
#             file_path = os.path.join(directory_path, filename)
#             try:
#                 with open(file_path, 'r', encoding='utf-8') as file:
#                     content = file.read()
#                     #append text from txt file to combined_text
#                     combined_text.append(content)
#             except Exception as e:
#                 print(f"Error reading {filename}: {e}")
    
#     return "\n".join(combined_text)


In [20]:
# Directory where txt files are located
# txt_directory = 'full-ig/html_only/plain_txt/SearchParameter'
# create combined text object
# combined_content = combine_txt_files(txt_directory)

In [21]:
# print(combined_content)

In [22]:
# Open and read the ImplementationGuide JSON file into a Python object.
# The path is relative to the notebook's working directory (the same 'package'
# folder used in the cells above) instead of an absolute path that exists on
# one machine only. FHIR JSON is UTF-8, so the encoding is given explicitly.
implementation_guide_path = os.path.join(
    'package', 'json_only', 'ImplementationGuide',
    'ImplementationGuide-hl7.fhir.us.core.json',
)
with open(implementation_guide_path, 'r', encoding='utf-8') as file:
    implementation_guide = json.load(file)

## Sending IG through LLMs

Read in API keys for Claude, Gemini, and GPT from .env file

In [23]:
# Load variables from a local .env file into the process environment.
load_dotenv()

claude_api_key = os.getenv('ANTHROPIC_API_KEY')
gemini_api_key = os.getenv('GEMINI_API_KEY')
# NOTE(review): this sets an attribute on the OpenAI client *class*; confirm
# that the OpenAI client used later actually picks the key up from here.
OpenAI.api_key = os.getenv('OPENAI_API_KEY')

# Hand the Gemini key to the SDK explicitly. Without this the key read above
# is never used, and the Gemini calls depend on whichever environment variable
# names the installed SDK version happens to look for.
if gemini_api_key:
    gemini.configure(api_key=gemini_api_key)

#### Setting up Claude

In [24]:
# Client built directly from the API key loaded above.
claude = Anthropic(api_key = claude_api_key)
# Model id passed as `model` in message_claude. Other ids that were tried are listed on the right.
claude_version = "claude-3-5-sonnet-20240620"  # "claude-3-opus-20240229"   "claude-3-5-sonnet-20240620" "claude-3-sonnet-20240229" "claude-3-haiku-20240307"
# Passed as `max_tokens` in message_claude.
claude_max_output_tokens = 8192  # claude 3 opus is only 4096 tokens, sonnet is 8192

Functions to send IG content to Claude and request analysis

In [25]:
# CA bundle used to verify TLS connections made by the Anthropic client.
# SSL_CERT_FILE, when set, takes precedence so the notebook is not tied to one
# machine; the previous hard-coded location remains the fallback.
CERT_PATH = os.environ.get('SSL_CERT_FILE') or '/Users/amathur/ca-certificates.crt'

In [26]:
def create_anthropic_client():
    """Build an Anthropic client whose HTTP transport uses the configured CA bundle.

    If a file exists at CERT_PATH it is passed to httpx as the `verify` value;
    otherwise `verify=True` is passed. The httpx client is created with a
    30-second timeout and the API key comes from `claude_api_key`.
    """
    if os.path.exists(CERT_PATH):
        tls_verify = CERT_PATH
    else:
        tls_verify = True

    transport = httpx.Client(verify=tls_verify, timeout=30.0)
    return Anthropic(api_key=claude_api_key, http_client=transport)

In [27]:
#progress indicator, meant to run in a background thread while a request is in flight
def heartbeat(stop_event, start_time):
    """Print the elapsed time every 5 seconds until stop_event is set.

    Args:
        stop_event (threading.Event): Set by the caller to end the loop.
        start_time (float): time.time() value the elapsed time is measured from.
    """
    while not stop_event.is_set():
        elapsed = time.time() - start_time
        print(f"... still processing ({elapsed:.1f}s elapsed)")
        # Event.wait returns as soon as the event is set, so the thread ends
        # promptly instead of sitting out the rest of a 5-second time.sleep
        # (which also delayed the caller's join() by up to 5 seconds).
        stop_event.wait(5)

#send message request to claude letting it know an IG is being shared and providing it the action prompt
def message_claude(claude_client, user_prompt, content_text, max_retries=3):
    """
    Sends a message to Claude API with the provided prompt and content.

    The content is placed first, followed by the prompt. A heartbeat thread
    prints progress for the whole call, including retries and the pauses
    between them, and is always stopped before the function returns or raises.

    Args:
        claude_client: Client object exposing `messages.create(...)`.
        user_prompt (str): Instruction appended after the IG content.
        content_text (str): Implementation Guide content to analyze.
        max_retries (int): Maximum number of attempts (must be >= 1).

    Returns:
        tuple: (response, response_text), where response_text is
        `response.content[0].text`.

    Raises:
        ValueError: If max_retries is less than 1.
        Exception: The error from the last attempt, once all attempts failed.
    """
    if max_retries < 1:
        # With zero attempts there would be nothing to return to the caller
        raise ValueError("max_retries must be at least 1")

    # Construct the full prompt
    full_prompt = f"""Here is the content of an HL7 FHIR Implementation Guide:

{content_text}

{user_prompt}"""

    # Set up heartbeat (daemon, so a stuck thread can never block interpreter exit)
    start_time = time.time()
    stop_event = threading.Event()
    heartbeat_thread = threading.Thread(
        target=heartbeat, args=(stop_event, start_time), daemon=True
    )
    heartbeat_thread.start()

    # The try/finally wraps the WHOLE retry loop: a `finally` inside the loop
    # would stop the heartbeat after the first attempt.
    try:
        for attempt in range(1, max_retries + 1):
            try:
                print(f"Sending request to Claude API (attempt {attempt}/{max_retries})...")

                response = claude_client.messages.create(
                    model=claude_version,
                    max_tokens=claude_max_output_tokens,
                    messages=[{"role": "user", "content": full_prompt}],
                    temperature=0.7
                )

                print("Successfully received response from Claude API")
                response_text = response.content[0].text
                return response, response_text

            except Exception as e:
                if attempt == max_retries:
                    print(f"Failed after {max_retries} attempts. Last error: {str(e)}")
                    raise
                # Exponential backoff: 2s, 4s, 8s, ...
                wait_time = 2 ** attempt
                print(f"Error occurred: {str(e)}")
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
    finally:
        stop_event.set()
        heartbeat_thread.join()

#run one IG analysis request end to end
def analyze_ig(content, prompt):
    """
    Send IG content and a prompt to Claude and return the text of the reply.

    A client is created with create_anthropic_client, the content is checked
    for emptiness, and the request is delegated to message_claude. Any error is
    printed with an "Error in analyze_ig" prefix and then re-raised.

    Args:
        content: IG content to send; must be non-empty.
        prompt: Instruction passed to message_claude as the user prompt.

    Returns:
        The response text produced by message_claude.

    Raises:
        ValueError: If content is empty.
    """
    try:
        client = create_anthropic_client()

        # Refuse to send a request that carries no IG content
        if not content:
            raise ValueError("No content found in text files")

        print(f"Combined content length: {len(content)} characters")
        print("Sending to Claude API...")

        _, analysis_text = message_claude(client, prompt, content)
    except Exception as err:
        print(f"Error in analyze_ig: {str(err)}")
        raise
    else:
        return analysis_text

In [28]:
# Load the implementation guide as raw JSON text (this string is what is sent to the LLMs).
# The path is relative to the notebook's working directory instead of an
# absolute path on one user's machine; the file is read as UTF-8.
ig_json_path = os.path.join(
    "package", "json_only", "ImplementationGuide",
    "ImplementationGuide-hl7.fhir.us.core.json",
)
with open(ig_json_path, "r", encoding="utf-8") as f:
    content = f.read()

In [29]:
# analyze_ig -> message_claude already places `content` in front of the prompt
# ("Here is the content of an HL7 FHIR Implementation Guide: ..."), so the
# prompt only carries the question. Embedding {content} here as well sent the
# full IG JSON twice in one request.
prompt = """
The content above is a JSON file.
Can you summarize the key information and purpose of this data?
"""
result = analyze_ig(content, prompt)
print(result)

Combined content length: 292372 characters
Sending to Claude API...
... still processing (0.0s elapsed)
Sending request to Claude API (attempt 1/3)...
... still processing (5.0s elapsed)
... still processing (10.0s elapsed)
... still processing (15.0s elapsed)
... still processing (20.0s elapsed)
... still processing (25.0s elapsed)
Successfully received response from Claude API
This JSON file represents an Implementation Guide (IG) for the US Core FHIR profiles. Here are the key points:

1. Purpose: It defines minimum conformance requirements for accessing patient data based on FHIR Version R4.

2. Origin: It's based on requirements from Argonaut pilot implementations, ONC 2015 Edition Common Clinical Data Set (CCDS), and ONC U.S. Core Data for Interoperability (USCDI) v1.

3. Scope: It covers various FHIR resources and profiles, including AllergyIntolerance, CarePlan, CareTeam, Condition, Device, DiagnosticReport, DocumentReference, Encounter, Goal, Immunization, Location, Medication

#### Setting Up Gemini

In [30]:
# Model id used by message_gemini when it has to create a chat session.
gemini_version = "models/gemini-1.5-pro-001" 
# Passed as "max_output_tokens" in message_gemini's generation_config.
gemini_max_output_tokens = 8192
# presumably the sampling temperature for Gemini requests — TODO confirm it is applied
temp = 0.75  

In [31]:
# gemini maintains history in a chat session object. Messages are sent to a specific chat session.
def message_gemini(prompt, chat_session=None):
    """Send a prompt to Gemini through a chat session and return the reply text.

    Args:
        prompt (str): Message to send.
        chat_session: Existing chat session to continue. When None, a new
            session is started on `gemini_version`, configured with
            `gemini_max_output_tokens`, the `temp` temperature and a JSON
            response MIME type.

    Returns:
        str: The `.text` of the response.
    """
    if chat_session is None:
        model = gemini.GenerativeModel(
            model_name=gemini_version,
            generation_config={
                "max_output_tokens": gemini_max_output_tokens,
                # `temp` was defined for Gemini but never passed to the model
                "temperature": temp,
                "response_mime_type": "application/json",
            },
        )
        chat_session = model.start_chat()

    # Print progress while the (blocking) request runs
    start_time = time.time()
    stop_event = threading.Event()
    heartbeat_thread = threading.Thread(
        target=heartbeat, args=(stop_event, start_time), daemon=True
    )
    heartbeat_thread.start()
    try:
        response = chat_session.send_message(prompt, stream=False)
    finally:
        stop_event.set()  # Signal the heartbeat to stop
        heartbeat_thread.join()  # Wait for the heartbeat thread to finish
    return response.text

Gemini Configurations

In [33]:
# Prompt for Gemini: generate_content below receives only this string, so the
# IG JSON is embedded directly between the <JSON_CONTENT> tags.
# NOTE(review): the prompt text starts with "The the" (typo); it is left as-is
# here so it still matches the prompt that produced the recorded outputs.
prompt = f"""
The the content below is a JSON file in <JSON_CONTENT></JSON_CONTENT> tags. 
Can you summarize the key information and purpose of this data? 

<JSON_CONTENT>
{content}
</JSON_CONTENT>
"""

In [34]:
# Model handle for the direct (non-chat) calls in the next cells.
# NOTE(review): this uses 'gemini-1.5-flash-latest', not the gemini_version set earlier — confirm that is intended.
model = gemini.GenerativeModel(model_name='gemini-1.5-flash-latest')

In [35]:
# Look up the model's metadata and display its input and output token limits.
model_info = gemini.get_model('models/gemini-1.5-flash-latest')
(model_info.input_token_limit, model_info.output_token_limit)

(1000000, 8192)

In [36]:
# Count the tokens of the prompt that will be sent.
# NOTE(review): the slice cuts the prompt by characters while the bound is a
# token limit — the two units differ, so this is only a rough safety cap.
model.count_tokens(prompt[0:model_info.input_token_limit])

total_tokens: 72174

In [37]:
# Send the prompt (same character slice as in the token count above) and print the reply text.
response = model.generate_content(prompt[0:model_info.input_token_limit])
print(response.text)

This JSON file represents an FHIR Implementation Guide (IG) named "US Core Implementation Guide". It defines the minimum conformance requirements for accessing patient data in the US healthcare system. 

Here's a breakdown of the key information:

* **Purpose:** To establish a standard for interoperability between healthcare systems in the US by defining a set of FHIR profiles, extensions, search parameters, and operations that all systems must adhere to.
* **Version:**  7.0.0
* **FHIR Version:** 4.0.1
* **Based On:** The Argonaut pilot implementations, ONC 2015 Edition Common Clinical Data Set (CCDS), and ONC U.S. Core Data for Interoperability (USCDI) v1.
* **Scope:**  Defines minimum expectations for accessing patient data for various resources including Patient, Encounter, Condition, Observation, Medication, Immunization, and many more.
* **Content:**
    * **Profiles:** Specifies the structure and content of different FHIR resources. 
    * **Extensions:** Defines custom elements 

This JSON file represents a FHIR Implementation Guide (IG) called "US Core Implementation Guide". It's designed to define the minimum conformance requirements for accessing patient data within the US healthcare system. 

Here's a breakdown of the key information and purpose:

**Key Information:**

* **ID:** hl7.fhir.us.core
* **URL:** http://hl7.org/fhir/us/core/ImplementationGuide/hl7.fhir.us.core
* **Version:** 7.0.0
* **Name:** USCore
* **Title:** US Core Implementation Guide
* **Status:** active
* **Publisher:** HL7 International / Cross-Group Projects
* **Description:**  The IG builds upon FHIR R4 and incorporates requirements from past initiatives like Argonaut, ONC CCDS, and USCDI. It aims to facilitate interoperability by establishing minimum standards for accessing patient data. 
* **Jurisdiction:** US
* **FHIR Version:** 4.0.1
* **Dependencies:**  Relies on several other FHIR IGs and packages, such as HL7 Terminology, Smart App Launch, VSAC, and others.
* **Profiles and Extensions:** Defines numerous FHIR profiles and extensions for various resource types (e.g., Patient, Encounter, Observation, MedicationRequest). These profiles detail specific requirements for data elements, codes, and value sets.
* **Search Parameters and Operations:** Includes search parameters and operations, such as "$docref", for retrieving patient data in a standardized way. 
* **Terminology:**  Uses standard terminologies like LOINC and SNOMED CT for data elements.
* **Examples:** Provides numerous example resources that demonstrate the use of the profiles and extensions.
* **Guidance:** Offers general guidance, clinical notes guidance, medication list guidance, and specific guidance on USCDI requirements. 
* **Future Directions:**  Outlines plans for future expansion and updates to the US Core IG.

**Purpose:**

The main purpose of the US Core Implementation Guide is to:

* **Promote interoperability:** By defining common standards for accessing patient data, it allows different healthcare systems to exchange information more effectively.
* **Support data exchange:**  It provides a framework for implementing FHIR within the US healthcare landscape, ensuring that systems can share essential patient data.
* **Enable data access:**  It establishes the minimum requirements for accessing and using patient data, promoting patient-centered care.
* **Facilitate certification:**  It serves as a foundation for ONC Health IT certification, ensuring that systems meet the necessary standards for interoperability. 

In essence, the US Core Implementation Guide acts as a roadmap for implementing FHIR in the US, promoting interoperability and enhancing the use of electronic health information. 



In [39]:
# draft prompt to ask LLM to summarize IG text
# (rebinds `prompt`: from this cell on it no longer contains the IG JSON itself)
prompt = """Please analyze this Implementation Guide and provide:
1. A high-level summary of what this IG is about
2. Key profiles and extensions defined
3. Main requirements and constraints
4. Notable usage patterns or guidance

In as much detail as possible, please organize the information clearly and highlight particularly important aspects."""

# analyze partial combined text (text currently too large to all be ingested)
# NOTE: combined_content is only produced by the archived (commented-out) txt workflow
# result = analyze_ig(combined_content[1:15000], prompt)
# print(result)

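Experimenting with sending the IG in smaller chunks.
Note: incomplete — `analyze_ig_in_chunks` relies on `combine_txt_files`, which is currently archived (commented out) above.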

In [40]:
#this function will no longer properly run due to changes made to how the text files are combined earlier in the script

#defining function to split IG content into chunks
def split_content(text, max_bytes=8000000):  # Leave some room for the prompt
    """
    Split content into chunks of at most max_bytes UTF-8 bytes where possible.

    Splits happen only at file boundaries marked by "=== Content from". A
    single section larger than max_bytes is kept whole, so such a chunk can
    exceed the limit. Sections containing only whitespace are dropped.

    Args:
        text (str): Combined content, optionally containing file markers.
        max_bytes (int): Target upper bound for the encoded size of one chunk.

    Returns:
        list: Chunk strings in original order; empty if text has no content.
    """
    marker = "=== Content from"

    chunks = []
    current_parts = []   # sections of the chunk being built (joined once at the end)
    current_bytes = 0

    for index, section in enumerate(text.split(marker)):
        if not section.strip():
            continue

        # str.split removes the marker in front of every piece except the
        # first one, so restore it by position. Keying this on "is the current
        # chunk empty" dropped the marker whenever the text started with it.
        if index > 0:
            section = marker + section

        section_bytes = len(section.encode('utf-8'))

        # If adding this section would exceed the limit, close the current chunk
        if current_parts and current_bytes + section_bytes > max_bytes:
            chunks.append("".join(current_parts))
            current_parts = []
            current_bytes = 0

        current_parts.append(section)
        current_bytes += section_bytes

    # Add the last chunk if it exists
    if current_parts:
        chunks.append("".join(current_parts))

    return chunks

def message_claude(claude_client, user_prompt, content_text, max_retries=3):
    """
    Sends a message to Claude API with the provided prompt and content.

    This definition replaces the earlier message_claude once its cell has run.
    It additionally rejects prompts larger than 9,000,000 UTF-8 bytes before
    any request is made. A heartbeat thread prints progress for the whole
    call, including retries, and is always stopped before the function returns
    or raises.

    Args:
        claude_client: Client object exposing `messages.create(...)`.
        user_prompt (str): Instruction appended after the IG content.
        content_text (str): Implementation Guide content to analyze.
        max_retries (int): Maximum number of attempts (must be >= 1).

    Returns:
        tuple: (response, response_text), where response_text is
        `response.content[0].text`.

    Raises:
        ValueError: If max_retries is less than 1 or the prompt is too large.
        Exception: The error from the last attempt, once all attempts failed.
    """
    if max_retries < 1:
        # With zero attempts there would be nothing to return to the caller
        raise ValueError("max_retries must be at least 1")

    # Construct the full prompt
    full_prompt = f"""Here is the content of an HL7 FHIR Implementation Guide:

{content_text}

{user_prompt}"""
    # Check content length in bytes
    # NOTE(review): 9000000 is described as "Claude's limit" — confirm against the API's actual request limits
    prompt_bytes = len(full_prompt.encode('utf-8'))
    if prompt_bytes > 9000000:
        raise ValueError(f"Content too large: {prompt_bytes} bytes")

    # Set up heartbeat (daemon, so a stuck thread can never block interpreter exit)
    start_time = time.time()
    stop_event = threading.Event()
    heartbeat_thread = threading.Thread(
        target=heartbeat, args=(stop_event, start_time), daemon=True
    )
    heartbeat_thread.start()

    # The try/finally wraps the WHOLE retry loop: a `finally` inside the loop
    # would stop the heartbeat after the first attempt.
    try:
        for attempt in range(1, max_retries + 1):
            try:
                print(f"Sending request to Claude API (attempt {attempt}/{max_retries})...")

                response = claude_client.messages.create(
                    model=claude_version,
                    max_tokens=claude_max_output_tokens,
                    messages=[{"role": "user", "content": full_prompt}],
                    temperature=0.7
                )

                print("Successfully received response from Claude API")
                response_text = response.content[0].text
                return response, response_text

            except Exception as e:
                if attempt == max_retries:
                    print(f"Failed after {max_retries} attempts. Last error: {str(e)}")
                    raise
                # Exponential backoff: 2s, 4s, 8s, ...
                wait_time = 2 ** attempt
                print(f"Error occurred: {str(e)}")
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
    finally:
        stop_event.set()
        heartbeat_thread.join()

def analyze_ig_in_chunks(txt_directory, prompt):
    """
    Analyzes the IG content in chunks and combines the results.

    The combined text of txt_directory is split with split_content, each chunk
    is sent to Claude through message_claude, and the replies are joined under
    "=== Combined Analysis ===" / "=== Additional Findings ===" headings.

    NOTE(review): combine_txt_files is only present as commented-out code in
    the visible part of this notebook; it has to be defined before this
    function can run.

    Args:
        txt_directory (str): Directory passed to combine_txt_files.
        prompt (str): Analysis instruction used for every chunk.

    Returns:
        str: The combined analysis text.

    Raises:
        ValueError: If no content was found in txt_directory.
    """
    try:
        # Initialize Claude client
        claude_client = Anthropic()

        # Combine all txt files
        print("Combining text files...")
        combined_content = combine_txt_files(txt_directory)

        if not combined_content:
            raise ValueError("No content found in text files")

        print(f"Total combined content length: {len(combined_content)} characters")

        # Split content into chunks
        chunks = split_content(combined_content)
        total_chunks = len(chunks)
        print(f"Split content into {total_chunks} chunks")

        # Process each chunk
        all_responses = []
        for i, chunk in enumerate(chunks, 1):
            print(f"\nProcessing chunk {i} of {total_chunks}...")

            # Chunks after the first get a continuation preamble. It is built
            # from explicit lines so that the source code's indentation and
            # trailing spaces do not leak into the text sent to the model.
            if i > 1:
                chunk_prompt = (
                    f"This is chunk {i} of {total_chunks} from the same Implementation Guide.\n"
                    "Please continue the analysis, focusing on any new information in this chunk.\n"
                    "Do not repeat information you've already covered, only add new findings.\n"
                    "\n"
                    f"{prompt}"
                )
            else:
                chunk_prompt = prompt

            response, response_text = message_claude(claude_client, chunk_prompt, chunk)
            all_responses.append(response_text)

            print(f"Completed chunk {i}")

        # Combine all responses
        final_response = "\n\n=== Combined Analysis ===\n\n" + "\n\n=== Additional Findings ===\n\n".join(all_responses)

        return final_response

    except Exception as e:
        print(f"Error in analyze_ig_in_chunks: {str(e)}")
        raise


In [41]:
# result = analyze_ig_in_chunks(txt_directory, prompt)
# print(result)

## Passing Images to LLM (Claude)

In [16]:
#ensure you have installed IPython
#%pip install anthropic IPython


Note: you may need to restart the kernel to use updated packages.


In [17]:
#import packages
from IPython.display import Image
import base64


In [None]:
#set up claude instance
# Anthropic() is called without arguments; presumably the SDK then reads the API key from the environment — TODO confirm
client = Anthropic()
# Model id used for the image request in the next cell.
MODEL_NAME = "claude-3-opus-20240229"

#function to read an image file and return its content base64-encoded
def get_base64_encoded_image(image_path):
    """Return the bytes of the file at image_path as a base64 text string.

    Args:
        image_path (str): Path of the file to read in binary mode.

    Returns:
        str: Base64 encoding of the file's bytes, decoded as UTF-8 text.
    """
    with open(image_path, "rb") as fh:
        return base64.b64encode(fh.read()).decode('utf-8')

In [None]:
#set path to image (must point to an existing image file before the request can be sent)
image_path=''
# Media type declared to the API for the image; change it if the file is not a JPEG.
image_media_type = "image/jpeg"

if not image_path:
    # With an empty path, open('') fails with FileNotFoundError and breaks
    # "Run All"; skip the request with a clear message instead.
    print("image_path is empty: set it to an image file and re-run this cell.")
else:
    #set message and prompt to Claude API
    message_list = [
        {
            "role": 'user',
            "content": [
                {"type": "image", "source": {"type": "base64", "media_type": image_media_type, "data": get_base64_encoded_image(image_path)}},
                {"type": "text", "text": "Explain the diagram"}
            ]
        }
    ]

    # generate response
    response = client.messages.create(
        model=MODEL_NAME,
        max_tokens=2048,
        messages=message_list
    )

    #print the text of claude's response
    print(response.content[0].text)

## TODO: Read in relevant context files 
- IG_golden_rules
- IG_example
- IG_profile

TODO: Prompts