In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from preprocess import process_cad_rads_labels, evaluate_performance, plot_confusion_matrix, compare_certainty

In [4]:
# Backend selector: later cells branch on API ('chatgpt', 'claude' or 'gemini').
# Leave exactly one of the assignments below uncommented.
# API = 'chatgpt'
API = 'claude'
# API = 'gemini'

In [32]:
# NOTE(review): USE_MANYSHOT is not read by any cell visible in this notebook —
# confirm whether it is still needed.
USE_MANYSHOT = True

# Upper bound on generated tokens; forwarded to the model APIs as
# max_tokens (OpenAI, Anthropic) or max_output_tokens (Gemini).
MAX_OUTPUT_LENGTH = 1000

In [7]:

# Build the API client for the selected backend.
#
# Credentials are looked up in the environment first so that real keys never
# have to be written into (and committed with) the notebook. The literal
# placeholder is kept as the fallback, so the cell behaves exactly as before
# when the corresponding variable is not set.
# The SDK imports stay inside each branch so only the selected backend's
# package has to be installed.
import os

if API == 'chatgpt':
    from openai import OpenAI
    api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_API_KEY')
    client = OpenAI(api_key=api_key)

elif API == "claude":
    import anthropic
    api_key = os.environ.get('ANTHROPIC_API_KEY', 'YOUR_API_KEY')
    client = anthropic.Anthropic(api_key=api_key)

elif API == "gemini":
    import google.generativeai as genai
    api_key = os.environ.get('GOOGLE_API_KEY', 'YOUR_API_KEY')
    # Only the SDK is configured here; the Gemini model object that serves as
    # `client` is created in a later cell, after the system prompt is loaded.
    genai.configure(api_key=api_key)

else:
    # Fail fast instead of leaving `client` undefined for the cells below.
    raise ValueError(
        f"Unsupported API {API!r}; expected 'chatgpt', 'claude' or 'gemini'."
    )

print(API)

claude


In [9]:
# Load the processed sample for the selected dataset version. Every missing
# cell is replaced by the literal string 'None'.
VERSION = 'v4.5'

data = pd.read_csv(f'sample_processed_{VERSION}.csv').fillna(value='None')

In [23]:
# Load the many-shot example pool and separate the remaining rows of `data`.
# The values of the pool's 'Unnamed: 0.2' column are used as the index labels
# of `data` to drop, so `no_pool` holds every row that is not a pool example.
pool = pd.read_csv('manyshot_pool_from_claude.csv')
pool_idx = list(pool['Unnamed: 0.2'])
no_pool = data.drop(labels=pool_idx, axis='index')

In [25]:
# Read the system prompt from disk and echo it for inspection.
# The context manager closes the file handle even if read() raises, which the
# previous manual open()/close() pair did not guarantee.
with open("prompt_system_1016_cot_edit.txt", 'r', encoding='utf-8') as f:
    prompt_sys = f.read()
print(prompt_sys)


You need to extract the CAD-RADS, Plaque burden, and Modifier from a coronary artery CT report. Refer to the information below:

CAD-RADS
CAD-RADS is determined based on the most severe stenosis among various coronary branches:
- 0: 0%, Absence of CAD
- 1: 1-24%, Minimal non-obstructive CAD
- 2: 25-49%, Mild non-obstructive CAD (Mild stenosis)
- 3: 50-69%, Moderate stenosis
- 4A: 70-99% in a single or 2-vessel, Severe stenosis
- 4B: If the left main (LM) shows stenosis of 50% (moderate) or higher, or if 3-vessel(including LAD, LCx, RCA, OM, D, PDA) have stenosis of 70% (severe) or higher.
- 5: 100% Total coronary occlusion
 
Plaque Burden
Plaque Burden is determined by the overall amount of coronary plaque and Coronary artery calcium (CAC):
- None: 0
- P1: Mild, 1-100
- P2: Moderate, 101-300
- P3: Severe, 301-999
- P4: Extensive, >1000

Modifiers 
There are 6 modifiers that can be added to the CAD-RADS category. More than one modifiers can be applicable:
- N: If "Limited evaluation due

In [26]:
# Instruction header that opens every user prompt. It ends with a '===' line,
# the same separator that make_many_shot_prompt writes after each example.
prompt_user_few = (
    "Please provide the rationale leading to the final answer, which I will supply. "
    "Reference the examples below for the format.\n"
    "==="
)

In [9]:
# For Gemini the model object itself is used as `client`. The system prompt
# and the generation settings are fixed here, when the model is constructed.
if API == 'gemini':
    gemini_generation_config = {
        "temperature": 0,
        "max_output_tokens": MAX_OUTPUT_LENGTH,
        "response_mime_type": "text/plain",
    }
    client = genai.GenerativeModel(
        model_name="gemini-1.5-pro",  # alternative: "gemini-1.5-flash"
        generation_config=gemini_generation_config,
        system_instruction=prompt_sys,
    )

In [27]:
def get_response(API, client, prompt_sys, prompt):
    """Send one system/user prompt pair to the selected backend and return the reply text.

    All backends are called with temperature 0. The token limit for the
    'chatgpt' and 'claude' branches is read from the module-level
    MAX_OUTPUT_LENGTH.

    Args:
        API (str): Backend name: 'chatgpt', 'claude' or 'gemini'.
        client: Client object for that backend. The 'chatgpt' branch calls
            client.chat.completions.create, the 'claude' branch calls
            client.messages.create and the 'gemini' branch calls
            client.start_chat.
        prompt_sys (str): System prompt. Not used by the 'gemini' branch,
            which sends only `prompt`; this notebook passes the system
            instruction when the Gemini model object is constructed.
        prompt (str): User prompt.

    Returns:
        The text of the model's reply.

    Raises:
        ValueError: If `API` is not one of the supported backend names.
            Previously this case returned None implicitly, which only
            surfaced later as an unrelated error in the caller.
    """
    if API == 'chatgpt':
        response = client.chat.completions.create(
            model="gpt-4-turbo",  # alternative: "gpt-4o"
            messages=[
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt_sys,
                        }
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ],
            max_tokens=MAX_OUTPUT_LENGTH,
            temperature=0,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        return response.choices[0].message.content

    elif API == 'claude':
        response = client.messages.create(
            model="claude-3-5-sonnet-20240620",
            max_tokens=MAX_OUTPUT_LENGTH,
            temperature=0,
            system=prompt_sys,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ]
        )
        return response.content[0].text

    elif API == 'gemini':
        # A fresh chat session per call, so no history carries over between prompts.
        chat_session = client.start_chat()
        response = chat_session.send_message(prompt)
        return response.text

    # Unknown backend: fail loudly rather than returning None.
    raise ValueError(
        f"Unsupported API {API!r}; expected 'chatgpt', 'claude' or 'gemini'."
    )


In [28]:
# Marker expected in every complete response; the generation loop re-prompts
# the model when a response does not contain it.
trigger = "Final Answer (CAD-RADS/Plaque Burden/Modifier):"

In [160]:
def make_many_shot_prompt(base_prompt, final_answer, report, shots):
    """
    Assemble a many-shot user prompt that asks for the rationale behind a known answer.

    The result is `base_prompt`, a newline, one numbered example per shot
    (each terminated by a '===' line), and finally the target report with an
    open '### Rationale:' section for the model to complete.

    Args:
        base_prompt (str): Text placed at the very start of the prompt
        final_answer (str): Known answer for the target report, quoted in the final instruction
        report (str): The target report whose rationale is requested
        shots (list): Example records; each must provide the keys 'Label'
            (the example's answer), 'Report' and '0' (the text placed under
            the example's '### Rationale:' heading)

    Returns:
        str: The fully formatted prompt
    """
    example_blocks = [
        f"Example {number}. The final answer should be '{shot['Label']}':\n\n"
        f"### Report:\n{shot['Report']}\n\n"
        f"### Rationale: \n{shot['0']}\n===\n"
        for number, shot in enumerate(shots, start=1)
    ]

    target_block = (
        f"Now, provide the rationale leading to the final answer, '{final_answer}'. "
        "Follow exact same format like the examples:\n\n"
        f"### Report: \n{report}\n\n### Rationale:\n"
    )

    return base_prompt + '\n' + "".join(example_blocks) + target_block

In [163]:
# Generate a rationale for every report that is not part of the many-shot pool.
# For each report a set of pool examples is sampled, the prompt is built, and
# the model is queried. A response that lacks the final-answer marker is
# followed up once, asking the model to complete the answer.

N_SHOTS = 50      # examples per prompt; capped below by the pool size
SAMPLE_SEED = 42  # base seed so a re-run draws the same examples for each row

responses = []
labels = []  # not filled in this cell; kept in case cells outside this view use it

# pool.sample raises if asked for more rows than the pool contains.
shots_per_prompt = min(N_SHOTS, len(pool))

# For a quick smoke test, iterate over range(0, 1) instead.
for i in tqdm(range(len(no_pool))):
    # A per-row seed keeps the draw reproducible while still giving every
    # report a different set of examples.
    sampled_shots = (
        pool.sample(n=shots_per_prompt, random_state=SAMPLE_SEED + i)
        [['Report', 'Label', '0']]
        .to_dict('records')
    )

    # Create prompt using the sampled shots
    prompt = make_many_shot_prompt(
        prompt_user_few,
        final_answer=no_pool['Label'].iloc[i],
        report=no_pool['Report'].iloc[i],
        shots=sampled_shots
    )

    response = get_response(API, client, prompt_sys, prompt)

    if trigger in response:
        responses.append(response)
    else:
        print(f'Response {i} does not contain the final answer. Retrying.')
        print(response)
        # Feed the partial rationale back, ending with the marker, so the model
        # only has to complete the final answer. The follow-up is not
        # re-validated.
        # NOTE(review): `prompt` already ends with a '### Rationale:' heading,
        # so this follow-up prompt contains that heading twice — confirm that
        # this is intended.
        new_response = get_response(API, client, prompt_sys, prompt + "\n\n### Rationale:\n" + response + "\n\n" + trigger)
        responses.append(response + "\n\n" + trigger + new_response)

100%|██████████| 37/37 [05:05<00:00,  8.25s/it]


In [None]:
# Attach the generated rationales to the non-pool rows; `responses` holds one
# entry per row of `no_pool`, in row order.
no_pool = no_pool.assign(CoT=responses)

In [216]:
# Spot check: expected label followed by the generated rationale for one non-pool row.
i = 36
print(no_pool['Label'].iloc[i], responses[i], sep='\n')

4A/S
Let's think through this step by step:

1. CAD-RADS:
- The most severe stenosis mentioned is "PL branch orifice - discrete stenosis (75-80%) with noncalcified plaque."
- When presented with a range like '75-80%', we interpret it as the upper end of the range minus 1, so 79%.
- This falls into the CAD-RADS 4A category: 70-99% in a single vessel, Severe stenosis.
- There's no mention of left main stenosis ≥50% or three-vessel disease ≥70%, so it doesn't qualify for 4B.
- Therefore, the CAD-RADS score is 4A.

2. Plaque Burden:
- The report states "Calcium score = 387.53".
- This falls into the P3 category: Severe, 301-999.
- However, the presence of stents (modifier 'S') overrides the calcium score for determining plaque burden.
- According to the requirements, when stents are present, the Plaque Burden should be reported as 'None'.

3. Modifier:
- The report mentions stents in multiple locations: "LAD proximal and RCA proximal - stent in situ" and "PL branch - stent in situ".
- This

In [232]:
# Collect one rationale per row of `data` into the 'CoT_from_claude' column.
# Rows that belong to the many-shot pool take the pool's '0' column (matched
# on 'Unnamed: 0.2'); all other rows take the 'CoT' column of `no_pool`
# (matched on 'Unnamed: 0'). The first match is used in both cases.
data['CoT_from_claude'] = ''
for i in range(len(data)):
    if i in pool_idx:
        source, key_col, text_col = pool, 'Unnamed: 0.2', '0'
    else:
        source, key_col, text_col = no_pool, 'Unnamed: 0', 'CoT'
    matches = source.loc[source[key_col] == i, text_col]
    data.loc[i, 'CoT_from_claude'] = matches.values[0]

In [237]:
# Write the full table to disk; index=False keeps the DataFrame index out of the CSV.
data.to_csv('manyshot_pool_all_from_claude.csv', index=False)

In [233]:
# Spot check: label and stored rationale for one row of the merged table.
i = 60
row = data.iloc[i]
print(row['Label'], row['CoT_from_claude'], sep='\n')

1
Let's think through this step by step:

1. CAD-RADS:
- The report states "Atherosclerosis with no significant stenosis in the coronary arteries."
- The most severe stenosis mentioned is "pLAD tiny calcification with minimal stenosis (<5%)".
- This falls into the CAD-RADS 1 category: 1-24%, Minimal non-obstructive CAD.
- Even though the stenosis is <5%, the presence of calcification indicates some level of atherosclerosis, justifying a CAD-RADS score of 1 rather than 0.
- Therefore, the CAD-RADS score is 1.

2. Plaque Burden:
- The report does not provide a specific coronary calcium score.
- However, it mentions "tiny calcification" in the pLAD.
- This suggests a very low calcium score, which would fall into the 'None' category: 0.
- Therefore, the Plaque Burden is None.

3. Modifier:
- There are no mentions of stents, grafts, high-risk plaques, or other significant abnormalities.
- The study appears to be diagnostic with no limitations mentioned.
- There are no indications for any of

In [234]:
# Spot check: label and stored rationale for one row of the merged table.
i = 61
row = data.iloc[i]
print(row['Label'], row['CoT_from_claude'], sep='\n')

3/P2
Let's think through this step by step:

1. CAD-RADS:
- The most severe stenosis mentioned is "LCx proximal - discrete stenosis (55-65%) with partially calcified plaque."
- When presented with a range like '55-65%', we interpret it as the upper end of the range minus 1, so 64%.
- This falls into the CAD-RADS 3 category: 50-69%, Moderate stenosis.
- Therefore, the CAD-RADS score is 3.

2. Plaque Burden:
- The report states "Calcium score = 170.2".
- This falls into the P2 category: Moderate, 101-300.
- Therefore, the Plaque Burden is P2.

3. Modifier:
- There are no mentions of stents, grafts, or high-risk plaques.
- The study appears to be diagnostic with no mention of motion artifacts or limited evaluation.
- The aortic valvular calcifications mentioned do not qualify for the 'E' modifier as they are not a coronary artery abnormality.
- Therefore, no modifiers are applicable in this case.

### Final Answer (CAD-RADS/Plaque Burden/Modifier): 3/P2/None
