In [2]:
# import requests

# # The raw URL of the .ipynb file on GitHub
# file_url = 'https://raw.githubusercontent.com/facebookresearch/llama-recipes/main/examples/Prompt_Engineering_with_Llama_2.ipynb'

# # The name of the file to save locally
# local_filename = 'Prompt_Engineering_with_Llama_2.ipynb'

# # Make a GET request to fetch the raw content of the notebook
# response = requests.get(file_url)
# response.raise_for_status()  # Ensure the request was successful

# # Open the local file in write-binary mode and write the contents
# with open(local_filename, 'wb') as f:
#     f.write(response.content)

# print(f'Downloaded file saved as: {local_filename}')

In [3]:
# Import all the required packages

import os
import pandas as pd
import numpy as np
import tiktoken
import sidetable
import time
from openai import OpenAI
from tqdm import tqdm
from tenacity import retry, stop_after_attempt, wait_random_exponential
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
# Initialize the OpenAI client with your API key
api_key = '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'
client = OpenAI(api_key=api_key)

In [5]:
# Path to the data files
path = r'C:\Users\m254356\Dropbox\Github\llm_radimpressions\data'

# Creating a list of all the paths for files in different experiments
paths = []
for subfolders in os.listdir(path):
    x = os.path.join(path, subfolders)
    paths.append(x)
    
paths

['C:\\Users\\m254356\\Dropbox\\Github\\llm_radimpressions\\data\\data_fewshot',
 'C:\\Users\\m254356\\Dropbox\\Github\\llm_radimpressions\\data\\data_finetuning',
 'C:\\Users\\m254356\\Dropbox\\Github\\llm_radimpressions\\data\\data_zeroshot',
 'C:\\Users\\m254356\\Dropbox\\Github\\llm_radimpressions\\data\\main_files']

In [9]:
#Reading the sampled datafile for 100 patients
df_100_zs = pd.read_csv(paths[2] + r'\sampled_df_100.csv')

In [11]:
df_100_zs['final_deid'] = df_100_zs['final_deid'].astype(str)

In [12]:
def remove_substring(text, substring):
    index = text.find(substring)
    if index != -1:
        return text[:index]  # Return the text up to the found index
    else:
        return text 

In [15]:
remove = "I, the teaching physician"
df_100_zs['final_deid'] = df_100_zs['final_deid'].apply(lambda x: remove_substring(x, remove))

remove = "ATTESTATION"
df_100_zs['final_deid'] = df_100_zs['final_deid'].apply(lambda x: remove_substring(x, remove))

remove = "Critical results were communicated"
df_100_zs['final_deid'] = df_100_zs['final_deid'].apply(lambda x: remove_substring(x, remove))

remove = "Electronically Signed by "
df_100_zs['final_deid'] = df_100_zs['final_deid'].apply(lambda x: remove_substring(x, remove))

In [19]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
#     encoding = tiktoken.encoding_for_model(encoding_name)
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [21]:
# Encoding name for the  model
# encoding_name = "gpt-4" ## if encoding for a specific model is used
encoding_name = 'cl100k_base'

In [31]:
# Now calculate the number of tokens for each truncated string
df_100_zs['num_tokens'] = df_100_zs['final_deid'].apply(lambda x: num_tokens_from_string(x, encoding_name))

# Calculate the total number of tokens
total_tokens = df_100_zs['num_tokens'].sum()
print(f"Total number of tokens in the dataset: {total_tokens}")

Total number of tokens in the dataset: 77383


In [32]:
prompt_text = "Identify if the following radiology impression text indicates (1) any cancer, (2) progression/worsening, (3) response/improvement, (4) brain metastases, (5) bone/osseous metastases, (6) adrenal metastases, (7) liver/hepatic metastases, (8) lung/pulmonary metastases, (9) lymph node/nodal metastases, (10) peritoneal metastases. Answer in Yes or No. Do not give an explanation"
num_tokens_from_string(prompt_text, encoding_name)

106

In [33]:
df_100_zs['total_num_tokens'] = df_100_zs['num_tokens'] + 106
df_100_zs['total_num_tokens'].sum()

184761

In [34]:
# def truncate_to_token_limit(string: str, encoding_name: str, max_tokens: int) -> str:
#     """Truncates a text string to a specified token limit."""
#     encoding = tiktoken.get_encoding(encoding_name)
#     encoded_string = encoding.encode(string)

#     # Truncate the encoded string to the max_tokens limit
#     if len(encoded_string) > max_tokens:
#         encoded_string = encoded_string[:max_tokens]

#     # Decode back to string (if necessary, depending on how your encoding works)
#     truncated_string = encoding.decode(encoded_string)
#     return truncated_string


# # Set your token limit
# token_limit = 100
# # Truncate each string in the DataFrame to the token limit
# df_100_zs['truncated_text'] = df_100_zs['final_deid'].apply(lambda x: truncate_to_token_limit(x, encoding_name, token_limit))

# # Now calculate the number of tokens for each truncated string
# df_100_zs['num_tokens'] = df_100_zs['truncated_text'].apply(lambda x: num_tokens_from_string(x, encoding_name))

# # Calculate the total number of tokens
# total_tokens = df_100_zs['num_tokens'].sum()

# # Display results
# print(df_100_zs[['truncated_text', 'num_tokens']])

In [36]:
df_100_zs = df_100_zs.sample(n=5, random_state=42)

In [102]:
# Function to create a single GPT-4 API call
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(10))
def create_gpt4_call(content):
    response = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to analyze radiology reports."},
            {"role": "user", "content": content}
        ],
        temperature=1e-12,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].message.content.strip()

In [103]:
prompt_text = "Identify if the following radiology impression text indicates (1) any cancer, (2) progression/worsening, (3) response/improvement, (4) brain metastases, (5) bone/osseous metastases, (6) adrenal metastases, (7) liver/hepatic metastases, (8) lung/pulmonary metastases, (9) lymph node/nodal metastases, (10) peritoneal metastases. Answer in Yes or No. Do not give an explanation"

In [104]:
print(prompt_text)

Identify if the following radiology impression text indicates (1) any cancer, (2) progression/worsening, (3) response/improvement, (4) brain metastases, (5) bone/osseous metastases, (6) adrenal metastases, (7) liver/hepatic metastases, (8) lung/pulmonary metastases, (9) lymph node/nodal metastases, (10) peritoneal metastases. Answer in Yes or No. Do not give an explanation


In [105]:
# List of conditions to be checked
conditions = ["any cancer", "progression/worsening", "response/improvement", 
                  "brain metastases", "bone/osseous metastases", "adrenal metastases", 
                  "liver/hepatic metastases", "lung/pulmonary metastases", 
                  "lymph node/nodal metastases", "peritoneal metastases"]

In [106]:
# Function to classify impressions for the fixed set of conditions
def classify_impressions(df, column_name, conditions, prompt_text):
    
    # Reset the index of the DataFrame before running the loop. 
    # This will ensure that the DataFrame has a simple integer-based index which should align with loop's index variable.
    df = df.reset_index(drop=True)
    
    # Initialize a dictionary to store predictions
    predictions = {condition: [] for condition in conditions}
    
    
    # Calculate token count for the static part of the prompt
    static_prompt_tokens = num_tokens_from_string(prompt_text, "cl100k_base")

    # Calculate total number of tokens for all prompts
    total_tokens = sum(df[column_name].apply(lambda x: num_tokens_from_string(f"{prompt_text}\n\n{x}", "cl100k_base")))

    processed_tokens = 0

    # Initialize tqdm with the initial description and total number of rows
    pbar = tqdm(total=len(df), desc="Starting")

    for index, impression in df.iterrows():
        # Constructing the full prompt with impression
        full_prompt = f"{prompt_text}\n\n{impression[column_name]}" 
        
        # Generating a single response for all conditions
        response = create_gpt4_call(full_prompt)

        # Count the number of tokens for the current full prompt
        num_tokens = num_tokens_from_string(full_prompt, "cl100k_base")
        processed_tokens += num_tokens
        tokens_left = total_tokens - processed_tokens

        # Update tqdm description to show both item progress and token count
        pbar.set_description(f"Classifying - {index + 1}/{len(df)} - Tokens Processed: {processed_tokens}, Tokens Left: {tokens_left}")

        # Update progress by one iteration for the item counter
        pbar.update(1)
        
        # Parsing the response to extract labels for each condition
        labels = response.split("\n")
        for i, condition in enumerate(conditions):
            label = labels[i].strip() if i < len(labels) else "No"
            # Extract only the 'Yes' or 'No' part from the label
            label = "Yes" if "Yes" in label else "No"
            predictions[condition].append(label)
   
    # Save intermediate results every nth rows
        if (index + 1) % 50 == 0:
            for condition, condition_predictions in predictions.items():
                processed_predictions = condition_predictions[:index + 1]

                # Get indices of the rows to update
                indices_to_update = df.index[:index + 1]

                # Use .loc to update the original DataFrame
                df.loc[indices_to_update, f'{condition}_predicted'] = np.where(np.array(processed_predictions) == 'Yes', 1, 0)

            # Save the updated part of the DataFrame
            df.iloc[:index + 1].to_csv(f'output_at_row_{index + 1}.csv', index=False)

        
    # Convert 'Yes'/'No' labels to binary (1/0) and add to DataFrame
    for condition, condition_predictions in predictions.items():
        df[f'{condition}_predicted'] = np.where(np.array(condition_predictions) == 'Yes', 1, 0)
        

    pbar.close()
    return df, predictions

In [108]:
df, predictions = classify_impressions(df_100_zs,'final_deid', conditions, prompt_text)

Classifying - 5/5 - Tokens Processed: 900, Tokens Left: 0: 100%|██████████| 5/5 [00:25<00:00,  5.15s/it]


In [109]:
df

Unnamed: 0,patient_id,split,final_deid,any_cancer,progression,response,brain_met,bone_met,adrenal_met,liver_met,...,any cancer_predicted,progression/worsening_predicted,response/improvement_predicted,brain metastases_predicted,bone/osseous metastases_predicted,adrenal metastases_predicted,liver/hepatic metastases_predicted,lung/pulmonary metastases_predicted,lymph node/nodal metastases_predicted,peritoneal metastases_predicted
0,1436,train,No convincing evidence of lung cancer recurrence.,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,1055,train,IMPRESSION: Larger nodules have decreased in...,1,0,1,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
2,2442,train,1. 2.7 cm region of FDG avidity in the panc...,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
3,228,train,1. Overall decreased tumor burden as evidenced...,1,0,1,0,1,0,0,...,1,0,1,0,1,0,0,1,0,0
4,1092,train,Slow progression of clustered perilymphatic no...,1,1,0,0,0,0,0,...,1,1,0,0,0,0,0,1,1,0


In [110]:
# #convert predictions into dataframe

# predictions = pd.DataFrame(_)

# # Function to clean the data
# def clean_data(entry):
#     if 'yes' in entry.lower():
#         return 'Yes'
#     elif 'no' in entry.lower():
#         return 'No'
#     else:
#         return None
    
# predictions = predictions.applymap(clean_data)

In [111]:
predicted_label_cols = [col for col in classified_df.columns if col.endswith('_predicted')]
true_label_cols = classified_df.columns[3:13]

In [112]:
true_label_cols, predicted_label_cols

(Index(['any_cancer', 'progression', 'response', 'brain_met', 'bone_met',
        'adrenal_met', 'liver_met', 'lung_met', 'node_met', 'peritoneal_met'],
       dtype='object'),
 ['any cancer_predicted',
  'progression/worsening_predicted',
  'response/improvement_predicted',
  'brain metastases_predicted',
  'bone/osseous metastases_predicted',
  'adrenal metastases_predicted',
  'liver/hepatic metastases_predicted',
  'lung/pulmonary metastases_predicted',
  'lymph node/nodal metastases_predicted',
  'peritoneal metastases_predicted'])

In [113]:
def evaluate_model_performance(df, true_label_cols, predicted_label_cols):
    for true_label_col, predicted_label_col in zip(true_label_cols, predicted_label_cols):
        # Extract true and predicted labels
        true_labels = df[true_label_col]
        predicted_labels = df[predicted_label_col]

        # Compute the evaluation metrics
        accuracy = accuracy_score(true_labels, predicted_labels)
        precision = precision_score(true_labels, predicted_labels)
        recall = recall_score(true_labels, predicted_labels)
        f1 = f1_score(true_labels, predicted_labels)

        # Print the results
        print(f"Evaluation Metrics for {predicted_label_col}:")
        print(f"  Accuracy: {accuracy:.4f}")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall: {recall:.4f}")
        print(f"  F1 Score: {f1:.4f}\n")

In [114]:
evaluate_model_performance(classified_df, true_label_cols, predicted_label_cols)

Evaluation Metrics for any cancer_predicted:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Evaluation Metrics for progression/worsening_predicted:
  Accuracy: 0.8000
  Precision: 0.5000
  Recall: 1.0000
  F1 Score: 0.6667

Evaluation Metrics for response/improvement_predicted:
  Accuracy: 0.8000
  Precision: 0.6667
  Recall: 1.0000
  F1 Score: 0.8000

Evaluation Metrics for brain metastases_predicted:
  Accuracy: 1.0000
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000

Evaluation Metrics for bone/osseous metastases_predicted:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Evaluation Metrics for adrenal metastases_predicted:
  Accuracy: 1.0000
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000

Evaluation Metrics for liver/hepatic metastases_predicted:
  Accuracy: 1.0000
  Precision: 0.0000
  Recall: 0.0000
  F1 Score: 0.0000

Evaluation Metrics for lung/pulmonary metastases_predicted:
  Accuracy: 0.8000
  Precision:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [100]:
for true_col, pred_col in zip(true_label_cols, predicted_label_cols):
    classified_df['results_' + true_col] = np.where(classified_df[true_col] == classified_df[pred_col], "Correct", "Incorrect")

In [101]:
classified_df

Unnamed: 0,patient_id,split,final_deid,any_cancer,progression,response,brain_met,bone_met,adrenal_met,liver_met,...,results_any_cancer,results_progression,results_response,results_brain_met,results_bone_met,results_adrenal_met,results_liver_met,results_lung_met,results_node_met,results_peritoneal_met
0,1436,train,No convincing evidence of lung cancer recurrence.,0,0,0,0,0,0,0,...,Correct,Correct,Incorrect,Correct,Correct,Correct,Correct,Correct,Correct,Correct
1,1055,train,IMPRESSION: Larger nodules have decreased in...,1,0,1,0,0,0,0,...,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Incorrect
2,2442,train,1. 2.7 cm region of FDG avidity in the panc...,1,0,0,0,0,0,0,...,Correct,Incorrect,Correct,Correct,Correct,Correct,Correct,Incorrect,Correct,Correct
3,228,train,1. Overall decreased tumor burden as evidenced...,1,0,1,0,1,0,0,...,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct
4,1092,train,Slow progression of clustered perilymphatic no...,1,1,0,0,0,0,0,...,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct,Correct


In [None]:
classified_df.to_csv("classified_df_200.csv")