**MIMIC Visualization (AI 395T) - Seth Zapata (shz256)**

## Set environment variable for Claude API key

See https://console.anthropic.com/settings/keys to get started

In [None]:
import os
from getpass import getpass

# Do not store the key in the notebook itself: reuse a key that is already in the
# environment, otherwise ask for it interactively (getpass does not echo the input
# into the cell output).
if not os.environ.get('ANTHROPIC_API_KEY'):
    os.environ['ANTHROPIC_API_KEY'] = getpass('Anthropic API key: ')

In [None]:
import pandas as pd
from anthropic import Anthropic

## Connection to Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem (force_remount refreshes an existing mount).
drive.mount("/content/drive", force_remount=True)

# Folder on the mounted drive that holds the notebook's data; the trailing slash
# matters because later cells build file paths by plain string concatenation.
basePath = "/content/drive/My Drive/Colab Notebooks/LLM Tutorial/"

Mounted at /content/drive


## Anthropic package installation

In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.39.0-py3-none-any.whl.metadata (22 kB)
Downloading anthropic-0.39.0-py3-none-any.whl (198 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.4/198.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.39.0


## Create the Anthropic client

In [None]:
# API client used by every request in this notebook; the key is read from the
# ANTHROPIC_API_KEY environment variable (None is passed if it is unset).
anthropic = Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))

In [None]:
# Shared conversation history: a list of {"role", "content"} dicts that is sent
# with every request so the model sees the earlier turns.
messages = list()

## Send and retrieve message with history support

In [None]:
def send_message(message="", verbose=False, max_tokens=1024):
  """Send one user turn to Claude together with the stored history and return the reply text.

  The request consists of the module-level ``messages`` history plus the new
  user turn. The history is only extended once the API call has succeeded, so
  a failed request does not leave an unanswered user turn behind that would be
  resent with every later call.

  Args:
    message: Text of the new user turn.
    verbose: When True, print the full list of messages being sent. Off by
      default because the history contains the complete data prompt and
      floods the cell output on every call.
    max_tokens: Upper bound on the length of the reply, passed through to the
      API. Defaults to the previously hard-coded 1024.

  Returns:
    The text of the first content block of the reply.

  Raises:
    Whatever ``anthropic.messages.create`` raises; the history is left
    unchanged in that case.
  """
  user_turn = {"role": "user", "content": message}

  # Build the request from a copy so the shared history is untouched until success.
  request_messages = messages + [user_turn]

  if verbose:
    print(request_messages)

  # A separate name keeps the `message` argument from being shadowed by the response.
  response = anthropic.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=max_tokens,
    messages=request_messages,
  )

  reply_text = response.content[0].text

  # Record both sides of the exchange together.
  messages.extend([user_turn, {"role": "assistant", "content": reply_text}])

  return reply_text

In [None]:
# Quick check of the history mechanism: ask the model about the previous user turn
# and print the reply text returned by send_message.
print(send_message("What was my last message to you?"))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}]
The last message you sent me was "What was my last message to you?"


## Retrieve medical dataset files

In [None]:
# Load every table of the medical dataset into a dict keyed by table name.
# Each table lives at <basePath>medical_data/<table>.csv; the order of the names
# below is the insertion order of the resulting dict.
medical_dict = {
    table: pd.read_csv(basePath + f'medical_data/{table}.csv')
    for table in (
        'allergies',
        'careplans',
        'claims',
        'claims_transactions',
        'conditions',
        'devices',
        'encounters',
        'imaging_studies',
        'immunizations',
        'medications',
        'observations',
        'organizations',
        'patients',
        'payer_transitions',
        'payers',
        'procedures',
        'providers',
        'supplies',
    )
}

In [None]:
# Show the plain-text form of the allergies table (the same form that is later
# interpolated into the prompt).
print(medical_dict['allergies'])

         START  STOP                               PATIENT  \
0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   
1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   
2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   
3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   
4   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   
..         ...   ...                                   ...   
87  2021-05-11   NaN  de480ca4-19a6-f2e0-7922-1c51e7c8dcb8   
88  2021-05-11   NaN  de480ca4-19a6-f2e0-7922-1c51e7c8dcb8   
89  2021-05-11   NaN  de480ca4-19a6-f2e0-7922-1c51e7c8dcb8   
90  1988-02-01   NaN  14dc5e57-1b84-3305-c042-86c9fc7e4996   
91  1963-02-20   NaN  d426334c-a982-3a31-7e0f-ca3c7fe01310   

                               ENCOUNTER       CODE     SYSTEM  \
0   f5242c34-8841-269a-50c5-c6f623575955  419199007  SNOMED-CT   
1   f5242c34-8841-269a-50c5-c6f623575955   84489001  SNOMED-CT   
2   f5242c34-8841-269a-50c5-c6f623575955  264287008  SNOM

## 1st prompt: Data evaluation and summarization

In [None]:
# First prompt: an instruction followed by one "Information on <table>: <frame>"
# line per entry of medical_dict, in the dict's insertion order.
# NOTE(review): each frame is interpolated in its printed form, which pandas
# abbreviates for larger tables (the allergies printout shows only rows 0-4 and
# 87-91 with a ".." row in between) — confirm that sending this abbreviated view
# rather than the full CSV contents is intended.
prompt = (
    "\n"
    "I am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n"
    "\n"
    + "\n".join(
        f"Information on {table}: {frame}"
        for table, frame in medical_dict.items()
    )
    + "\n"
    "\n"
)

In [None]:
# Send the data-summary prompt as the next user turn and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## 2nd prompt: Textualization of tabular data

In [None]:
# Second prompt: ask for a textual rendering of the claims_transactions table.
# Joining with "\n" around empty strings reproduces the leading and trailing newline.
prompt = "\n".join([
    '',
    'Please convert the tabular data of the table "claims_transactions" into text format". Please be thorough and double check your text output with the given dataframe for the corresponding table.',
    '',
])

In [None]:
# Send the textualization request and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## 3rd prompt: Set guidelines for procedure prediction task

Here, we use the chain-of-thought (CoT) technique to guide the Claude LLM toward the desired response.

In [None]:
# Third prompt: task definition for the procedure-prediction exercise.
# The split is 106 patients = 74 for learning + 32 held out, so the held-out count
# is 32 everywhere and the requested table has 33 rows (1 header + 32 patients).
prompt="""
Your task is to make predictions of what type of procedure a patient may be administered depending on a variety of factors, including but not limited to:
  - Type of medical condition for a patient
  - Type of administered medication for a patient
  - Type of encounter a patient has and corresponding reason
  - Relevant demographic information (i.e. age, race, gender, etc.)
  - Etc.

There are 106 unique patients that can be found in the patient table. You will train (i.e. learn) on the first 74 patients and their corresponding relevant data needed to
learn as part of the initial task found in the other tables provided. You will be learning the most pertinent determinant factors that can be used to most accurately estimate
the type of procedure a patient may be administered.

One key point to ensure you are reading patient data consistently is to make sure you match the 'Id' column value in the patients table to any other table you deem necessary to
learn the most pertinent determinant factors that can be used to most accurately estimate the type of procedure a patient may be administered. If there are any other "foreign keys"
besides 'Id' that correlate a specific patient's data across multiple tables, be sure to keep track of that as well, if applicable

You should take the time to think as deeply and as carefully as possible about how to most accurately make these predictions based on the guidelines and provided data. You will
then verify your own intermediate findings by checking other patient information to see if your training is correct. This will be the last 32 patients in the patients table.
You will make predictions on these 32 patients for their most likely type of procedure without actually looking at their true procedure type in the procedure tables, and then
output a chart of whether you were wrong or right, your reasoning on your choice, and how you would improve if applicable for future predictions. You can add more relevant
information here if it would be helpful as part of an prediction accuracy analysis.

You can follow and improve on the provided format here:

PATIENT     |     PREDICTION (procedure)     |     ACTUAL (checking procedure table for PATIENT ID)     |              Justification            |     Improvement for future predictions
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
00000000-...|    Brief general examination   |                  Depression screening                    |   XXX medication + age + income, etc. |   [Look at real patient data, and describe improvement here]
00000001-...|    Assessment of anxiety       |                  Assessment of anxiety                   |   XX medication and YY encounter      |     N/A

Again, only "train" on the first 74 patients, and don't view any data for the last 32 patients until you get to step where you must compare your prediction to the actual procedure. In other words,
you should make your predictions of the last 32 patients a set of "blind" predictions, where you are missing the procedure table essentially. Only during training/learning can you use all of the tables.
Your table output should be of shape 33 x 5, where the first row are the column title headers, and the rest of the rows contain patient prediction of procedure as outlined above.

Again, remember to think thoroughly and deeply when training/learning the most determinant and relevant factors in predicting the type of procedure for patients.
"""

In [None]:
# Send the prediction-task guidelines and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## 4th prompt: Answer clarifications (pt. 1)

In [None]:
# Fourth prompt: numbered answers to clarifying questions (presumably the ones
# raised in the model's previous reply — the questions themselves are not in this cell).
prompt="""
To answer your questions:
  1. No, you are not temporally restricted on the dataset, feel free to use data from any date ranges you find applicable to the task
  2. Yes, this can be one of the factors for you to consider if necessary. It will be up to you to decide the "weight" of this factor on any given patient, as some patients may less events
  overall in their history (i.e. this whole set of data), and whether this will contribute to the task at hand.
  3. Let's focus on the most frequent procedure for patients. If you feel it is valuable, you can include other relevant procedures as part of your training and predictions results. You can
  even create an appropriate categorical label for procedures overall if you identify a trend that many patients have one or more of the same type of procudures. For example, 'Depression screening',
  'Assessment of anxiety' might both be categorically labeled as 'Mental Health Examination', and you could make your training and predictions as this label if applicable. If you do decide to go
  this route, then you must include the specific procedures identified as part of your prediction (i.e. Depression screening and Assessment of Anxiety must be specified like above)

"""

In [None]:
# Send the first set of clarification answers and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## 5th prompt: Answer clarifications (pt. 2)

In [None]:
# Fifth prompt: asks for the intermediate analysis, answers a second round of
# numbered questions (presumably from the model's previous reply) and adds a
# confidence column to the requested prediction chart.
prompt="""
Yes, please show your intermediate analysis of the patterns you find. This will be valuable to me and you in understanding how to best make procudure predictions, so we should not ignore it.

To answer your numbered questions:
  1. If you find that this information is relevant to the task at hand, then by all means. You may find that it is simpler to look at REASONDESCRIPTION for a textual understanding of the REASONCODE
  for example, so in this case, it would be to your benefit to ignore REASONCODE altogether. I'll leave these determinations up to you.
  2. If you feel that the time range of administered medication plays a significant role in procedure prediction, then by all means. I'll leave these determinations up to you.
  3. Yes go ahead and include confidence levels in your predictions. You can make this another column in the chart that I gave as an example.

Please proceed with the complete analysis and prediction table.
"""

In [None]:
# Send the second set of clarification answers and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## 6th prompt: Answer clarifications (pt. 3)

In [None]:
# Sixth prompt: tells the model to stop asking questions and to return its
# intermediate results, its analysis and the final prediction table in one reply.
# NOTE(review): the prompt asks for an unrestricted, verbose answer while
# send_message requests at most 1024 output tokens — confirm the cap is intended.
prompt="""
Yes, please show any intermediate results from here on out. Your next response should contain the rest of your complete process, including:
  1. Your intermediate results
  2. Your analysis of the above points
  3. The final prediction table

No need to ask any further clarifications or question, please proceed with the prediction task in full. Do not limit your response in any way, please be as verbose as needed to display
all of the relevant information.
"""

In [None]:
# Send the request for the full analysis and prediction table, then print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  

## Final prompt: Complete procedure prediction table

In [None]:
# Final prompt: ask for the finished prediction table for the 32 held-out patients.
# Joining with "\n" around empty strings reproduces the leading and trailing newline.
prompt = "\n".join([
    "",
    "Yes, please complete the prediction table for all 32 remanining patients. It's alright that you are not able to show the actual data analysis, as long as you are able to explain your process",
    "at lengthy detail, that is sufficient.",
    "",
])

In [None]:
# Send the final request and print the reply text.
print(send_message(prompt))

[{'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'I cannot see any earlier messages in our conversation. Each conversation with me starts fresh, so I can only see what you share with me in our current interaction.'}, {'role': 'user', 'content': 'What was my last message to you?'}, {'role': 'assistant', 'content': 'The last message you sent me was "What was my last message to you?"'}, {'role': 'user', 'content': '\nI am sending you a set of medical information as .csv files, and I would like you to summarize your initial evaluation of the data. Please be thorough and contextual in your response.\n\nInformation on allergies:          START  STOP                               PATIENT  \\\n0   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n1   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n2   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n3   2014-07-26   NaN  4569671e-ed39-055f-8e78-422b96c9896b   \n4  