In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser
from langchain_ollama import OllamaLLM
import pandas as pd
from datetime import datetime
from langchain_community.llms import Ollama
import numpy as np
import pandas as pd
import json
import re
from langchain.schema import Document
from langgraph.graph import END, StateGraph
from langchain_community.tools.tavily_search import TavilySearchResults
from typing_extensions import TypedDict
from typing import List

In [None]:
# Load the raw clinical notes; the per-patient stitching happens in a later cell.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
df = pd.read_csv(r'C:\Users\yens01\Projects\Frailty\Frailty Notes 60D 5.27.2025.csv')

In [None]:
#Importing OpenAI's token counter
import tiktoken
# NOTE(review): this first assignment is immediately overwritten by the next line.
encoding = tiktoken.get_encoding("o200k_base")
# The module-level `encoding` is not read by num_tokens_from_string, which looks up
# its own encoding by name on every call.
encoding = tiktoken.encoding_for_model("gpt-4o")
#Function for token counter
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens `string` occupies under the named tiktoken encoding."""
    tokenizer = tiktoken.get_encoding(encoding_name)
    return len(tokenizer.encode(string))
# Stitch all notes that share the same patient/score/date/age key into one document.
# Notes are separated by a blank line ('\n\n'); a '/n/n' literal would insert the
# four characters "/n/n" instead of line breaks.
stitched = (
    df.groupby(['person_id','MRN','Validated_RAI_Score','Dt','Age'])['note_text']
    .agg('\n\n'.join)
    .reset_index()
)
#Applying counter to df
stitched['token_count'] = stitched['note_text'].apply(lambda x: num_tokens_from_string(x, "o200k_base"))

In [None]:
#Excluding high token count
# Rows above 100000 tokens are set aside in `high_count_stitched`; `stitched` keeps the rest.
token_counts = stitched['token_count']
high_count_stitched = stitched.loc[token_counts.gt(100000)]
stitched = stitched.loc[token_counts.le(100000)]

In [None]:
#Connecting to Minervas ollama
# NOTE(review): MINERVA_API is not defined in any visible cell; on a fresh kernel this
# line raises NameError unless it is defined elsewhere — confirm where it comes from.
# NOTE(review): num_ctx equals the 100000-token cut-off used above, but that cut-off counts
# note tokens only, with the tiktoken o200k_base encoding — confirm there is headroom for the prompt text.
LLM = OllamaLLM(model="qwen3:30b", temperature=0, base_url= MINERVA_API, num_ctx = 100000)

1. Determine if the patient has active cancer.
2. Calculate the age of the patient and give it a score.
3. Determine the sex of the patient and give it a score.
4. Calculate the Co-Morbidities score. Determine if the patient had unintentional weight loss of more than 10 lbs in the past 3 months, if the patient had renal failure, if the patient had chronic/congestive heart failure, if the patient had poor appetite, and if the patient had shortness of breath at rest.
5. Determine if the patient resides in a setting other than independent living and give it a score.
6. Calculate the Activities of Daily Living & Cognitive Decline score. Determine the state of Mobility/Locomotion, Eating, Toilet Use, and Personal Hygiene for the patient.
7. Determine if the patient’s cognitive skills or status deteriorated over the past 3 months and adjust the score from step 6 accordingly.

In [None]:
#Determine if the patient has active cancer
# Prompt asking for a JSON object with the keys active_cancer, cancer_type,
# active_skin_cancer and active_melanoma_cancer. `clinical_notes` is the only template variable.
Cancer_prompt = PromptTemplate(
    template = """
You are an oncologist and an expert in analyzing clinical notes.

Based on the clinical notes below, make the following determinations:

1. Whether the patient has active cancer and specify the type of cancer.
2. Whether the patient has any type of active skin cancer.
3. Whether the patient has any type of active melanoma.

Return ONLY a valid JSON object with these keys:
- "active_cancer" (boolean)
- "cancer_type" (string, or empty string if none)
- "active_skin_cancer" (boolean)
- "active_melanoma_cancer" (boolean)

Do NOT include any explanation, markdown formatting, or extra text beyond the JSON object.

CLINICAL NOTES:
{clinical_notes}
""",
    input_variables=["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The age_sex_cancer node parses that string as JSON.
cancer_category_generator = Cancer_prompt | LLM | StrOutputParser()




In [None]:
#Determine if the patient is male or female
# Prompt asking for a JSON object with the single key "sex" ("male" or "female").
sex_prompt = PromptTemplate(
    template = """
You are a medical doctor and an expert in analyzing clinical notes.

Based on the clinical notes below, determine the patient's sex.

Return ONLY a valid JSON object with this key:
- "sex" (value must be either "male" or "female")

Do NOT include any explanation, markdown formatting, or extra text beyond the JSON object.

CLINICAL NOTES:
{clinical_notes}
    """,
    input_variables = ["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The age_sex_cancer node parses that string as JSON.
sex_category_generator = sex_prompt | LLM | StrOutputParser()

In [None]:
#Determine Medical Co-Morbidities 
# Prompt asking for five true/false findings, returned as a JSON object with the keys
# unintentional_weight_loss, renal_failure, heart_failure, poor_appetite, shortness_of_breath.
Morbidities_prompt = PromptTemplate(
    template = """
You are a medical doctor and an expert in analyzing clinical notes.

Based on the clinical notes below, determine the following:

- Whether the patient had unintentional weight loss of more than 10 lbs in the past 3 months.
- Whether the patient had renal failure.
- Whether the patient had chronic or congestive heart failure.
- Whether the patient had poor appetite.
- Whether the patient had shortness of breath at rest.

For each, return true if there is evidence, otherwise false.

Return ONLY a valid JSON object with these keys:
- "unintentional_weight_loss"
- "renal_failure"
- "heart_failure"
- "poor_appetite"
- "shortness_of_breath"

Do NOT include any explanation, markdown formatting, or extra text beyond the JSON object.

CLINICAL NOTES:
{clinical_notes}
    """,
    input_variables = ["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The medical_comorbidities node parses that string as JSON.
morbidities_category_generator = Morbidities_prompt | LLM | StrOutputParser()

In [None]:
#Determine Independent living 
# Prompt asking whether the notes show residence in a skilled nursing facility,
# assisted living or a nursing home; one true/false JSON key per setting.
Independent_prompt = PromptTemplate(
    template = """
You are a medical doctor.

Read the clinical notes and determine if there is evidence that the patient resides in:

- a skilled nursing facility
- assisted living
- a nursing home

Return only a valid JSON object with these keys:
- "skilled_nursing_facility"
- "assisted_living"
- "nursing_home"

Set each value to true if there is evidence in the clinical notes, otherwise false.

Return only the JSON object. 
Do not include any markdown formatting (e.g., triple backticks).
Do not include any explanation or extra text.

CLINICAL NOTES:
{clinical_notes}
    """,
    input_variables = ["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The CRADL node parses that string as JSON.
independent_category_generator = Independent_prompt | LLM | StrOutputParser()


In [None]:
#Determine activities 
# Prompt asking for the level of functioning in four activities of daily living.
# The five allowed answer strings are the same ones the ADLCD node maps to 0-4 points.
activities_prompt = PromptTemplate(
    template = """
You are a medical doctor and an expert in analyzing clinical notes.

Based on the clinical notes below, determine the patient's current level of functioning in the following areas:

- Mobility/locomotion
- Eating
- Toilet use
- Hygiene

For each, return one of the following values only:
"independent", "supervised", "limited assistance", "extensive assistance", or "total dependence".

Return ONLY a valid JSON object with these keys:
- "mobility_locomotion"
- "eating"
- "toilet_use"
- "hygiene"

Do NOT include any explanation, markdown formatting, or extra text beyond the JSON object.

CLINICAL NOTES:
{clinical_notes}
    """,
    input_variables = ["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The ADLCD node parses that string as JSON.
activities_category_generator = activities_prompt | LLM | StrOutputParser()

In [None]:
#Determine cognitive decline
# Prompt asking whether cognition deteriorated over the past 3 months; the answer is a
# JSON object with the single true/false key "cognitive_decline".
cog_decline_prompt = PromptTemplate(
    template = """
You are a medical doctor and an expert in analyzing clinical notes.

Based on the clinical notes below, determine if the patient's cognitive skills or status has deteriorated over the past 3 months.

Return ONLY a valid JSON object with this key:
- "cognitive_decline" (value must be either true or false)

Do NOT include any explanation, markdown formatting, or extra text beyond the JSON object.

CLINICAL NOTES:
{clinical_notes}
    """,
    input_variables = ["clinical_notes"],
)

# Chain: prompt -> LLM -> plain string. The ADLCD node parses that string as JSON.
cog_decline_category_generator = cog_decline_prompt | LLM | StrOutputParser()

In [None]:
#Setting up State
class GraphState(TypedDict):
    """Shared state passed between the graph nodes for one patient.

    NOTE(review): the ``*_output`` fields are annotated ``str`` but the nodes store
    the parsed JSON (a dict) or the string "JSON ERROR" in them, and ``age_input``
    is written back as an int by age_sex_cancer.
    """
    # Input: stitched note text for one patient.
    clinical_notes: str
    # Parsed model replies, written by the scoring nodes.
    cancer_output: str
    activities_output: str
    independent_output: str
    morbidities_output: str
    cog_decline_output: str
    # Input: patient age supplied by the caller.
    age_input : str  #from input
    sex_output: str
    #cog_decline: str
    # Sum of the four component scores, written by RAI_SCORE.
    RAI_score: int
    # Component scores, one per scoring node.
    age_sex_cancer_score: int
    medical_comorbidities_score : int 
    cradl_score : int 
    adlcd_score : int
    # Not written by any node in this notebook (RESULT returns the flattened keys instead).
    FINAL_RESULT: dict   
    

Nodes
1. Age, Sex, and Cancer
2. Medical Co-Morbidities
3. Cognition, Residence, and Activity of Daily Living
4. Activities of Daily Living and Cognitive Decline
5. Calculate the RAI score
6. Results (state printer)


In [None]:
# Age, sex, and cancer V2, hard code age 
def age_sex_cancer(state):
    """Compute the age / sex / cancer component score for one patient.

    Reads ``clinical_notes`` and ``age_input`` from ``state`` and queries the
    cancer and sex chains. Points are assigned as follows:

    * no active cancer, or active skin cancer that is not melanoma:
      2 points up to age 69, one more per five-year band, 9 from age 100;
    * otherwise active cancer or active melanoma:
      20 points up to age 69, one less per five-year band, 13 from age 100;
    * neither condition recognised: 0 points;
    * male sex (compared case-insensitively) adds 5 points.

    Returns:
        dict with ``age_sex_cancer_score``, ``age_input`` (as int),
        ``cancer_output`` and ``sex_output``. A reply that cannot be parsed as
        JSON is reported as the string "JSON ERROR"; if either reply is not a
        JSON object the score is 0.

    Raises:
        ValueError / TypeError: if ``age_input`` cannot be converted with ``int()``.
    """
    clinical_notes = state['clinical_notes']
    age = int(state['age_input'])

    def _ask(chain):
        # Keep only the text after the last "</think>" marker and parse it as JSON.
        reply = chain.invoke({"clinical_notes": clinical_notes})
        try:
            return json.loads(reply.split('</think>').pop())
        except json.JSONDecodeError:
            # Without this the fallback below would be unreachable for malformed replies.
            return "JSON ERROR"

    cancer_output = _ask(cancer_category_generator)
    sex_output = _ask(sex_category_generator)

    if not isinstance(cancer_output, dict) or not isinstance(sex_output, dict):
        return {
            "age_sex_cancer_score": 0,  # Safe for sum() operations
            "age_input": age,
            "cancer_output": cancer_output,
            "sex_output": sex_output,
        }

    # Number of five-year thresholds reached: 0 for age <= 69, 1 for 70-74, ..., 7 for >= 100.
    age_band = sum(age >= lower_bound for lower_bound in (70, 75, 80, 85, 90, 95, 100))

    active_cancer = cancer_output.get("active_cancer")
    skin_cancer = cancer_output.get("active_skin_cancer")
    melanoma = cancer_output.get("active_melanoma_cancer")

    if active_cancer is False or (skin_cancer is True and melanoma is False):
        # no active cancer or active skin cancer but not melanoma
        # NOTE(review): a patient with non-melanoma skin cancer AND another active
        # cancer also lands here — confirm this is intended.
        age_sex_cancer_score = 2 + age_band
    elif active_cancer is True or melanoma is True:
        # active cancer or melanoma
        age_sex_cancer_score = 20 - age_band
    else:
        # Values were not strict booleans; leave the age/cancer part unscored.
        age_sex_cancer_score = 0

    # Accept "Male", " male ", ... as well as the exact "male" requested in the prompt.
    if str(sex_output.get('sex')).strip().lower() == 'male':
        age_sex_cancer_score += 5

    return {
        "age_sex_cancer_score": age_sex_cancer_score,
        "age_input": age,
        "cancer_output": cancer_output,
        "sex_output": sex_output,
    }


In [None]:
#Medical Co-Morbidities
def medical_comorbidities(state):
    """Compute the medical co-morbidities component score for one patient.

    Queries the co-morbidities chain with ``state['clinical_notes']`` and adds the
    points of every finding the model reports as JSON ``true``:
    weight loss 5, renal failure 6, heart failure 4, poor appetite 4,
    shortness of breath 8.

    Returns:
        dict with ``medical_comorbidities_score`` and ``morbidities_output``.
        If the reply is not a JSON object (including replies that cannot be
        parsed at all) the score is 0 and ``morbidities_output`` is "JSON ERROR".
    """
    print("--CALCULATING MEDICAL CO-MORBIDITIES SCORE--")
    clinical_notes = state['clinical_notes']

    reply = morbidities_category_generator.invoke({"clinical_notes": clinical_notes})
    try:
        # Keep only the text after the last "</think>" marker.
        morbidities_output = json.loads(reply.split('</think>').pop())
    except json.JSONDecodeError:
        # Route malformed replies into the same fallback as non-object replies.
        morbidities_output = None

    if not isinstance(morbidities_output, dict):
        return {"medical_comorbidities_score" : 0, "morbidities_output" : "JSON ERROR"}

    finding_points = {
        'unintentional_weight_loss': 5,
        'renal_failure': 6,
        'heart_failure': 4,
        'poor_appetite': 4,
        'shortness_of_breath': 8,
    }
    # Only a strict boolean True counts; strings such as "true" are ignored.
    medical_comorbidities_score = sum(
        points for finding, points in finding_points.items()
        if morbidities_output.get(finding) is True
    )

    return {"medical_comorbidities_score": medical_comorbidities_score, "morbidities_output" : morbidities_output}
    

In [None]:
# Cognition, Residence, and Activity of Daily Living (CRADL)
def CRADL(state):
    """Compute the residence component score for one patient.

    Queries the residence chain with ``state['clinical_notes']``. The score is 8
    when the model reports JSON ``true`` for a skilled nursing facility, assisted
    living or a nursing home, otherwise 0.

    Returns:
        dict with ``cradl_score`` and ``independent_output``. If the reply is not
        a JSON object (including replies that cannot be parsed at all) the score
        is 0 and ``independent_output`` is "JSON ERROR".
    """
    print("--CALCULATING Cognition, Residence, and Activity of Daily Living Score--")
    clinical_notes = state['clinical_notes']

    reply = independent_category_generator.invoke({"clinical_notes": clinical_notes})
    try:
        # Keep only the text after the last "</think>" marker.
        independent_output = json.loads(reply.split('</think>').pop())
    except json.JSONDecodeError:
        # Route malformed replies into the same fallback as non-object replies.
        independent_output = None

    if not isinstance(independent_output, dict):
        return {"cradl_score" : 0, "independent_output" : "JSON ERROR"}

    settings = ('skilled_nursing_facility', 'assisted_living', 'nursing_home')
    # Only a strict boolean True counts as evidence.
    cradl_score = 8 if any(independent_output.get(setting) is True for setting in settings) else 0

    return {"cradl_score" : cradl_score, "independent_output" : independent_output}

In [None]:
#Activities of Daily Living and Cognitive Decline
def ADLCD(state):
    """Compute the activities-of-daily-living / cognitive-decline component score.

    Queries the activities chain and the cognitive-decline chain with
    ``state['clinical_notes']``. Every value in the activities reply is mapped to
    points (independent 0, supervised 1, limited assistance 2, extensive
    assistance 3, total dependence 4; labels are matched case-insensitively,
    unknown values add nothing) and summed. If cognitive decline is JSON
    ``true`` the sum is adjusted: 0 -> -2, 1-2 -> -1, 3-4 unchanged, 5-7 -> +1,
    8-9 -> +2, 10-11 -> +3, 12-13 -> +4, 14-16 -> +5.

    Returns:
        dict with ``adlcd_score``, ``activities_output`` and
        ``cog_decline_output``. If either reply is not a JSON object (including
        replies that cannot be parsed at all) the score is 0 and both outputs
        are "JSON ERROR".
    """
    print ("--CALCULATING ACTIVITIES OF DAILY LIVING AND COGNITIVE DECLINE SCORE--")
    clinical_notes = state['clinical_notes']

    def _ask(chain):
        # Keep only the text after the last "</think>" marker and parse it as JSON.
        reply = chain.invoke({"clinical_notes": clinical_notes})
        try:
            return json.loads(reply.split('</think>').pop())
        except json.JSONDecodeError:
            # Route malformed replies into the same fallback as non-object replies.
            return None

    activities_output = _ask(activities_category_generator)
    cog_decline_output = _ask(cog_decline_category_generator)

    if not isinstance(activities_output, dict) or not isinstance(cog_decline_output, dict):
        return {"adlcd_score" : 0, "activities_output" : "JSON ERROR", "cog_decline_output" : "JSON ERROR"}

    status_scores = {
        'independent' : 0,
        'supervised' : 1,
        'limited assistance' : 2,
        'extensive assistance' : 3,
        'total dependence' : 4
    }

    adlcd_score = 0
    for status in activities_output.values():
        # Normalise case/whitespace so "Total Dependence" is not silently scored as 0.
        label = status.strip().lower() if isinstance(status, str) else None
        adlcd_score += status_scores.get(label, 0)

    if cog_decline_output.get('cognitive_decline') is True:
        # Adjustment keyed by the unadjusted ADL sum; sums not listed stay unchanged.
        decline_adjustment = {
            0: -2,
            1: -1, 2: -1,
            5: 1, 6: 1, 7: 1,
            8: 2, 9: 2,
            10: 3, 11: 3,
            12: 4, 13: 4,
            14: 5, 15: 5, 16: 5,
        }
        adlcd_score += decline_adjustment.get(adlcd_score, 0)

    return {"adlcd_score" : adlcd_score, "activities_output" : activities_output, "cog_decline_output" : cog_decline_output}


In [None]:
#Summing up the RAI score
def RAI_SCORE(state): 
    """Add the four component scores in ``state`` and return them as ``RAI_score``.

    Reads ``adlcd_score``, ``cradl_score``, ``medical_comorbidities_score`` and
    ``age_sex_cancer_score``; raises KeyError if one of them is missing.
    """
    print ("--CALCULATING RAI SCORE--")
    component_keys = (
        'adlcd_score',
        'cradl_score',
        'medical_comorbidities_score',
        'age_sex_cancer_score',
    )
    return {"RAI_score": sum(state[key] for key in component_keys)}

In [None]:
def RESULT(state):
    """Return a copy of ``state`` without the ``clinical_notes`` entry.

    NOTE(review): the returned dict is handed back to the graph as this node's
    update, so ``clinical_notes`` presumably still appears in the final
    ``app.invoke`` output (a later cell drops an 'llm_clinical_notes' column) —
    confirm whether the exclusion here has any effect.
    """
    print("--SHOWING RESULTS--")
    #excluding clinical notes in final state
    return {key: value for key, value in state.items() if key != 'clinical_notes'}

Building Graph

In [None]:
# Create the graph over GraphState and register the four scoring nodes,
# the summing node and the result node.
workflow = StateGraph(GraphState)

#Defining nodes
node_registry = [
    ("Age, Sex, and Cancer", age_sex_cancer),
    ("Medical Co-Morbidities", medical_comorbidities),
    ("Cognition, Residence & Activity of Daily Living (CRADL)", CRADL),
    ("Activities of Daily Living & Cognitive Decline (ADLCD)", ADLCD),
    ("Calculate the RAI score", RAI_SCORE),
    ("Results", RESULT),
]
for node_label, node_callable in node_registry:
    workflow.add_node(node_label, node_callable)

In [None]:
#Defining edges, multiple entry points  
# Each of the four scoring nodes is an entry point and feeds the summing node,
# which in turn feeds the result node.
scoring_nodes = (
    "Age, Sex, and Cancer",
    "Medical Co-Morbidities",
    "Cognition, Residence & Activity of Daily Living (CRADL)",
    "Activities of Daily Living & Cognitive Decline (ADLCD)",
)
for scoring_node in scoring_nodes:
    workflow.set_entry_point(scoring_node)
for scoring_node in scoring_nodes:
    workflow.add_edge(scoring_node, "Calculate the RAI score")
workflow.add_edge("Calculate the RAI score", "Results")
workflow.add_edge("Results", END)

In [None]:
#Compile
# `app` is the runnable graph that agent() invokes once per patient.
app = workflow.compile()

In [None]:
#Running LLM through the DF
# Work on a copy so the token-filtered `stitched` frame stays untouched.
stitched_llm = stitched.copy()
#Function
def agent(notes, age_input):
    """Run the compiled graph for one patient.

    Args:
        notes: stitched clinical note text.
        age_input: patient age, passed to the graph as ``age_input``.

    Returns:
        The value returned by ``app.invoke`` on success, or the string
        "Invalid Output" if the graph raises. The failure reason is printed so
        that failed rows can be diagnosed instead of disappearing silently.
    """
    input_data = {
        "clinical_notes": notes,
        "age_input": age_input
    }
    try:
        return app.invoke(input_data)
    except Exception as e:
        # Best-effort batch run: keep going, but report why this row failed.
        print(f"agent failed: {type(e).__name__}: {e}")
        return "Invalid Output"


In [None]:
def func(row):
    """Announce the row's MRN, then score its note text and age with agent()."""
    mrn = row['MRN']
    print(f"Calculating for MRN : {mrn}")
    notes, age = row['note_text'], row['Age']
    return agent(notes, age)

# Runs the graph once per row; each row triggers several LLM calls, so this cell is slow.
# NOTE(review): func prints every MRN into the cell output — clear outputs before sharing.
stitched_llm['llm_output'] = stitched_llm.apply(func, axis=1)

In [None]:
# Copy so the raw LLM outputs in `stitched_llm` survive the column expansion below.
stitched_llm_test = stitched_llm.copy()

In [None]:
#adding llm outputs as new 
# Expand the per-row output dicts into columns. Rows whose output is not a dict
# (agent() returns the string "Invalid Output" on failure) contribute an empty
# record and therefore end up with NaN in every llm_ column. The column is
# addressed by name instead of by position, and the key set is the union over
# all rows rather than the keys of the first row only.
llm_records = [
    output if isinstance(output, dict) else {}
    for output in stitched_llm_test['llm_output']
]
llm_expanded = pd.DataFrame(llm_records, index=stitched_llm_test.index)

llm_output_columns = list(llm_expanded.columns)
llm_output_columns_add = ['llm_' + col for col in llm_output_columns]

#renaming columns
rename_dict = dict(zip(llm_output_columns, llm_output_columns_add))
stitched_llm_test = stitched_llm_test.join(llm_expanded.rename(columns=rename_dict))

In [None]:
# Determine frail/not frail base off LLM output. Frail if RAI >= 21 
# Missing scores compare as False and are therefore labelled 0 (not frail).
stitched_llm_test['LLM_frail_yn'] = stitched_llm_test['llm_RAI_score'].map(lambda rai: int(rai >= 21))

In [None]:
# Export view without the note text and the raw output column.
# NOTE(review): the variable name `to_csv` reads like the DataFrame method of the same name.
to_csv = stitched_llm_test.drop(columns= ['note_text','llm_clinical_notes','llm_output'])

In [None]:
# to_csv.to_csv(r'C:\Users\yens01\Projects\Frailty\LLM_OUTPUT\stitched_final_llm_agent_qwen3 30b FINAL 5.30.2025.csv', index= False)

In [None]:
# Replaces the in-memory results with a previously saved run.
# NOTE(review): the cell that would write a results file is commented out and targets a
# different folder (no "FINAL OUTPUT"), so a fresh run's results are not what gets evaluated below.
stitched_llm_test = pd.read_csv(r'C:\Users\yens01\Projects\Frailty\LLM_OUTPUT\FINAL OUTPUT\stitched_final_llm_agent_qwen3 30b FINAL 5.30.2025.csv')

In [None]:
#Bringing back the patient reported RAI scores 
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
redcap_cohort = pd.read_excel(r'C:\Users\yens01\Projects\Frailty\REDCap_Frailty_Cohort_Final 4.4.2025.xlsx')

In [None]:
# Matching same dtype
# Normalise the join key on both frames to stripped strings before merging.
for mrn_frame in (stitched_llm_test, redcap_cohort):
    mrn_frame['MRN'] = mrn_frame['MRN'].astype(str).str.strip()

# Wanted columns
redcap_cols = [
    'MRN',
    'Total RAI Score',
    'Age',
    "Patient's age score with cancer included",
    'Sex',
    'Unintentional weight loss in the past three months?',
    'Renal failure?',
    'Chronic/congestive heart failure?',
    'Poor appetite?',
    'Shortness of breath (at rest)?',
    'Reside in a setting other than independent living?',
    'Mobility/Locomotion',
    'Eating',
    'Toilet Use',
    'Personal Hygeine',
    'Have cognitive skills/status deteriorated over the past 3 months?',
    'ADL Score (sum of ADL scores) with/without cognitive decline consideration',
]

# Selecting the specified columns
redcap_cohort.loc[:, redcap_cols]


In [None]:
# Inner join on MRN: patients missing from either frame are dropped.
# Both frames carry an 'Age' column, so the result presumably holds 'Age_x'/'Age_y' (pandas default suffixes).
# NOTE(review): if an MRN occurs more than once on either side the join multiplies rows — verify uniqueness.
merged = pd.merge(stitched_llm_test, redcap_cohort[redcap_cols], on= 'MRN', how = 'inner')

In [None]:
# Validated label uses the same cut-off as the LLM label: frail if RAI >= 21.
merged['Validated_frail_yn'] = merged['Validated_RAI_Score'].map(lambda rai: int(rai >= 21))

In [None]:
# Report, per llm* column, how many cells are missing or hold the "JSON ERROR" marker.
llm_column_names = [name for name in merged.columns if name.startswith('llm')]
for col in llm_column_names:
    column_values = merged[col]
    error_count = column_values.isna().sum() + column_values.eq('JSON ERROR').sum()
    if error_count > 0:
        print(f"{col} : {error_count}")

In [None]:
# Class balance of the validated frail (1) / not frail (0) labels.
merged['Validated_frail_yn'].value_counts()

In [None]:
#mean of not frail patients 
not_frail_rows = merged['Validated_frail_yn'] == 0
merged.loc[not_frail_rows, ['Validated_RAI_Score', 'llm_RAI_score']].mean()

In [None]:
#mean of frail patients 
frail_rows = merged['Validated_frail_yn'] == 1
merged.loc[frail_rows, ['Validated_RAI_Score', 'llm_RAI_score']].mean()

In [None]:
# Rows where the age/sex/cancer component is 0 or missing (these are removed in the next cell).
unscored_age_sex_cancer = merged['llm_age_sex_cancer_score'].eq(0) | merged['llm_age_sex_cancer_score'].isna()
merged[unscored_age_sex_cancer]

In [None]:
# Keep only rows with a non-zero, non-missing age/sex/cancer component and renumber the index.
age_sex_cancer_scores = merged['llm_age_sex_cancer_score']
merged = merged[age_sex_cancer_scores.ne(0) & age_sex_cancer_scores.notna()].reset_index(drop = True)

In [None]:
from sklearn.metrics import classification_report
# Frail / not-frail agreement between the validated label and the LLM label.
target_names = ['Not Frail', 'Frail']
y_true = merged['Validated_frail_yn']
y_pred = merged['LLM_frail_yn']
# The model used in this notebook is qwen3:30b, so the heading names that model.
print("qwen3 30b Agentic LLM Frail Classification Report from RAI Survey:")
print(classification_report(y_true,y_pred, target_names = target_names))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

# Compute confusion matrix with Frail (1) first, so row/column 0 is the positive class.
# Relies on y_true / y_pred still holding the binary labels from the classification-report cell.
cm = confusion_matrix(y_true, y_pred, labels=[1, 0])
classes = ['Frail', 'Not Frail']

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)

ax.set(xticks=np.arange(len(classes)),
       yticks=np.arange(len(classes)),
       xticklabels=classes,
       yticklabels=classes,
       xlabel='LLM Classification',
       ylabel='Validated Classification')

# Rows are the validated class, columns the LLM class, both ordered Frail, Not Frail.
labels = np.array([['TP', 'FN'], ['FP', 'TN']])
# Cells darker than half the maximum count get white text for contrast.
thresh = cm.max() / 2.

for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, f"{labels[i, j]}\n{cm[i, j]}",
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black")

plt.title("Confusion Matrix")
plt.tight_layout()
plt.show()


In [None]:
#pull accuracy for metricsdf
frailty_report = classification_report(y_true,y_pred, target_names = target_names, output_dict= True)
accuracy = frailty_report.get('accuracy')
accuracy

In [None]:
from sklearn.metrics import average_precision_score
# average_precision_score expects a non-thresholded score as its second argument.
# Passing the 0/1 frailty label collapses the precision-recall curve to a single
# operating point, so the continuous LLM RAI score is used as the ranking score.
# y_true must still hold the binary validated labels from the classification-report cell.
pr_auc = average_precision_score(y_true, merged['llm_RAI_score'])
print(f"PR AUC: {pr_auc}")

In [None]:
# Build the report once and read the three 'Frail' metrics from it.
# Relies on y_true / y_pred still holding the binary labels from the classification-report cell.
frail_metrics = classification_report(y_true,y_pred, target_names = target_names, output_dict= True).get('Frail')
precision = frail_metrics.get('precision')
recall = frail_metrics.get('recall')
f1 = frail_metrics.get('f1-score')

In [None]:
from sklearn.metrics import root_mean_squared_error
# Overall agreement between validated and LLM RAI totals.
# From here on y_true / y_pred hold scores, no longer the binary frailty labels.
y_true = merged['Validated_RAI_Score']
y_pred = merged['llm_RAI_score']
print("Agentic LLM:")
RAI_score_rmse = root_mean_squared_error(y_true,y_pred)
print("RAI score RMSE:",RAI_score_rmse)

In [None]:
# Display the four LLM component scores side by side before comparing them to REDCap.
merged[['llm_age_sex_cancer_score',
       'llm_medical_comorbidities_score', 'llm_cradl_score', 'llm_adlcd_score']]

In [None]:
# NOTE(review): this cell is an exact duplicate of the previous one.
merged[['llm_age_sex_cancer_score',
       'llm_medical_comorbidities_score', 'llm_cradl_score', 'llm_adlcd_score']]

In [None]:
# NOTE(review): same list as the `redcap_cols` defined before the merge; it is not read again in this cell.
redcap_cols = [
    'MRN', 'Total RAI Score', 'Age', "Patient's age score with cancer included", 'Sex',
    'Unintentional weight loss in the past three months?', 'Renal failure?',
    'Chronic/congestive heart failure?', 'Poor appetite?', 'Shortness of breath (at rest)?',
    'Reside in a setting other than independent living?', 'Mobility/Locomotion',
    'Eating', 'Toilet Use', 'Personal Hygeine',
    'Have cognitive skills/status deteriorated over the past 3 months?',
    'ADL Score (sum of ADL scores) with/without cognitive decline consideration'
]

# Validated age/sex/cancer score = REDCap age-with-cancer score (missing counts as 0)
# plus 5 points when 'Sex' is exactly 'Male'.
# Computed column-wise so the result does not depend on `merged` having a 0..n-1 index
# (a positional .iloc[i] read paired with a label-based .at[i] write only lines up in that case).
validated_age_score = merged["Patient's age score with cancer included"].astype(float).fillna(0)
validated_sex_score = merged['Sex'].eq('Male').astype(float) * 5
merged['Validated_Age_Sex_Cancer_Score'] = validated_age_score + validated_sex_score


In [None]:
# Compare validated and LLM age/sex/cancer component scores.
age_sex_cancer_pair = ['Validated_Age_Sex_Cancer_Score','llm_age_sex_cancer_score']
y_true = merged[age_sex_cancer_pair[0]]
y_pred = merged[age_sex_cancer_pair[1]]

print(merged[age_sex_cancer_pair])
age_sex_cancer_score_rmse = root_mean_squared_error(y_true,y_pred)
print("Age Sex Cancer Score RMSE:",age_sex_cancer_score_rmse)

In [None]:
# Compare the REDCap ADL score with the LLM ADL / cognitive-decline component.
adl_reference_column = "ADL Score (sum of ADL scores) with/without cognitive decline consideration"
y_true = merged[adl_reference_column]
y_pred = merged['llm_adlcd_score']
adl_score_rmse = root_mean_squared_error(y_true,y_pred)
print("llm_adlcd_score RMSE:",adl_score_rmse)

In [None]:
#adding the medical comorbidities together 
def extract_number(text):
    """Return the digits contained in ``text`` as one integer, or 0 if there are none.

    ``text`` is converted with ``str()`` first, so NaN and other non-string cells
    are accepted. All digit runs are concatenated before conversion
    ("Yes (5)" -> 5, "1 of 2" -> 12).
    """
    digits = ''.join(re.findall(r'[0-9]+', str(text)))
    # A cell without any digit (e.g. a blank REDCap answer) counts as 0 instead of
    # raising ValueError from int('').
    return int(digits) if digits else 0

# Apply function to each relevant column and sum the values
comorbidity_columns = [
    'Unintentional weight loss in the past three months?',
    'Renal failure?',
    'Chronic/congestive heart failure?',
    'Poor appetite?',
    'Shortness of breath (at rest)?',
]
# One Series of points per REDCap question, added element-wise.
merged['Validated_medical_comorbidities_score'] = sum(
    merged[column].apply(extract_number) for column in comorbidity_columns
)


In [None]:
# Compare validated and LLM medical co-morbidity component scores.
y_true = merged["Validated_medical_comorbidities_score"]
y_pred = merged['llm_medical_comorbidities_score']

# The variable name (including its spelling) is read again when metrics_df is built.
medicial_comorbidities_score_rmse  = root_mean_squared_error(y_true,y_pred)
print("medical_comorbidities_score RMSE:",medicial_comorbidities_score_rmse)

In [None]:
# translating cradl 
# Reuses extract_number from the medical co-morbidities cell; defining the same
# function a second time here would only shadow it with an identical copy.
merged['Validated_CRADL_score'] = merged['Reside in a setting other than independent living?'].apply(extract_number)

In [None]:
# Compare validated and LLM residence (CRADL) component scores.
y_true = merged["Validated_CRADL_score"]
y_pred = merged['llm_cradl_score']

cradl_score_rmse = root_mean_squared_error(y_true,y_pred)
print("CRADL RMSE:",cradl_score_rmse)

In [None]:
from scipy.stats import spearmanr

# Rank correlation between the LLM RAI total and the validated RAI total.
# `corr` is read again by the scatter plot and by metrics_df.
corr, p_value = spearmanr(merged['llm_RAI_score'], merged['Validated_RAI_Score'])

print(f"qwen3 30b Agentic Approach Spearman correlation: {corr:.3f}, p-value: {p_value:.3g}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# LLM total on x, validated total on y; the legend box only carries the correlation as its title.
fig, ax = plt.subplots()
sns.scatterplot(x='llm_RAI_score', y='Validated_RAI_Score', data=merged, legend= "full", ax=ax)
ax.legend(title=f'Spearman’s ρ = {corr:.2f}', loc = 'lower right')
ax.set_title("qwen3 30b Agent LLM System")
ax.set_xlabel("LLM RAI Score")
ax.set_ylabel("Patient-Reported RAI Score")
ax.grid(True)
plt.show()

In [None]:
#storing agentic metrics in a DF:
# One-row summary of every metric computed above. An empty frame created first
# would be discarded immediately, so the row is built directly.
metrics_df = pd.DataFrame([{'LLM Model' :'qwen3 30b'
                            ,'LLM System' : 'Agent'
                            ,'Accuracy' : accuracy
                            ,'Precision' : precision
                            ,'Recall' : recall
                            ,'PR AUC': pr_auc
                            ,'F1-score': f1
                            ,'Spearman’s ρ': corr
                            ,'RAI score RMSE' : RAI_score_rmse 
                            ,'Age Sex Cancer Score RMSE': age_sex_cancer_score_rmse
                            ,'ADL Score RMSE' : adl_score_rmse
                            ,'Medical Co-Morbidities Score RMSE' : medicial_comorbidities_score_rmse
                            ,'CRADL RMSE': cradl_score_rmse}])
metrics_df

In [None]:
# Validated vs LLM value for the total and for each component, one row per patient.
sub_score_columns = [
    'MRN',
    'Validated_RAI_Score',
    'llm_RAI_score',
    'Validated_Age_Sex_Cancer_Score',
    'llm_age_sex_cancer_score',
    'Validated_medical_comorbidities_score',
    'llm_medical_comorbidities_score',
    'Validated_CRADL_score',
    'llm_cradl_score',
    'ADL Score (sum of ADL scores) with/without cognitive decline consideration',
    'llm_adlcd_score',
]
sub_score_df = merged[sub_score_columns]
sub_score_df

In [None]:
# Writes the sub-score comparison to disk.
# NOTE(review): no index=False here, unlike the (commented-out) results export above — confirm the index column is wanted.
sub_score_df.to_csv(r'C:\Users\yens01\Projects\Frailty\RESULTS\Sub RAI Score results\qwen3 30b agent sub score.csv')

In [None]:
# metrics_df.to_csv(r'C:\Users\yens01\Projects\Frailty\METRICS\qwen2.5 72b agent approach results 4.10.2025.csv')

In [None]:
from scipy.stats import spearmanr

# NOTE(review): repeats the Spearman computation from the earlier cell on the same columns.
corr, p_value = spearmanr(merged['llm_RAI_score'], merged['Validated_RAI_Score'])

print(f"qwen3 30b Agentic Approach Spearman correlation: {corr:.3f}, p-value: {p_value:.3g}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Same data as the earlier scatter plot with the axes swapped: validated total on x, LLM total on y.
fig, ax = plt.subplots()
sns.scatterplot(x='Validated_RAI_Score', y='llm_RAI_score', data=merged, ax=ax)
ax.set_title(f"qwen3 30b Agentic Approach Spearman corr = {corr:.2f}")
ax.set_xlabel("Validated_RAI_Score")
ax.set_ylabel("llm_RAI_score")
ax.grid(True)
plt.show()