## Hospital Recommendation Engine (Ranking + RAG Output + LLM Layer)

### Hospital Ranking with the Analytic Hierarchy Process (AHP)

In [1]:
import pandas as pd
import numpy as np

### Loading the hospital performance metrics data

In [6]:
# Load the hospital metrics sheet (one row per metric, one column per hospital).
consolidated_metrics = pd.read_excel(
    "hospital metrics.xlsx",
    sheet_name="Sheet1",
)

In [7]:
# Preview the first rows to confirm the layout: metrics down the rows, hospitals across the columns.
consolidated_metrics.head()

Unnamed: 0,S.No,Metrics,NUH Metric,Gleanagles Metric,Khoo Tech Puat,SGH,TTS,KK Women,Mt Alvernia,Mt Elizabeth
0,0,Average Rating,3.12,3.53,2.34,2.71,2.64,2.74,3.5,3.61
1,1,Average Sentiment,0.08,0.12,0.02,0.04,0.03,0.03,0.11,0.1
2,2,Response Rate (%),0.0,41.18,0.0,0.54,0.0,0.0,6.09,39.5
3,3,One-time Reviewer %,10.42,7.92,8.67,8.11,7.5,7.88,5.42,8.29
4,4,Frequent Reviewer %,62.89,70.81,60.33,71.89,69.0,66.5,71.56,65.32


In [8]:

# Start from a neutral comparison matrix: every metric equally important.
n_metrics = len(consolidated_metrics)
pairwise_matrix = np.ones((n_metrics, n_metrics))
print(pairwise_matrix)  # shows the initial all-ones matrix, before preferences are applied

# Row positions of the metrics judged more important than the rest.
important_indices = [0, 1, 11, 13]
is_important = np.isin(np.arange(n_metrics), important_indices)

# An important metric is preferred 3:1 over an ordinary one; the mirrored
# entry holds the reciprocal so that a_ji == 1 / a_ij.
pairwise_matrix[np.ix_(is_important, ~is_important)] = 3
pairwise_matrix[np.ix_(~is_important, is_important)] = 1 / 3


[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


### Deriving Eigenvalues and Eigenvectors

In [11]:

# The AHP priority (weight) vector is the principal eigenvector of the
# pairwise comparison matrix, rescaled so that its entries sum to 1.
eigenvalues, eigenvectors = np.linalg.eig(pairwise_matrix)
print(eigenvalues)

# np.linalg.eig can return complex values; pick the dominant eigenvalue by its
# real part, the same way the rank_hospitals endpoint does.
max_index = np.argmax(eigenvalues.real)
print(max_index)

principal_eigenvector = eigenvectors[:, max_index].real
print(principal_eigenvector)

# Dividing by the sum also cancels the arbitrary sign of the eigenvector,
# so the weights come out positive.
priority_vector = principal_eigenvector / principal_eigenvector.sum()
print(priority_vector)

[ 0.00000000e+00  1.50000000e+01  1.10621119e-15 -3.79474131e-16
  6.96336207e-17 -1.68497167e-32  7.23196358e-32 -7.61813808e-33
  1.12078954e-46 -1.86896029e-48  2.19653646e-49  1.34506001e-50
  8.95547117e-55 -1.22508042e-64 -2.37375467e-69]
1
[-0.43759497 -0.43759497 -0.14586499 -0.14586499 -0.14586499 -0.14586499
 -0.14586499 -0.14586499 -0.14586499 -0.14586499 -0.14586499 -0.43759497
 -0.14586499 -0.43759497 -0.14586499]
[0.13043478 0.13043478 0.04347826 0.04347826 0.04347826 0.04347826
 0.04347826 0.04347826 0.04347826 0.04347826 0.04347826 0.13043478
 0.04347826 0.13043478 0.04347826]


In [12]:

# `df` was never defined in this notebook, so this cell raised NameError on a
# fresh kernel. Build the metric-by-hospital table explicitly from the loaded
# sheet: metric names become the row labels and only numeric hospital columns
# are kept.
hospital_metrics = (
    consolidated_metrics
    .set_index("Metrics")
    .rename_axis(index=None)
    .drop(columns=["Unnamed: 0", "S.No"], errors="ignore")  # bookkeeping columns, not hospitals
    .select_dtypes(include="number")
    # Column labels such as "NUH Metric" are shortened to "NUH".
    .rename(columns=lambda name: name[: -len(" Metric")] if name.endswith(" Metric") else name)
)

# Min-max scale the table.
# NOTE(review): DataFrame.min()/max() reduce down each column, so every hospital
# is scaled against its own range across all metrics (the largest-valued metric
# becomes 1.0 for every hospital). Confirm this is intended rather than scaling
# each metric across hospitals (axis=1); switching would change the ranking.
column_min = hospital_metrics.min()
column_max = hospital_metrics.max()
normalized_df = (hospital_metrics - column_min) / (column_max - column_min)


In [14]:
# Inspect the scaled table (rows are metrics, columns are hospitals).
print(normalized_df)

                           NUH  Gleanagles  Khoo Tech Puat       SGH  \
Average Rating        0.012417    0.012726        0.006058  0.007837   
Average Sentiment     0.000318    0.000000        0.000052  0.000000   
Response Rate (%)     0.000000    0.153232        0.000000  0.001468   
One-time Reviewer %   0.041471    0.029109        0.022444  0.023687   
Frequent Reviewer %   0.250298    0.263808        0.156178  0.210889   
Avg Review Length     1.000000    1.000000        1.000000  1.000000   
Avg Likes             0.150038    0.157260        0.121606  0.130712   
Rating 1 %            0.152870    0.090722        0.154029  0.145847   
Rating 2 %            0.015562    0.022354        0.015972  0.012562   
Rating 3 %            0.021770    0.033326        0.007766  0.012562   
Rating 4 %            0.047162    0.048515        0.010355  0.025271   
Rating 5 %            0.160630    0.176034        0.070750  0.096654   
Neutral Sentiment %   0.192510    0.179355        0.105465  0.13

### Calculating the Ranking for Hospitals

In [16]:

# Scale every metric row by its AHP priority weight (one weight per row).
weighted_df = normalized_df.multiply(priority_vector, axis="index")


In [17]:
# Composite score per hospital: add up its weighted metrics (column totals).
ahp_scores = weighted_df.sum(axis=0)

In [18]:

# Best-scoring hospital first.
ranked_hospitals = ahp_scores.sort_values(ascending=False)
print(
    "Hospital Ranking based on Full AHP Process with Eigenvector Method:",
    ranked_hospitals,
    sep="\n",
)


Hospital Ranking based on Full AHP Process with Eigenvector Method:
Mt Elizabeth      0.141386
Gleanagles        0.132062
NUH               0.125233
Mt Alvernia       0.109956
KK Women          0.105259
SGH               0.101814
TTS               0.100535
Khoo Tech Puat    0.093420
dtype: float64


In [19]:
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Dict, List
import numpy as np
import pandas as pd

In [24]:
from fastapi import FastAPI
import pandas as pd
import numpy as np

app = FastAPI()


def _load_hospital_metrics(path, sheet_name):
    """Read the metrics sheet and return a numeric metric-by-hospital table."""
    raw = pd.read_excel(path, sheet_name=sheet_name)
    return (
        raw
        .set_index("Metrics")
        .rename_axis(index=None)
        # Bookkeeping columns are not hospitals and must not be scored.
        .drop(columns=["Unnamed: 0", "S.No"], errors="ignore")
        .select_dtypes(include="number")
        # Column labels such as "NUH Metric" are shortened to "NUH".
        .rename(columns=lambda name: name[: -len(" Metric")] if name.endswith(" Metric") else name)
    )


def _ahp_priority_vector(n_metrics, important_indices, preference=3):
    """Return AHP weights (summing to 1) for ``n_metrics`` criteria.

    Criteria at ``important_indices`` are preferred ``preference``:1 over all
    other criteria; every remaining comparison is neutral (1).
    """
    is_important = np.isin(np.arange(n_metrics), important_indices)
    pairwise_matrix = np.ones((n_metrics, n_metrics))
    pairwise_matrix[np.ix_(is_important, ~is_important)] = preference
    pairwise_matrix[np.ix_(~is_important, is_important)] = 1 / preference

    eigenvalues, eigenvectors = np.linalg.eig(pairwise_matrix)
    principal = eigenvectors[:, np.argmax(eigenvalues.real)].real
    # Dividing by the sum normalises the weights and cancels the eigenvector's sign.
    return principal / principal.sum()


@app.get("/rank_hospitals")
def rank_hospitals():
    """Rank hospitals by their AHP-weighted metric score.

    Returns:
        dict mapping hospital name to score, ordered from best to worst.

    The previous version fed the raw sheet (including the text ``Metrics``
    column and row-number columns) straight into the min-max arithmetic,
    which fails on string subtraction and would otherwise score columns that
    are not hospitals. The sheet is now reduced to numeric hospital columns
    first.
    """
    metrics = _load_hospital_metrics("hospital metrics.xlsx", sheet_name='Sheet1')
    priority_vector = _ahp_priority_vector(len(metrics), important_indices=[0, 1, 11, 13])

    # NOTE(review): min()/max() reduce down each column, i.e. each hospital is
    # scaled against its own range across metrics. Kept as-is to match the
    # notebook analysis; confirm whether per-metric scaling (axis=1) was intended.
    normalized_df = (metrics - metrics.min()) / (metrics.max() - metrics.min())

    weighted_df = normalized_df.mul(priority_vector, axis=0)
    ahp_scores = weighted_df.sum()
    ranked_hospitals = ahp_scores.sort_values(ascending=False)
    return ranked_hospitals.to_dict()

### LLM for Recommendation

In [27]:
import getpass
import os


import base64
import pandas as pd
import json
import vertexai
import tiktoken # A popular tokenizer, though not directly from google-generativeai
import time
from vertexai.generative_models import GenerativeModel, Part, SafetySetting
import warnings
warnings.filterwarnings("ignore")
from langchain_text_splitters import RecursiveCharacterTextSplitter

# IPython shell capture: run the gcloud CLI and keep its output lines.
project_name = !(gcloud config get-value core/project)
# Use the first captured line as the project name.
# NOTE(review): this raises IndexError if gcloud prints nothing — confirm gcloud is configured.
project_name = project_name[0]
from llama_index.llms.langchain import LangChainLLM
from langchain_google_vertexai import VertexAIEmbeddings


from llama_index.core.schema import Document
# import
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
#from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb
from llama_index.core import Settings

In [2]:
# Sample RAG answer used as input to the relevancy-scoring prompt.
RAG_output = """


There are no reviews available that specifically mention the process or care for appendicitis 
surgery at SGH.While reviews for other conditions and procedures exist—such as breast surgery, 
a traumatic brain injury, and treatment for an infected thumb—none pertain to appendicitis. 
One individual who had "a surgery" was impressed with the doctors' skills and experience but 
found the administrative staff and the Accounts & Billing Department's attitudes to be appalling.

"""

In [135]:
#Gemini
class LLM_model:
    """Small wrapper around a Vertex AI ``GenerativeModel`` for one-shot prompts."""

    def __init__(self, project, location, model_name):
        """Initialise Vertex AI and create the model handle.

        Args:
            project: Project passed to ``vertexai.init``.
            location: Location passed to ``vertexai.init``.
            model_name: Model name passed to ``GenerativeModel``.
        """
        self.project, self.location, self.model_name = project, location, model_name
        self.generation_config = dict(
            max_output_tokens=3000,
            temperature=0,
            top_p=0.9,
        )

        # Every listed harm category gets the same threshold: OFF.
        harm = SafetySetting.HarmCategory
        self.safety_settings = [
            SafetySetting(category=category, threshold=SafetySetting.HarmBlockThreshold.OFF)
            for category in (
                harm.HARM_CATEGORY_HATE_SPEECH,
                harm.HARM_CATEGORY_DANGEROUS_CONTENT,
                harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                harm.HARM_CATEGORY_HARASSMENT,
            )
        ]

        vertexai.init(project=self.project, location=self.location)
        self.model = GenerativeModel(self.model_name)

    def complete(self, text):
        """Send ``text`` in a new chat and return the text of the first reply part.

        A new chat is started on every call, so no history carries over
        between calls.
        """
        session = self.model.start_chat()
        response = session.send_message(
            [text],
            generation_config=self.generation_config,
            safety_settings=self.safety_settings,
        )
        first_candidate = response.candidates[0]
        return first_candidate.content.parts[0].text
# Gemini client used for the relevancy check.
llm_gemini = LLM_model(
    project=project_name,
    location="us-central1",
    model_name="gemini-2.5-pro",
)

In [136]:
def Summarize_RAGoutput(query_input,RAG_output, llm,llm_type='gemini'):
    """Ask an LLM how relevant a RAG answer is to the user's query.

    Args:
        query_input: The user's question, inserted into the prompt.
        RAG_output: The RAG engine's answer, inserted into the prompt.
        llm: Object with a ``complete(prompt)`` method.
        llm_type: ``'gemini'`` when ``llm.complete`` returns the reply text
            directly; any other value means the reply text is read from the
            returned object's ``.text`` attribute.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``np.nan``
        if the call raised. The prompt asks for a JSON object with
        ``relevancy_score`` and ``explanation``, but the reply is returned
        unparsed.
    """
    # Relevancy-grading prompt (the text is sent to the model verbatim).
    prompt = f"""

Role: You are an AI assistant to provide recommendation.

Context: You have been given a user query and the output from a RAG engine. Using these two pieces of information, perform the following tasks:

query input: {query_input}
RAG Output:
{RAG_output}

Tasks:
1. Determine whether the RAG Output is directly or indirectly relevant to the query input.
2. If relevant, provide a relevancy score from 1 to 10 (10 = most relevant).
3. Provide a concise explanation for the relevancy score.

Output your answer as a JSON object with two fields: "relevancy_score" (integer) and "explanation" (string).


"""
    try:
        response = llm.complete(prompt)
        if llm_type != 'gemini':
            response = response.text
        return response.strip()
    except Exception as e:
        # Best effort: report the failure and return a missing-value marker.
        print(f"Error processing row: {e}")
        return np.nan


In [137]:
# Example user question paired with RAG_output for the relevancy check.
query_input = """

Are there any reviews about the process and care for appendicitis surgery at SGH?
"""

In [138]:
# Show the RAG answer that will be scored.
# NOTE(review): the saved output of this cell contains Markdown bold markers that the
# RAG_output literal in this notebook does not — re-run the cells in order to refresh it.
RAG_output

'\n\n\n**There are no reviews available that specifically mention the process or care for appendicitis surgery at SGH.\n\nWhile reviews for other conditions and procedures exist—such as breast surgery, a traumatic brain injury, and treatment for an infected thumb—none pertain to appendicitis. One individual who had "a surgery" was impressed with the doctors\' skills and experience but found the administrative staff and the Accounts & Billing Department\'s attitudes to be appalling.**\n\n'

In [139]:
# Score how well the RAG answer addresses the query, using the Gemini client.
LLM_reco = Summarize_RAGoutput(query_input, RAG_output, llm=llm_gemini)

In [140]:
# Print the raw reply. In the saved output the JSON is wrapped in a Markdown code fence,
# so it would need the fence removed before being passed to json.loads.
print(LLM_reco)

```json
{
  "relevancy_score": 9,
  "explanation": "The RAG output is highly relevant because it directly addresses the user's query by explicitly stating that no reviews for appendicitis surgery at SGH are available. It also provides the next best information: a review for a general surgery at the same hospital, which offers some insight into the quality of doctors and administrative staff."
}
```
