In [1]:
import base64
import pandas as pd
import json
import vertexai
import tiktoken # A popular tokenizer, though not directly from google-generativeai
import time
from vertexai.generative_models import GenerativeModel, Part, SafetySetting
import warnings
warnings.filterwarnings("ignore")

project_name = !(gcloud config get-value core/project)
project_name = project_name[0]

In [2]:
#Gemini
class LLM_model:
    def __init__(self, project, location, model_name):
        self.project = project
        self.location = location
        self.model_name = model_name
        self.generation_config = {
            "max_output_tokens": 8192,
            "temperature": 1,
            "top_p": 0.95,
        }
        self.safety_settings = [
            SafetySetting(
                category=SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
                threshold=SafetySetting.HarmBlockThreshold.OFF
            ),
            SafetySetting(
                category=SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
                threshold=SafetySetting.HarmBlockThreshold.OFF
            ),
            SafetySetting(
                category=SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                threshold=SafetySetting.HarmBlockThreshold.OFF
            ),
            SafetySetting(
                category=SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
                threshold=SafetySetting.HarmBlockThreshold.OFF
            ),
        ]
        vertexai.init(project=self.project, location=self.location)
        self.model = GenerativeModel(self.model_name)

    def complete(self, text):
        chat = self.model.start_chat()
        result=chat.send_message([text],
                                 generation_config=self.generation_config,
                                 safety_settings=self.safety_settings
        )
        text_result=result.candidates[0].content.parts[0].text
        return text_result
# Example usage:
llm_gemini = LLM_model(project_name, 
                       "us-central1", 
                       "gemini-2.5-pro")



In [3]:
#example_text = "tell me about Prudential Insurance"
#gemini_generated_text = llm_gemini.complete(example_text)
#print(gemini_generated_text.strip())
#gemini_generated_text.

In [4]:
def sentiment_analysis(feedback, llm, llm_type='gemini'):
    
    # Configure the sentiment analysis prompt or question
    prompt = (
        """
        Role: You are an AI Specialist analysing customer feebdack from Google reviews. Your role is to understand the given context
        and perform the task provided as appropriately.
        
        Context: The feedbacks are given by the patients or the bystander of patients who went for the treatment to the hospital.
        
        Task: 
        1. Analyze the sentiment of the customer feedback if it’s positive, negative, or neutral. 
        2. If the feedback is neither positive nor negative but just neutrally stating an info or suggestion, 
        or is a non-meaningful feedback like 'Yes', 'No', 'Nil', 'na', 'N:A', 'N/a', 'n/a', 
        'not applicable' or 'no comment', the sentiment is neutral. 
        3. If sentiment is positive, return 'Positive'. 
        4. If sentiment is negative, return 'Negative'.
        5. If sentiment is neutral, return 'Neutral'. Do not include other words or explanation.
        
        This is the customer feedback: {feedback}. 
        
        Here are examples in this format Feedback-Sentiment:
        Its convenient, easy, fast: Positive.
        Nil: Neutral, 
        Customer service not professional-Negative.
        
    """
    )
    try:
        messages = [
            {"role": "specialist", "content": prompt}
        ]
        # result = llm.invoke(messages).content
        if llm_type=='gemini':
            result = llm.complete(prompt)
        else:
            result = llm.complete(prompt).text
        return result.strip()  # Using strip() to remove leading/trailing whitespace
    except Exception as e:
        print(f"Error processing row: {e}")
        return np.nan


In [5]:
def sentiment_analysis(feedback, llm, llm_type='gemini'):
    prompt = f"""
    Role: You are an AI Specialist analysing customer feedback from Google reviews. Your role is to understand the given context
    and perform the task provided as appropriately.

    Context: The feedbacks are given by the patients or the bystanders of patients who went for treatment to the hospital.

    Task:
    1. Analyze the sentiment of the customer feedback if it’s positive, negative, or neutral.
    2. If the feedback is neither positive nor negative but just neutrally stating an info or suggestion,
       or is a non-meaningful feedback like 'Yes', 'No', 'Nil', 'na', 'N:A', 'N/a', 'n/a',
       'not applicable' or 'no comment', the sentiment is neutral.
    3. If sentiment is positive, return 'Positive'.
    4. If sentiment is negative, return 'Negative'.
    5. If sentiment is neutral, return 'Neutral'. Do not include other words or explanation.

    This is the customer feedback: {feedback}

    Examples:
    Its convenient, easy, fast: Positive.
    Nil: Neutral.
    Customer service not professional: Negative.
    
    Output:
    1. provide the classification of sentiment - Positive, Negative, Neutral   
    
    """

    try:
        if llm_type == 'gemini':
            result = llm.complete(prompt)
        else:
            result = llm.complete(prompt).text
        return result.strip()
    except Exception as e:
        print(f"Error processing row: {e}")
        return None  # Use None unless you're sure np.nan is needed and numpy is imported

In [8]:
example_text="Helpful customer service officer"
example_output=sentiment_analysis(example_text, llm_gemini)
print(example_output)

Positive


In [9]:
def Information_extraction(feedback, llm, llm_type='gemini'):
    prompt = f"""
    Role: You are an AI Specialist analysing customer feedback from Google reviews. Your role is to understand the given context
    and perform the task provided as appropriately.

    Context: The feedbacks are given by the patients or the bystanders of patients who went for treatment to the hospital.

    Task:
    1. Analyze the feedback from the below mentioned topics and extract the information and provide as an output. If 
    the topics not found, mention as not found.
    1.1 From the whole feedback, extract the core issue, summarize the patient experience in 5 words and then
    follow the below tasks.
    2. Hospital Topic: 
        a. Does the review mention a specific ward, block or room number? if yes, extract it.
        b. Does the review mention about the department e.g: Endoscopy, Pharmacy, etc., if yes, extract it.
        c. Does the review describe whether the hospital is old, upgraded or modern?
    3. Staff & Service Topic:
        a. Does the review mention Doctors? if yes, extract it.
        b. Does the review mention nurses? if yes, extract it.
        c. Does the review mention support or admin staff? if yes, extract it.
        d. Does the review mention specific staff names? Extract them.
    4. Patient Experience:
        a. Does the review mention specific treatments or medical procedures (e.g. delivery, surgery)?
        b. What aspects of service quality are highlighted (e.g. attentiveness, professionalism, rudeness)?
        c. Does the reviewer mention waiting time? if yes, how long?
        d. Does the review mention food quality or any other quality (e.g. bedsheets, bathroom or cafeteria)?
    5. Complaints & Negative Aspects:
        a. Does the reviewer mention rude staff? Extract the details.
        b. Does the reviewer mention delays, long queues or poor management? Extract waiting times if given.
        c. Does the review mention unfair rules or inconsistent policies (e.g. visitor entry)?
        d. Does the reviewer explicitly call the hospital horrible, worst, terrifying, or disappointing?
        e. Does the review suggest profit motive or unnecessary procedures?
        f. Does the reviewer perceive fraudulent activity from the hospital?
    6. For each of the topics and its sub questions, provide output. If not found, mention as "Not found in feedback"

    This is the customer feedback: {feedback}

    Output:
    1. Provide the output in key-value pairs as JSON.
    2. Keys are only the core issue, summary, and the sub-questions (topic names not required).
    3. Values are the extracted output from the feedback.
    """

    # Token estimation
    try:
        import tiktoken
        enc = tiktoken.get_encoding("cl100k_base")
        prompt_tokens = len(enc.encode(prompt))
        print(f"Estimated input tokens (prompt): {prompt_tokens}")
    except ImportError:
        print("Install 'tiktoken' (pip install tiktoken) for token estimation.")
        prompt_tokens = -1

    # LLM completion
    try:
        if llm_type == 'gemini':
            result = llm.complete(prompt)
        else:
            result = llm.complete(prompt).text
        result = result.strip()
    except Exception as e:
        print(f"Error processing row: {e}")
        return None

    # Token estimation for response
    try:
        response_tokens = len(enc.encode(result))
        print(f"Estimated output tokens (response): {response_tokens}")
    except Exception:
        response_tokens = -1

    # Total token count
    if prompt_tokens != -1 and response_tokens != -1:
        total_estimated_tokens = prompt_tokens + response_tokens
        print(f"Total estimated tokens (prompt + response): {total_estimated_tokens}")

    return result

In [10]:
feedback = """
Gleneagles Hospital overcharged us during childbirth by admitting our newborn to the NICU. When the baby was transferred to KK Hospital, they discharged the infant immediately. During the 1 day stay in NICU, our baby was subjected to repeated and seemingly unnecessary needle procedures. Additionally, the nursing staff fed the baby a formula that caused vomiting. Although we requested a change in formula, it was only temporarily adjusted — and was later switched back to the original formula after a nurse shift change.

"""

In [11]:
#example_output=Information_extraction(feedback, llm_gemini)
#print(example_output)

In [6]:
df_Gleangles = pd.read_excel("Gleneagles Hospitalv2.xlsx")

In [7]:
df_Gleangles.head().transpose()

Unnamed: 0,0,1,2,3,4
query,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c
name,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital
google_id,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c
place_id,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU
location_link,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...
reviews_link,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...
reviews,448,448,448,448,448
rating,3.5,3.5,3.5,3.5,3.5
review_id,Ci9DQUlRQUNvZENodHljRjlvT25wRVNtWnVhbHBRZFdwdm...,Ci9DQUlRQUNvZENodHljRjlvT2pKS2NYZEZhM1psVmtaRU...,Ci9DQUlRQUNvZENodHljRjlvT25sNFYyWlFWMk5JWm5Fd0...,Ci9DQUlRQUNvZENodHljRjlvT2xSWGJHOTRXR280V2xsaG...,Ci9DQUlRQUNvZENodHljRjlvT2pCSGRGZEpWbk5KTTFKQ1...
review_pagination_id,CAESY0NBRVFBUnBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMk...,CAESYENBRVFBaHBDUTJrNFNVRlNTVzVEWjI5QlVEY3lSMk...,CAESY0NBRVFBeHBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMl...,CAESYENBRVFCQnBDUTJrNFNVRlNTVzVEWjI5QlVEY3lSMl...,CAESY0NBRVFCUnBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMl...


In [14]:
df_Gleangles_reviewtext = df_Gleangles.review_text

In [13]:
df_Gleangles_reviewtext.iloc[1:10]

1    Premium hospital, substandard service\n\nI wis...
2                                Test writing a review
3                                                  NaN
4    Hospital facility is good. however, it took me...
5                                                  NaN
6    I had my first surgery here 11 years ago, and ...
7    My very first hospitalization experience was a...
8    Bad experience for me, the hospital staff is r...
9    Gleneagles Hospital overcharged us during chil...
Name: review_text, dtype: object

In [15]:
import json
import fitz  # PyMuPDF
counter = 0
# Assuming df_Gleangles_reviewtext and Information_extraction are already defined
with open("review_output_Gleneagles.txt", "w", encoding="utf-8") as f:
    for idx,review in enumerate(df_Gleangles_reviewtext,start=0):
        
        start_time = time.time()
        print(f"Processing row {idx}...")

        example_output = Information_extraction(review, llm_gemini)
        #o = example_output.replace('```', '')
        
        #cleaned_output = o.replace('json', '')
        
        
        try:
            # If the output contains extra text before/after JSON, isolate the JSON part
            start = example_output.find('{')
            end = example_output.rfind('}') + 1
            json_str = example_output[start:end]

            # Parse the JSON
            ie_output_dict = json.loads(json_str)
        except json.JSONDecodeError as e:
            print("JSON decoding failed:", e)
            print("Raw output:", example_output)
            ie_output_dict = None

        
        f.write("Original Review:\n")
        f.write(str(review) + "\n")
        f.write("Extracted Information:\n")
        f.write(json.dumps(ie_output_dict, indent=4))
        f.write("\n---------------------------------------------------\n")

        f.write("Next Review ########################################\n\n")
        

        elapsed_time = time.time() - start_time
        print(f"Row {idx} processed in {elapsed_time:.2f} seconds.\n")



Processing row 0...
Estimated input tokens (prompt): 575
Estimated output tokens (response): 306
Total estimated tokens (prompt + response): 881
Row 0 processed in 11.68 seconds.

Processing row 1...
Estimated input tokens (prompt): 1017
Estimated output tokens (response): 577
Total estimated tokens (prompt + response): 1594
Row 1 processed in 18.10 seconds.

Processing row 2...
Estimated input tokens (prompt): 571
Estimated output tokens (response): 289
Total estimated tokens (prompt + response): 860
Row 2 processed in 18.59 seconds.

Processing row 3...
Estimated input tokens (prompt): 568
Estimated output tokens (response): 249
Total estimated tokens (prompt + response): 817
Row 3 processed in 9.25 seconds.

Processing row 4...
Estimated input tokens (prompt): 605
Estimated output tokens (response): 542
Total estimated tokens (prompt + response): 1147
Row 4 processed in 19.30 seconds.

Processing row 5...
Estimated input tokens (prompt): 568
Estimated output tokens (response): 246
T


### Step 2: Convert .txt to .pdf
with open("review_output.txt", "r", encoding="utf-8") as f:
    text = f.read()

doc = fitz.open()
lines = text.split('\n')
chunk = ""
chunks = []
line_count = 0
max_lines_per_page = 50

for line in lines:
    chunk += line + '\n'
    line_count += 1
    if line_count >= max_lines_per_page:
        chunks.append(chunk)
        chunk = ""
        line_count = 0
if chunk:
    chunks.append(chunk)

for chunk in chunks:
    page = doc.new_page()
    rect = fitz.Rect(50, 50, 550, 800)
    page.insert_textbox(rect, chunk, fontsize=11, fontname="helv")

doc.save("review_output.pdf")
doc.close()


In [None]:
df_

In [8]:

with open("text_reviews_Glenagles.txt", "w", encoding="utf-8") as f:
    for i, row in df_Gleangles.iterrows():
        f.write(f"--- Review #{i + 1} ---\n")
        f.write(f"Datetime: {row['review_datetime_utc']}\n")
        f.write(f"User: {row['author_title']}\n")
        f.write(f"Review: {row['review_text']}\n")
        f.write("--- End of Review ---\n\n")


In [17]:
df_Gleangles.head().transpose()

Unnamed: 0,0,1,2,3,4
query,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c
name,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital,Gleneagles Hospital
google_id,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c,0x31da1a2024d17459:0x2559534cd241d05c
place_id,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU,ChIJWXTRJCAa2jERXNBB0kxTWSU
location_link,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...,https://www.google.com/maps/place/Gleneagles+H...
reviews_link,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...,https://search.google.com/local/reviews?placei...
reviews,448,448,448,448,448
rating,3.5,3.5,3.5,3.5,3.5
review_id,Ci9DQUlRQUNvZENodHljRjlvT25wRVNtWnVhbHBRZFdwdm...,Ci9DQUlRQUNvZENodHljRjlvT2pKS2NYZEZhM1psVmtaRU...,Ci9DQUlRQUNvZENodHljRjlvT25sNFYyWlFWMk5JWm5Fd0...,Ci9DQUlRQUNvZENodHljRjlvT2xSWGJHOTRXR280V2xsaG...,Ci9DQUlRQUNvZENodHljRjlvT2pCSGRGZEpWbk5KTTFKQ1...
review_pagination_id,CAESY0NBRVFBUnBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMk...,CAESYENBRVFBaHBDUTJrNFNVRlNTVzVEWjI5QlVEY3lSMk...,CAESY0NBRVFBeHBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMl...,CAESYENBRVFCQnBDUTJrNFNVRlNTVzVEWjI5QlVEY3lSMl...,CAESY0NBRVFCUnBFUTJwRlNVRlNTWEJEWjI5QlVEY3lSMl...
