<a href="https://colab.research.google.com/github/pantipolo12/Diet-Recommendation-LLM/blob/main/Diet_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OCR (Optical Character Recognition)

In [None]:
# @title
# Given dataset (a chosen set of relevant prescriptions).
# Each `gdown` call fetches one file by its ID; the files are presumably the
# prescription PDFs that later cells read from `data_dir` — TODO confirm.
!gdown 1DvcXqFzEDlrFQTOQV_qEZjDWBT1MnnFl
!gdown 1ENdiOHRYL8ImpFuBR9nLSRohF8nBmvJS
!gdown 1rHIa19GiUb5xlG6ptdJXyaTs5GD8tyl0
!gdown 196Ww4pmzDvRHNIhBU4zhwfO9cY7qQJI0

In [None]:
# Installing OCR libraries
%%capture
!pip install pypdfium2
!pip install poppler-utils
!pip install pdf2image
!pip install tesseract-ocr
!pip install libtesseract-dev
!pip install pytesseract

In [None]:
# Importing necessary libraries
import pypdfium2 as pyp
from pdf2image import convert_from_path
from IPython.display import display, Image
import cv2
import pytesseract
import os
import pandas as pd
import numpy as np
import nltk
import re
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt_tab')

In [None]:
# Removes any skew in the image
def deskew(image):
    """Rotate ``image`` about its centre by an angle estimated from its pixels.

    The image is converted to grayscale and inverted; the minimum-area
    rectangle around every non-zero pixel of the inverted image supplies the
    skew angle, and the original image is rotated by the corrected angle.

    Args:
        image: Colour image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``.

    Returns:
        The rotated image, same width and height as the input.
    """
    inverted = cv2.bitwise_not(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    # (row, col) index pairs of all pixels that are non-zero after inversion.
    points = np.column_stack(np.where(inverted > 0))
    rect_angle = cv2.minAreaRect(points)[-1]

    # NOTE(review): this correction assumes minAreaRect reports angles in
    # [-90, 0); confirm the range used by the installed OpenCV version.
    angle = -(90 + rect_angle) if rect_angle < -45 else -rect_angle

    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)

    # Cubic interpolation; border pixels are replicated to fill exposed corners.
    return cv2.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

# Extracts text from image
def extract_text_from_image(image):
    """Return the string that ``pytesseract.image_to_string`` produces for ``image``."""
    return pytesseract.image_to_string(image)

In [None]:
# Directory where the pdfs are stored.
# Later cells list this directory and pick up every file ending in '.pdf'.
data_dir = '/content'

In [None]:
# Returns tokenized text extracted from pdf or images after new line/empty string removal
def read_prescription(path):
  """Return the list of text tokens found in the PDF at ``path``.

  The embedded text layer is read first and tokenized with ``word_tokenize``.
  If that yields no tokens at all, every page is rendered to an image,
  deskewed, passed through OCR, and the OCR output is split on whitespace.

  Args:
    path: Path of the PDF file to read.

  Returns:
    A list of string tokens; empty when neither method finds any text.
  """
  tokens = []

  pdf = pyp.PdfDocument(path)
  try:
    for index in range(len(pdf)):
      page = pdf[index]
      textpage = page.get_textpage()
      try:
        tokens.extend(word_tokenize(textpage.get_text_range()))
      finally:
        # Release the page-level handles before the document itself.
        textpage.close()
        page.close()
  finally:
    pdf.close()

  # If the pdf is not read, that means we have an image and we read via OCR
  if not tokens:
    for page_image in convert_from_path(path):
      straightened = deskew(np.array(page_image))
      # split() breaks on any whitespace run (spaces, tabs, new lines) and never
      # yields empty strings, so words on consecutive lines stay separate
      # instead of being glued together when the new line is dropped.
      tokens.extend(extract_text_from_image(straightened).split())

  return tokens

In [None]:
# Find all the pdf files in the data directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the DataFrame row order (used positionally by later cells) stable.
pdf_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.pdf'))

# Read every prescription into a list of tokens
title = []
texts = []
for pdf_file in pdf_files:
  path = os.path.join(data_dir, pdf_file)
  text = read_prescription(path)
  texts.append(text)
  title.append(pdf_file)

# One row per prescription: the file name and its token list
df = pd.DataFrame({'file': title, 'text': texts})

In [None]:
# Extracting medications or advice
# For each prescription, keep the tokens from the first keyword hit onwards.
# Keywords are tried in list order, so an earlier keyword wins even when a later
# one occurs sooner in the text; with no hit the full token list is kept.
keywords = ["medication", "medicine", "advice", "advised" , "food", "diet", "meal"]
medicines = []
# The "text" column is read by label: df.iloc[i][1] relied on integer keys being
# treated as positions on a label-indexed row, which pandas has deprecated.
for prescription in df["text"]:
  start = next(
    (position
     for word in keywords
     for position, token in enumerate(prescription)
     if word in token.lower()),
    0,
  )
  medicines.append(" ".join(prescription[start:]))

df["extracted"] = medicines

In [None]:
# Full prescription text: all tokens of each prescription joined by single spaces.
# The "text" column is read by label rather than through df.iloc[i][1], whose
# integer-key-as-position lookup on a row is deprecated in pandas.
complete = [" ".join(tokens) for tokens in df["text"]]

df["complete"] = complete

In [None]:
# Display the table built so far (columns: file, text, extracted, complete).
df

In [None]:
# Extracted medication/advice text of the second prescription.
# Selected by column label; df.iloc[1][2] used a deprecated positional lookup.
df["extracted"].iloc[1]

# Gemini (A Generative AI Model developed by Google)

In [None]:
%%capture
# Install the Gemini client library; %%capture hides the installer output.
!pip install google-generativeai

In [None]:
# Gemini API key: stored as a Colab secret named 'secret' (not written in the notebook).
import os
import google.generativeai as genai
from google.colab import userdata

# Look the key up in Colab's user secrets
gemini_key = userdata.get('secret')

# Configure google-generativeai library with the API key
genai.configure(api_key = gemini_key)

In [None]:
# Print the name of every model that supports the generateContent method
for model_info in genai.list_models():
    if 'generateContent' not in model_info.supported_generation_methods:
        continue
    print(model_info.name)

In [None]:
# Simple Test for Gemini
from IPython.display import Markdown
model = genai.GenerativeModel('gemini-2.5-flash')

# Send a single free-form question to check that the configured key and model respond.
response = model.generate_content("Can you provide medical details of say DOLO tablets.")

Markdown(response.text) #display text as Markdown

In [None]:
# Prescription Test for Gemini (uses the keyword-extracted text of each prescription)
from IPython.display import Markdown, display

model = genai.GenerativeModel('gemini-2.5-flash')

# Columns are read by label; df.iloc[i][0] / df.iloc[i][2] relied on pandas'
# deprecated treatment of integer keys as positions on a label-indexed row.
for file_name, extracted_text in zip(df["file"], df["extracted"]):

  print(f"Below Prescription: {file_name}")

  response = model.generate_content(extracted_text + ". Please identify medicines and their associated symptoms from this prescription, also identify associated diseases. Based on this give Food Recommendations.")

  display(Markdown(response.text))

In [None]:
# Prescription Test for Gemini (uses the complete text and asks for a table)
from IPython.display import Markdown, display

model = genai.GenerativeModel('gemini-2.5-flash')

# Columns are read by label; df.iloc[i][0] / df.iloc[i][3] relied on pandas'
# deprecated treatment of integer keys as positions on a label-indexed row.
for file_name, complete_text in zip(df["file"], df["complete"]):

  print(f"Below Prescription: {file_name}")

  response = model.generate_content(complete_text + ". Please identify medicines and their associated symptoms from this prescription, also identify associated diseases.  Based on this give Food Recommendations. Structure response in simple table format")

  display(Markdown(response.text))

In [None]:
# Raw text of the first part of the first candidate held by `response`
# (`response` is whatever the most recently executed cell assigned to it).
response.candidates[0].content.parts[0].text

In [None]:
def gemini_prompt(prompt, model_name='gemini-2.5-flash'):
  """Send ``prompt`` to a Gemini model and return the library's response object.

  The API key is read from the Colab secret named 'secret' and the client is
  configured on every call, so the function does not depend on an earlier
  configuration cell having been run.

  Args:
    prompt: Content passed unchanged to ``generate_content``.
    model_name: Name of the model to instantiate; defaults to the model used
      throughout this notebook.

  Returns:
    The object returned by ``GenerativeModel.generate_content``; callers in
    this notebook read its ``.text`` attribute.
  """
  import google.generativeai as genai
  from google.colab import userdata

  # Configure google-generativeai library with the API key
  genai.configure(api_key=userdata.get('secret'))

  model = genai.GenerativeModel(model_name)
  return model.generate_content(prompt)

# Llama (Large Language Model Meta AI) — Llama-style chat requests, answered by Gemini through `run_gemini`

In [None]:
!pip install -U google-generativeai -q

import google.generativeai as genai
from IPython.display import Markdown

# Use your Google AI Studio key
genai.configure(api_key="secret")


In [None]:
# Remove the Gemini client and its generated-API companion package, then
# install the latest google-generativeai release again.
!pip uninstall -y google-generativeai google-ai-generativelanguage
!pip install -U google-generativeai



In [None]:
import google.generativeai as genai
from IPython.display import Markdown

# Configure Gemini (replace with your API key or Colab userdata)
# NOTE(review): `userdata` is not imported in this cell; it comes from the
# earlier `from google.colab import userdata` cell, which must have run first.
gen_key = userdata.get('secret')
genai.configure(api_key=gen_key)

def run_gemini(api_request_json):
    """Answer a chat-style request dict with Gemini.

    Args:
        api_request_json: Dict with a "messages" list of
            ``{"role": ..., "content": ...}`` entries and an optional "model".
            A model name that does not start with "gemini" is replaced by
            "gemini-2.5-flash".

    Returns:
        ``{"choices": [{"message": {"content": <reply text>}}]}``.

    Only "system" and "user" messages contribute to the prompt; messages with
    any other role are left out.
    """
    default_model = "gemini-2.5-flash"
    requested_model = api_request_json.get("model", default_model)
    model_name = requested_model if requested_model.startswith("gemini") else default_model

    # Flatten the messages into one prompt: system text first, then user turns.
    system_parts = []
    user_parts = []
    for message in api_request_json["messages"]:
        role = message["role"]
        if role == "system":
            system_parts.append(message["content"] + "\n")
        elif role == "user":
            user_parts.append(f"User: {message['content']}\n")

    prompt = "".join(system_parts) + "\n" + "".join(user_parts)

    reply = genai.GenerativeModel(model_name).generate_content(prompt)
    return {"choices": [{"message": {"content": reply.text}}]}


# ✅ Test call
# The request names a non-Gemini model on purpose: run_gemini() swaps any model
# name that does not start with "gemini" for "gemini-2.5-flash".
api_request_json = {
    "model": "llama3-70b",  # automatically replaced
    "messages": [
        {"role": "system", "content": "You are a llama assistant that starts every word with 'll'."},
        {"role": "user", "content": "Hi, happy llama day!"}
    ]
}

# Print only the reply text from the chat-style response dict
response = run_gemini(api_request_json)
print(response["choices"][0]["message"]["content"])


In [None]:
# Send the same request again (re-uses `api_request_json` from the previous cell)
response = run_gemini(api_request_json)
print(response["choices"][0]["message"]["content"])


In [None]:
# ✅ Prescription test using Gemini (replaces llama.run)
from IPython.display import Markdown, display

# One user message per prescription, taken from the "extracted" column.
# Rows are no longer hard-coded as 0..3, so the cell works for any number of
# prescriptions, and the column is read by label instead of the deprecated
# positional lookup df.iloc[n][2].
prescription_messages = [
  {"role": "user", "content": extracted_text}
  for extracted_text in df["extracted"]
]

# Build API-style request (same as before)
api_request_json = {
  "model": "gemini-2.5-flash",  # replaced llama3-70b with Gemini model
  "messages": [
    {"role": "system", "content": "Please provide information about different medicines, their symptoms and their associated diseases from the prescription."},
    *prescription_messages,
  ]
}

# Run with Gemini wrapper
response = run_gemini(api_request_json)

# Extract model output
gemini_output = response["choices"][0]["message"]["content"]

# Display nicely
display(Markdown(gemini_output))


In [None]:
# Accuracy and Validity Check for Prescriptions
from IPython.display import Markdown, display

# Each prescription's complete text is sent to Gemini with a request for
# validity / error / accuracy percentages. Columns are read by label;
# df.iloc[i][0] and df.iloc[i][3] relied on a deprecated positional lookup.
for file_name, prescription_text in zip(df["file"], df["complete"]):
    prompt = f"""
    Evaluate the following prescription analysis for accuracy and validity.
    Prescription: {prescription_text}

    Please provide:
    1. A validity percentage (how complete and correct is the extracted medicine/disease/food info).
    2. An error percentage (how much information is missing or incorrect).
    3. Overall accuracy estimate (based on medical correctness and completeness).
    Provide the results as a simple table:
    | Validity (%) | Error (%) | Accuracy (%) |
    """
    response = gemini_prompt(prompt)
    print(f"Prescription: {file_name}")
    display(Markdown(response.text))


# Complete Pipeline

In [None]:
# Install Poppler's command-line tools; the pipeline below passes
# poppler_path="/usr/bin" to pdf2image's convert_from_path.
!apt-get install -y poppler-utils

In [None]:
# Print where the Poppler binaries are installed (expected under /usr/bin,
# the poppler_path used by the pipeline below).
!which pdfinfo
!which pdftoppm

In [None]:
def get_diet_recommendation():
    """Upload a prescription PDF, read its text and display Gemini's diet advice.

    Steps: prompt for an upload, show the first page as an image, read the
    text with ``read_prescription``, send it to Gemini through ``run_gemini``
    and render the reply as Markdown.

    Returns:
        None. Results are printed/displayed. The function returns early when
        nothing is uploaded, when the file is not a PDF, or when no text could
        be read from it.
    """
    from google.colab import files
    from pdf2image import convert_from_path
    from IPython.display import Image, Markdown, display

    # Ask the user to upload a file
    print("Please upload a PDF prescription file:")
    uploaded_file = files.upload()

    # Check if a file was uploaded
    if len(uploaded_file) == 0:
        print("No file uploaded.")
        return

    file_name = next(iter(uploaded_file))
    # Compare the extension case-insensitively so "scan.PDF" is accepted too.
    if not file_name.lower().endswith(".pdf"):
        print("The uploaded file is not a PDF.")
        return
    print("File uploaded successfully:", file_name)

    # Convert first page of PDF to image and display it
    file_path = "/content/" + file_name
    pages = convert_from_path(file_path, poppler_path="/usr/bin", first_page=1, last_page=1)
    first_page_image = pages[0]
    image_path = "/content/first_page_image.png"
    first_page_image.save(image_path, "PNG")

    print("\nDisplaying first page of the uploaded PDF:")
    display(Image(filename=image_path, width=600))

    # Step 1 — OCR the uploaded PDF
    print("\nReading Text...")
    ocr = read_prescription(file_path)
    ocr_text = " ".join(ocr)

    # Without any text there is nothing to analyse, so skip the model call.
    if not ocr_text.strip():
        print("No readable text was found in the uploaded prescription.")
        return

    # Step 2 — Build request for Gemini
    print("\nIdentifying diseases and medicines...")
    api_request_json = {
        "model": "gemini-2.5-flash",  # updated model name
        "messages": [
            {"role": "system", "content": (
                "You are a medical analysis assistant. "
                "Extract medicines from the uploaded prescription, identify their associated symptoms "
                "and diseases, and provide food recommendations in a clear table format."
            )},
            {"role": "user", "content": ocr_text}
        ]
    }

    # Step 3 — Run Gemini request
    response = run_gemini(api_request_json)
    gemini_output = response["choices"][0]["message"]["content"]

    # Step 4 — Display results
    print("\nGenerating diet recommendations...")
    display(Markdown(gemini_output))


In [None]:
# Run the complete pipeline: prompts for a PDF upload and displays the recommendations.
get_diet_recommendation()

In [None]:
# Run the pipeline a second time, e.g. for another prescription file.
get_diet_recommendation()
