## Install NLP libraries needed for medical text understanding

In [1]:
!pip install transformers torch scikit-learn pandas



## Import libraries

In [2]:
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModel
import torch

## Load Pretrained BERT Model

In [3]:
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]



vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: emilyalsentzer/Bio_ClinicalBERT
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.predictions.transform.dense.weight     | UNEXPECTED |  | 
cls.predictions.decoder.weight             | UNEXPECTED |  | 
cls.seq_relationship.bias                  | UNEXPECTED |  | 
cls.predictions.transform.dense.bias       | UNEXPECTED |  | 
cls.predictions.bias                       | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED |  | 
cls.seq_relationship.weight                | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


## Input Doctor Prescription Text

In [4]:
doctor_note = """
Patient has high blood pressure.
Avoid salty and fried foods.
Increase intake of fruits and vegetables.
Limit sugar consumption.
"""

## Clean the Text

In [5]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text

cleaned_text = clean_text(doctor_note)
cleaned_text


'\npatient has high blood pressure\navoid salty and fried foods\nincrease intake of fruits and vegetables\nlimit sugar consumption\n'

## Convert Text to BERT Embeddings

In [6]:
inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True)

with torch.no_grad():
    outputs = model(**inputs)

embeddings = outputs.last_hidden_state.mean(dim=1)
embeddings

tensor([[ 1.3749e-01,  4.8324e-02, -3.5130e-01,  7.6508e-02,  8.1559e-02,
          3.9022e-01, -9.4997e-03,  6.9880e-03, -1.5556e-01, -1.1921e-02,
         -1.0461e-01,  1.9578e-01, -2.1591e-01,  3.6902e-01, -4.4731e-01,
         -1.6068e-01, -3.3474e-02, -6.0823e-02, -5.2926e-02, -2.5226e-02,
         -1.0763e-01, -1.9981e-01, -3.7520e-02, -2.6165e-01, -4.2923e-01,
         -9.1957e-02, -5.2064e-03,  8.4400e-01, -1.7951e-01,  3.9167e-01,
         -2.7451e-01,  1.9355e-01, -1.1651e-01,  2.6436e-01, -5.9437e-02,
         -3.2636e-02,  2.5756e-01,  2.2475e-01,  3.8795e-02,  2.5025e-01,
          1.3748e-01, -7.1675e-02,  2.1566e-01,  3.5008e-01,  1.0715e-01,
         -6.2025e-02, -4.4101e-02, -8.7144e-02, -4.0562e-01,  3.3517e-01,
         -1.9866e-01,  2.1776e-01,  2.0170e-01, -1.1455e-01,  4.7114e-01,
         -1.9648e-01, -3.5585e-01, -1.2101e-01, -1.1583e-01,  6.1102e-01,
          3.2466e-01, -1.1321e-01, -1.2756e-01,  1.1115e-01,  1.4647e-01,
          2.8904e-01,  5.7215e-01, -9.

## Define Diet Mapping Rules

In [7]:
diet_rules = {
    "high blood pressure": {
        "recommended": ["fruits", "vegetables", "low-fat foods"],
        "avoid": ["salt", "fried foods"]
    },
    "diabetes": {
        "recommended": ["whole grains", "fiber rich foods"],
        "avoid": ["sugar", "sweets"]
    }
}

## Extract Medical Conditions from Text

In [8]:
def extract_conditions(text, rules):
    found_conditions = []
    for condition in rules:
        if condition in text:
            found_conditions.append(condition)
    return found_conditions

conditions = extract_conditions(cleaned_text, diet_rules)
conditions


['high blood pressure']

## Generate Actionable Diet Guidelines

In [9]:
def generate_diet_guidelines(conditions, rules):
    recommendations = []
    avoid_list = []

    for condition in conditions:
        recommendations.extend(rules[condition]["recommended"])
        avoid_list.extend(rules[condition]["avoid"])

    return {
        "Recommended Foods": list(set(recommendations)),
        "Foods to Avoid": list(set(avoid_list))
    }

diet_guidelines = generate_diet_guidelines(conditions, diet_rules)
diet_guidelines

{'Recommended Foods': ['fruits', 'vegetables', 'low-fat foods'],
 'Foods to Avoid': ['fried foods', 'salt']}

## Final Structured Output

In [10]:
output = {
    "Interpreted Conditions": conditions,
    "Diet Recommendations": diet_guidelines
}

output

{'Interpreted Conditions': ['high blood pressure'],
 'Diet Recommendations': {'Recommended Foods': ['fruits',
   'vegetables',
   'low-fat foods'],
  'Foods to Avoid': ['fried foods', 'salt']}}