In [None]:
! pip install transformers

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
import random
from datetime import datetime, timedelta
import torch

class SimpleNotesGenerator:
    """Generate synthetic doctor's notes for completed appointments.

    The constructor reads the appointments, treatments, patients and doctors
    CSV files into DataFrames and loads a causal language model plus its
    tokenizer. ``generate_all_notes`` then joins completed appointments with
    their treatments and asks the model for one clinical note per row.
    """

    # Age used when a date of birth is missing or cannot be parsed.
    DEFAULT_AGE = 45

    # Column layout of the frame returned by ``generate_all_notes``. Declared
    # explicitly so that an empty result still exposes the expected columns.
    NOTE_COLUMNS = [
        'note_id',
        'patient_id',
        'appointment_id',
        'doctor_id',
        'note_date',
        'doctor_comments',
    ]

    def __init__(self,
                 df_appointments_path='appointments.csv',
                 df_treatments_path='treatments.csv',
                 df_patients_path='patients.csv',
                 df_doctors_path='doctors.csv',
                 model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        """Load the four source tables and the language model.

        Args:
            df_appointments_path: CSV path for the appointments table.
            df_treatments_path: CSV path for the treatments table.
            df_patients_path: CSV path for the patients table.
            df_doctors_path: CSV path for the doctors table.
            model_name: Hugging Face Hub identifier of the chat model to load.
                Defaults to the small TinyLlama chat checkpoint (no
                quantization).
        """
        self.df_appointments = pd.read_csv(df_appointments_path)
        self.df_treatments = pd.read_csv(df_treatments_path)
        self.df_patients = pd.read_csv(df_patients_path)
        self.df_doctors = pd.read_csv(df_doctors_path)

        print("Loading model...")

        use_cuda = torch.cuda.is_available()

        # `trust_remote_code` is intentionally not enabled: the default
        # checkpoint is a stock Llama architecture, and enabling the flag
        # would allow code shipped in the Hub repository to execute locally.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # NOTE(review): recent transformers releases warn that
            # `torch_dtype` is deprecated in favour of `dtype`; the old name
            # is kept here so older releases keep working.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
        )

        print("Model loaded")

    @staticmethod
    def _build_messages(age, gender, reason, treatments):
        """Return the system/user chat messages describing one visit."""
        user_content = (
            "Write a brief medical note for:\n"
            f"Patient: {age}yo {gender}\n"
            f"Visit reason: {reason}\n"
            f"Treatments: {treatments}\n"
            "Write a concise clinical note (150-200 words):"
        )
        return [
            {
                "role": "system",
                "content": "You are a medical professional writing clinical notes.",
            },
            {"role": "user", "content": user_content},
        ]

    def generate_note(self, appointment_row, treatment_row, patient_row):
        """Generate a doctor's note for a single appointment.

        Args:
            appointment_row: Mapping/Series providing ``reason_for_visit``.
            treatment_row: Mapping/Series providing ``treatment_types``.
            patient_row: Mapping/Series providing ``date_of_birth`` and
                ``gender``.

        Returns:
            The generated note text with surrounding whitespace stripped.
        """
        age = self._calculate_age(patient_row['date_of_birth'])
        # NOTE(review): every value other than 'M' is rendered as "female";
        # confirm the patients table only ever contains 'M' and 'F'.
        gender = 'male' if patient_row['gender'] == 'M' else 'female'

        messages = self._build_messages(
            age,
            gender,
            appointment_row['reason_for_visit'],
            treatment_row['treatment_types'],
        )

        # Let the tokenizer render the prompt with the model's own chat
        # template instead of hand-writing role/end markers; markers the model
        # was not trained on tend to make it echo the prompt back.
        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        # Moving to the model's device is a no-op on CPU-only runs.
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
        prompt_length = inputs['input_ids'].shape[1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=250,
                temperature=0.7,
                do_sample=True,
                top_p=0.9
            )

        # Decode only the newly generated tokens, not the prompt.
        note = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return note.strip()

    def _calculate_age(self, dob):
        """Return the age in whole years for a date of birth.

        Falls back to ``DEFAULT_AGE`` when ``dob`` is missing (``None``, NaN,
        NaT) or cannot be parsed, so that one bad record does not abort a
        batch run.
        """
        try:
            dob_date = pd.to_datetime(dob)
            # A missing value parses to None/NaT without raising; without this
            # check the arithmetic below would silently produce NaN.
            if dob_date is None or pd.isna(dob_date):
                return self.DEFAULT_AGE
            today = datetime.today()
            # Subtract one when this year's birthday has not happened yet.
            birthday_pending = (today.month, today.day) < (dob_date.month, dob_date.day)
            return today.year - dob_date.year - birthday_pending
        except Exception:
            # Best-effort fallback; unlike a bare `except`, this does not
            # swallow KeyboardInterrupt or SystemExit.
            return self.DEFAULT_AGE

    def generate_all_notes(self, limit=None, seed=None):
        """Generate notes for completed appointments.

        Args:
            limit: Maximum number of merged appointment/treatment rows to
                process. ``None`` processes every row; ``0`` processes none.
            seed: Optional integer. When given, seeds both the note-date
                offsets and torch's sampling RNG so a run can be repeated.

        Returns:
            DataFrame with the columns listed in ``NOTE_COLUMNS``. Rows whose
            generation raised are reported on stdout and omitted.
        """
        if seed is not None:
            torch.manual_seed(seed)
            rng = random.Random(seed)
        else:
            rng = random

        results = []

        # Completed appointments joined with their treatments.
        completed = self.df_appointments[self.df_appointments['status'] == 'Completed']
        merged = completed.merge(self.df_treatments, on='appointment_id', how='inner')

        # `is not None` so that an explicit limit of 0 is honoured.
        if limit is not None:
            merged = merged.head(limit)

        total = len(merged)
        print(f"\nGenerating {total} notes...")

        # Build the patient lookup once instead of scanning the patients table
        # for every row. Keeping the first duplicate mirrors taking the first
        # matching row.
        patients_by_id = (
            self.df_patients
            .drop_duplicates(subset='patient_id')
            .set_index('patient_id')
        )

        for position, (_, row) in enumerate(merged.iterrows()):
            try:
                patient = patients_by_id.loc[row['patient_id']]
                note = self.generate_note(row, row, patient)

                results.append({
                    'note_id': position + 1,
                    'patient_id': row['patient_id'],
                    'appointment_id': row['appointment_id'],
                    'doctor_id': row['doctor_id'],
                    # The note is stamped 0-8 hours after the appointment date.
                    'note_date': pd.to_datetime(row['appointment_date']) + timedelta(hours=rng.randint(0, 8)),
                    'doctor_comments': note
                })

                if (position + 1) % 5 == 0:
                    print(f"Progress: {position + 1}/{total}")

            except Exception as e:
                # Deliberately best-effort: report the failing row and go on.
                print(f"Error at row {position}: {e}")
                continue

        return pd.DataFrame(results, columns=self.NOTE_COLUMNS)


# Build the generator; constructing it reads the CSV inputs and loads the model.
generator = SimpleNotesGenerator()

# Trial run capped at 200 notes before committing to the full data set.
df_notes = generator.generate_all_notes(limit=200)

# Preview the first three generated notes next to their appointment ids.
print("\nSample notes:")
print(df_notes.head(3)[['appointment_id', 'doctor_comments']])

# Full run, to be enabled once the trial output looks right:
# df_notes_all = generator.generate_all_notes()
# df_notes_all.to_csv('doctor_notes.csv', index=False)


Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model loaded

Generating 200 notes...
Progress: 5/200
Progress: 10/200
Progress: 15/200
Progress: 20/200
Progress: 25/200
Progress: 30/200
Progress: 35/200
Progress: 40/200
Progress: 45/200
Progress: 50/200
Progress: 55/200
Progress: 60/200
Progress: 65/200
Progress: 70/200
Progress: 75/200
Progress: 80/200
Progress: 85/200
Progress: 90/200
Progress: 95/200
Progress: 100/200
Progress: 105/200
Progress: 110/200
Progress: 115/200
Progress: 120/200
Progress: 125/200
Progress: 130/200
Progress: 135/200
Progress: 140/200
Progress: 145/200
Progress: 150/200
Progress: 155/200
Progress: 160/200
Progress: 165/200
Progress: 170/200
Progress: 175/200
Progress: 180/200
Progress: 185/200
Progress: 190/200
Progress: 195/200
Progress: 200/200

Sample notes:
   appointment_id                                    doctor_comments
0              12  Patient: 17yo female\nVisit reason: Bleeding/B...
1              21  Patient: 3-year-old female\nVisit reason: Back...
2              24  [Patient: 57yo male]\

In [None]:
# Persist the trial batch of notes; the DataFrame index is not written out.
df_notes.to_csv('doctor_notes_200.csv', index=False)

#### MedicalImages

In [None]:
# Medical images are usually attached to 'Diagnostic' appointments.
# Create fake data for patients by downloading images from open-source sites such as Kaggle.
# Modality - use case: ultrasound - pregnancy; X-ray - broken bones; MRI - brain scans; CT scan - broken bones.