<a href="https://colab.research.google.com/github/sainiakhil/AI-receptionist-for-Doctor/blob/main/AI_receptionist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers qdrant-client aiohttp asyncio bitsandbytes

In [None]:
!pip install --upgrade transformers

In [None]:
!pip install -q sentence-transformers
!pip install faiss-cpu



In [None]:
import torch
import numpy as np
import faiss
from transformers import BitsAndBytesConfig # Import the BitsAndBytesConfig class
from sentence_transformers import SentenceTransformer
import asyncio
import random
from transformers import pipeline

In [None]:
# Initialize LLM from Hugging Face

# The device has to be chosen when the pipeline is constructed: without a
# `device` argument the model is left on the CPU even when a GPU is present.
# 0 selects the first CUDA device, -1 selects the CPU, so the cell still runs
# on a machine without a GPU.
llm = pipeline(
    "text-generation",
    model="NousResearch/Meta-Llama-3.1-8B",
    device=0 if torch.cuda.is_available() else -1,
)

# Initialize the embedding model (e.g., SentenceTransformer)
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:

def populate_emergency_responses():
    """Build the FAISS index of known emergencies and their instructions.

    Each emergency description is embedded with the module-level ``model``
    and stored in a flat L2 index.

    Returns:
        A ``(index, response_map)`` pair. ``index`` is the populated
        ``faiss.IndexFlatL2``; ``response_map`` maps the row position of each
        description in that index to its instruction text.
    """
    instructions_by_emergency = {
        "not breathing": "Start CPR immediately. Push hard and fast in the center of the chest and provide rescue breaths.",
        "chest pain": "Stay calm and take slow, deep breaths. Sit in a comfortable position while waiting for help.",
    }

    # Embed the descriptions (the dict keys), one vector per description.
    descriptions = list(instructions_by_emergency)
    embeddings = model.encode(descriptions)

    # The index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings))

    # Row i of the index corresponds to the i-th description, so enumerating
    # the values in insertion order gives the row -> instruction lookup.
    response_map = dict(enumerate(instructions_by_emergency.values()))

    return index, response_map

In [None]:
# Build the FAISS index and the row-position -> instruction lookup once;
# query_vector_db reads both as module-level globals.
db, response_map = populate_emergency_responses()

In [None]:
# Query the vector database asynchronously

async def query_vector_db(emergency_description, *, max_distance=None):
    """Return the stored instructions closest to *emergency_description*.

    The description is embedded with the module-level ``model`` and matched
    against the module-level FAISS index ``db``; the matching row is then
    looked up in ``response_map``.

    Args:
        emergency_description: Free-text description typed by the caller.
        max_distance: Optional cutoff on the distance reported by the index
            (squared L2 for ``IndexFlatL2``). When given, a nearest match
            farther away than this is treated as "no match". ``None`` (the
            default) accepts the nearest match regardless of distance.

    Returns:
        The instruction text for the nearest stored emergency, or a fixed
        apology string when no usable match exists.
    """
    fallback = "Sorry, I don't have specific instructions for that emergency."

    # An empty description carries no information, yet a nearest-neighbour
    # search would still return some stored emergency for it.
    if not emergency_description or not emergency_description.strip():
        return fallback

    query_vector = model.encode([emergency_description])

    # Perform the search
    k = 1  # Number of nearest neighbors
    distances, indices = db.search(query_vector, k)

    if indices is None or len(indices) == 0 or len(indices[0]) == 0:
        return fallback

    best_index = int(indices[0][0])

    # FAISS pads missing neighbours with the label -1 instead of returning a
    # shorter result, so "nothing found" has to be detected here.
    if best_index < 0:
        return fallback

    if max_distance is not None and float(distances[0][0]) > max_distance:
        return fallback

    return response_map.get(best_index, fallback)


In [None]:
# Generate response using LLM asynchronously
async def generate_response(prompt):
    """Run *prompt* through the module-level ``llm`` pipeline.

    Returns the ``'generated_text'`` field of the first result produced by
    the pipeline, with generation capped at ``max_length=50``.
    """
    outputs = llm(prompt, max_length=50)
    first_output = outputs[0]
    return first_output['generated_text']

# Main AI receptionist logic
async def ai_receptionist():
    """Run one interactive receptionist session on stdin/stdout.

    Repeatedly asks whether the caller has an emergency or wants to leave a
    message. As soon as the reply contains "message" or "emergency" the
    matching flow is run once and the session ends; any other reply prints a
    short notice and the question is asked again.
    """
    while True:
        # Step 1: Confirm emergency or message
        response = await generate_response("Are you having an emergency, or would you like to leave a message for Dr. Adrin?")
        print(response)

        user_input = input().strip().lower()

        # "message" is checked first, matching the original precedence when a
        # reply contains both words.
        if "message" in user_input:
            await _take_message()
            break
        if "emergency" in user_input:
            await _handle_emergency()
            break

        # Handle unrelated topics
        print("I don’t understand that. Let's try again.")


async def _take_message():
    """Prompt for a message for the doctor and acknowledge it."""
    # Step 2: Handle message
    message_response = await generate_response("Please type your message:")
    print(message_response)

    # NOTE: the typed message is read so the dialogue stays in step, but
    # nothing in this file stores or forwards it yet.
    input()
    print("Thanks for the message, we will forward it to Dr. Adrin.")


async def _handle_emergency():
    """Collect emergency details, look up instructions and announce the ETA."""
    # Step 3: Handle emergency
    emergency_description = input("Please describe the emergency:").strip().lower()

    # Schedule the vector-database lookup as a task so its result can be
    # collected after the location exchange below.
    db_task = asyncio.create_task(query_vector_db(emergency_description))

    # Step 4: Continue conversation while waiting
    location_prompt = "I am checking what you should do immediately, meanwhile, can you tell me which area you are located in right now?"
    location_response = await generate_response(location_prompt)
    print(location_response)

    # NOTE: the location reply is consumed but not used by anything yet.
    input()

    # Wait for the vector database response
    emergency_instructions = await db_task

    # Step 5: Provide ETA and handle urgency
    eta = random.randint(5, 20)  # Generate random ETA in minutes
    print(f"Dr. Adrin will be coming to your location in {eta} minutes.")

    too_late_input = input("If you think the arrival will be too late, type 'too late': ").strip().lower()
    if "too late" in too_late_input:
        print(f"I understand that you are worried that Dr. Adrin will arrive too late. Meanwhile, {emergency_instructions}")
    else:
        # The caller is told to "follow these steps", so the steps must
        # actually be shown in this branch as well.
        print(f"Don’t worry, please follow these steps: {emergency_instructions} Dr. Adrin will be with you shortly.")

In [None]:
# Run the AI receptionist
async def main() -> None:
    """Entry point: run a single receptionist session to completion."""

    # Start the AI receptionist
    await ai_receptionist()

# Top-level `await` is used here instead of `asyncio.run(main())`; this only
# works when the cell is executed in a notebook/IPython session.
# NOTE(review): presumably `asyncio.run` was dropped because the notebook
# already has a running event loop; a plain script would need it instead.
await main()

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Are you having an emergency, or would you like to leave a message for Dr. Adrin? Please call our office at (360) 734-4400. We will return your call as soon as possible.
emergency
Please describe the emergency:not breathing


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


I am checking what you should do immediately, meanwhile, can you tell me which area you are located in right now? Also, are you in a shelter or with family/friends?
I am in the Bay Area. I am at home,
I am in shyam nagar
Dr. Adrin will be coming to your location in 13 minutes.
If you think the arrival will be too late, type 'too late': too late
I understand that you are worried that Dr. Adrin will arrive too late. Meanwhile, Start CPR immediately. Push hard and fast in the center of the chest and provide rescue breaths.
