In [1]:
import os
import requests
from dotenv import load_dotenv
from lingua import LanguageDetectorBuilder, Language
from deep_translator import GoogleTranslator
import gradio as gr
import re

# Populate the process environment (python-dotenv) before the os.getenv()
# lookups below run; this call has to stay ahead of them.
load_dotenv()

# Load environment variables
# NOTE: os.getenv() returns None for any variable that is not set, and nothing
# in this section validates the values.
# URL that request_gpt() POSTs the chat payload to.
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
# Sent as the "api-key" request header by request_gpt().
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
# The AI_SEARCH_* values are forwarded inside the "azure_search" data source
# parameters of the request payload (endpoint, semantic configuration, key,
# index name respectively).
AI_SEARCH_ENDPOINT = os.getenv("AI_SEARCH_ENDPOINT")
AI_SEARCH_SEMANTIC = os.getenv("AI_SEARCH_SEMANTIC")
AI_SEARCH_KEY = os.getenv("AI_SEARCH_KEY")
AI_SEARCH_INDEX = os.getenv("AI_SEARCH_INDEX")

# Initialize language detector
# Built once at import time and shared by detect_language(); it is configured
# with every language the lingua builder offers.
detector = LanguageDetectorBuilder.from_all_languages().build()

# Function to extract a limited number of message history for GPT request
def get_history_messages(histories, history_length=5):
    """Convert chat history pairs into role-tagged messages for the GPT payload.

    Args:
        histories: Sequence of ``(user_message, assistant_message)`` pairs in
            chronological order (oldest first). ``None`` or an empty sequence
            yields an empty result.
        history_length: Maximum number of message pairs to keep. Defaults to 5,
            so existing single-argument callers are unaffected.

    Returns:
        A flat list of ``{"role": ..., "content": ...}`` dicts, oldest first.
        The first element of each pair is tagged ``"user"`` and the second
        ``"assistant"``.
    """
    if not histories or history_length <= 0:
        return []

    # Keep the *most recent* exchanges: they carry the context a follow-up
    # question refers to. Truncating from the front would freeze the context
    # on the first few turns of a long conversation.
    recent_pairs = list(histories)[-history_length:]

    history_list = []
    for pair in recent_pairs:
        user_message, assistant_message = pair[0], pair[1]
        # One side of a pair may be missing (None); skip it rather than
        # sending a message with null content.
        if user_message is not None:
            history_list.append({"role": "user", "content": user_message})
        if assistant_message is not None:
            history_list.append({"role": "assistant", "content": assistant_message})

    return history_list

# Seconds to wait for the Azure OpenAI endpoint before giving up, so a stalled
# connection cannot hang the UI callback indefinitely.
_REQUEST_TIMEOUT_SECONDS = 60

# Patterns that pull individual fields out of a citation's raw content.
_CHUNK_RE = re.compile(r'"chunk"\s*:\s*"([^"]+)"')
_DISEASE_RE = re.compile(r'"disease"\s*:\s*"([^"]+)"')
_SOURCE_RE = re.compile(r'"source"\s*:\s*"([^"]+)"')


def _format_citations(citations):
    """Render citation dicts as collapsible HTML ``<details>`` blocks."""
    # Function-scope import keeps this block self-contained.
    from html import escape

    blocks = []
    for citation in citations:
        raw = citation.get("content") or ""
        # Match the fields per citation so that a document missing one field
        # cannot shift another document's disease/source onto its chunk.
        fields = zip(_CHUNK_RE.findall(raw), _DISEASE_RE.findall(raw), _SOURCE_RE.findall(raw))
        for chunk, disease, source in fields:
            idx = len(blocks) + 1
            # Escape retrieved text so characters such as "<" or "&" are shown
            # literally instead of being parsed as markup.
            blocks.append(f"""
            <details>
                <summary>Doc{idx}</summary>
                <h3>Original Text</h3>
                <span>{escape(chunk)}</span>
                <h3>Data Sources</h3>
                <span><b>disease</b>: {escape(disease)}, <b>source</b>: {escape(source)}</span>
                </details>
                <br>
            """)
    return "\n".join(blocks)


# Function to request GPT for a response
def request_gpt(prompt, history_list, detected_lan):
    """Ask Azure OpenAI (with the Azure Search data source) to answer ``prompt``.

    Args:
        prompt: The question to send as the final user message.
        history_list: Earlier ``{"role", "content"}`` messages, placed between
            the system message and the prompt. ``None`` is treated as empty.
        detected_lan: Language name interpolated into the system instruction
            that tells the model which language to answer in.

    Returns:
        A ``(content, citation_html)`` tuple:

        * HTTP 200: the model's answer and the citations rendered as HTML
          (an empty string when there are no usable citations).
        * HTTP 200 with the fixed "not available" answer: ``(content, None)``.
        * Any other status: ``("<status>, <body>", "")``.
        * Transport failure or timeout: ``("Request failed: <error>", "")``.
    """
    headers = {"Content-Type": "application/json", "api-key": AZURE_OPENAI_API_KEY}
    print(prompt)
    print(detected_lan)

    message_list = [
        # System role message to instruct GPT
        {
            "role": "system",
            "content": f"You are an assistant for medical professionals.Always answer in {detected_lan}. If you cannot, switch to English. When asked about a disease, search the 'disease' section of the provided data. If found, answer based on the document.",
        },
        # Message history, then the current user prompt
        *(history_list or []),
        {"role": "user", "content": prompt},
    ]

    # Define the payload with GPT settings and Azure Search configuration
    payload = {
        "messages": message_list,
        "temperature": 0.1,
        "top_p": 0.6,
        "max_tokens": 800,
        "data_sources": [
            {
                "type": "azure_search",
                "parameters": {
                    "endpoint": AI_SEARCH_ENDPOINT,
                    "semantic_configuration": AI_SEARCH_SEMANTIC,
                    "query_type": "semantic",
                    "strictness": 5,
                    "top_n_documents": 5,
                    "key": AI_SEARCH_KEY,
                    "indexName": AI_SEARCH_INDEX,
                },
            }
        ],
    }

    try:
        response = requests.post(
            AZURE_OPENAI_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        # Report connection errors and timeouts in the chat, in the same shape
        # as HTTP errors, instead of raising out of the UI callback.
        return f"Request failed: {exc}", ""

    if response.status_code != 200:
        return f"{response.status_code}, {response.text}", ""

    message = response.json()["choices"][0]["message"]
    content = message["content"]
    if content == "The requested information is not available in the retrieved data. Please try another query or topic.":
        return content, None

    # "context" / "citations" may be absent or empty; treat both as "no citations".
    context = message.get("context") or {}
    citations = context.get("citations") or []

    return content, _format_citations(citations)

def detect_language(text):
    """Return the lowercase name of the language ``text`` is written in.

    Any Hangul syllable in the text short-circuits to ``"korean"``; otherwise
    the shared ``detector`` decides. ``"unknown"`` is returned when the
    detector gives no answer.
    """
    hangul_hit = re.search(r"[가-힣]", text)
    if hangul_hit is not None:
        # Korean characters present: always report 'korean'.
        return "korean"

    guess = detector.detect_language_of(text)
    return "unknown" if guess is None else guess.name.lower()

def translate_to_english(text):
    """Translate ``text`` to English with GoogleTranslator (source auto-detected).

    This is best-effort: if anything goes wrong the error is printed and the
    input is handed back unchanged.
    """
    try:
        english = GoogleTranslator(source='auto', target='en').translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        # Fall back to the untranslated input.
        return text
    else:
        return english


# Function to handle user prompt submission
def click_send(prompt, histories):
    """Handle a submitted prompt and produce the updated UI values.

    Args:
        prompt: Text the user typed into the input box.
        histories: Current chat history as a list of
            ``(user_message, assistant_message)`` pairs; ``None`` is treated
            as an empty history.

    Returns:
        A 3-tuple of the updated chat history, an empty string, and the
        citation HTML for the answer. For a blank prompt the history is
        returned as-is and the third value is a no-op ``gr.update()``.
    """
    if histories is None:
        histories = []

    # Ignore blank submissions: no translation or GPT request is made, and
    # nothing is added to the chat.
    if not prompt or not prompt.strip():
        return histories, "", gr.update()

    # Retrieve message history for the GPT request
    history_list = get_history_messages(histories=histories)
    detected_lan = detect_language(prompt)
    trans_prompt = translate_to_english(prompt)
    # Send the (translated) prompt and history to GPT and get the response
    response_text, citation_html = request_gpt(trans_prompt, history_list, detected_lan)
    # The chat shows the prompt as the user originally typed it, not the translation.
    histories.append((prompt, response_text))
    return histories, "", citation_html



### Web UI ###
# Declares the Gradio layout (chat history + citation panel on top, input box +
# submit button below) and wires both submit paths to click_send().
with gr.Blocks() as demo:
    
    # Main Interaction Area
    with gr.Row():
        with gr.Column(scale=3):   
            # Chatbot, Citation area
            with gr.Row(elem_id="chatbot-container"):
                chatbot = gr.Chatbot(label="Chat history", elem_classes="chatbot", height=600)
                # Receives the citation HTML returned by click_send().
                citation = gr.HTML(label="reference area", elem_classes="citation-box")
            # Input Box and Submit Button
            with gr.Row(elem_id="input-container"):
                # The placeholder is Korean for "Enter your question...".
                input_openai_textbox = gr.Textbox(label="", elem_id="textbox", scale=7, placeholder="질문을 입력하세요...")
                send_button = gr.Button("Submit", elem_id="button", scale=1)
            
            # Connect the chatbot and button functionality
            # Pressing Enter in the textbox and clicking the button run the same
            # handler. click_send() returns (history, "", citation_html), which
            # map onto the outputs in order: the chat, the textbox (receives the
            # empty string) and the citation panel.
            input_openai_textbox.submit(fn=click_send, inputs=[input_openai_textbox, chatbot], outputs=[chatbot, input_openai_textbox, citation])
            send_button.click(fn=click_send, inputs=[input_openai_textbox, chatbot], outputs=[chatbot, input_openai_textbox, citation])

        
# Start the Gradio app at module level, with launch() defaults.
demo.launch()



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Please tell me the treatment guidelines for Cancelman's disease
korean
Please tell me the treatment guidelines for Castleman Disease
korean
Please tell me if adalimumab works
korean
Tell me the survival rate for iMCD (infiltrative multiple myeloma of unknown etiology)
korean
Tell me the iMCD survival rate
korean
Tell me the Castleman Disease survival rate
korean
Is there any information about iMCD?
korean
Tell me the survival rate of iMCD
korean
Treatment guidelines?
korean
Could Adalimumab Be Used as a Treatment?
korean
Adalimumab
korean
