# Import Required Libraries

In [None]:
import os
from dotenv import load_dotenv
from dotenv import load_dotenv, find_dotenv
import PyPDF2
import re
import openai
import time
import google.generativeai as genai
import json, requests
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import spacy
from nrclex import NRCLex
from textstat import flesch_kincaid_grade
from geopy.geocoders import Nominatim
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
# from langchain_community.vectorstores import FAISS
import gradio as gr
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import xml.etree.ElementTree as ET
from geopy.geocoders import Nominatim

# Environment Variables

In [3]:
# Locate a .env file with find_dotenv() and load its variables into the environment.
_ = load_dotenv(find_dotenv())
# Keys are read with os.environ[...], so a missing variable raises KeyError at this point.
# NOTE(review): the variable is named OPEN_API_KEY (not OPENAI_API_KEY) — confirm it matches the .env file.
openai_api_key = os.environ['OPEN_API_KEY']
GOOGLE_API_KEY=os.environ['GOOGLE_API_KEY']
# Configure the google.generativeai module once; the gemini_* functions below rely on it.
genai.configure(api_key=GOOGLE_API_KEY)
hf_api_key =  os.environ['HF_API_KEY']
# Hugging Face Inference API endpoint URLs, one per analysis task.
# NOTE(review): neither these endpoints nor hf_api_key are referenced in the visible part of this file.
HF_SUMMARY_ENDPOINT = "https://api-inference.huggingface.co/models/sshleifer/distilbart-cnn-12-6"
HF_NER_ENDPOINT = "https://api-inference.huggingface.co/models/flair/ner-english-ontonotes-large"
HF_SENTIMENT_ENDPOINT = "https://api-inference.huggingface.co/models/finiteautomata/bertweet-base-sentiment-analysis"
HF_EMOTION_ENDPOINT = "https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base"
HF_TONE_ENDPOINT = "https://api-inference.huggingface.co/models/yiyanghkust/finbert-tone"
HF_ENGMAT_ENDPOINT = "https://api-inference.huggingface.co/models/j-hartmann/ambiguity-distilroberta-base"

# Text Extraction From Various Files (e.g. PDF, XML, etc.)

In [4]:
"""Utility function for text extraction"""

def pdf_2_txt(pdf_path):
    """Extract the text of every page of a PDF as one whitespace-normalized string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text with every run of whitespace collapsed to a
        single space. The result always starts with one space (kept from the
        original implementation). If the file cannot be opened or parsed, the
        error is printed and whatever text was collected so far is returned
        (a single space when nothing could be read).
    """
    # Bound before the try block so the final return is always valid, even
    # when opening or parsing the file fails.
    text_content = ' '
    try:
        # The context manager closes the file even if PyPDF2 raises midway.
        with open(pdf_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # Guard against a page yielding no text (None).
                text_content += page.extract_text() or ''
    except Exception as e:
        print("Error:", e)
    return re.sub(r'\s+', ' ', text_content)


def xml_2_text(xml_file_path):
    """Extract all character data from an XML file as one normalized string.

    The text and tail of every element are concatenated in document order and
    every run of whitespace is collapsed to a single space.

    Args:
        xml_file_path: Path of the XML file to read.

    Returns:
        The extracted text, or an empty string if the file cannot be read or
        is not well-formed XML (the error is printed in that case).
    """
    try:
        root = ET.parse(xml_file_path).getroot()
    except (OSError, ET.ParseError) as e:
        print("Error:", e)
        # Return a defined value instead of referencing an unbound local.
        return ""
    # itertext() walks element text and child tails in document order, which
    # is exactly what the previous hand-written recursion produced.
    return re.sub(r'\s+', ' ', "".join(root.itertext()))

# Patient Analysis Using OpenAI

In [5]:
"""OPENAI"""

def openai_summary(patient_report, *, max_retries=5):
    """Summarize a patient report with the OpenAI chat model "gpt-3.5-turbo".

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 20 second pause before retrying.

    Returns:
        The summary text returned by the model.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, so a permanent failure (invalid key,
            oversized prompt, ...) no longer retries forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": patient_report + "\n\n Summarize the above patient report"}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursion, which also
    # added one stack frame per retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            return completion.choices[0].message.content
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(20)

def openai_sentiment(patient_report, *, max_retries=5):
    """Classify the sentiment of a patient report with "gpt-3.5-turbo".

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 20 second pause before retrying.

    Returns:
        The first of 'Positive', 'Negative', 'Neutral' (checked in that order)
        that occurs in the model reply, or "No sentiment" if none occurs.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": "provide the sentiment of  below patient report in any of the below options: Positive,Negative,Neutral\n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            sentiment = completion.choices[0].message.content
            break
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(20)
    labels = ('Positive', 'Negative', 'Neutral')
    return next((label for label in labels if label in sentiment), "No sentiment")

def openai_ner(patient_report, *, max_retries=5):
    """Ask "gpt-3.5-turbo" for the named entities mentioned in a patient report.

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 20 second pause before retrying.

    Returns:
        The raw model reply listing the entities.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": "provide the Named Entities such as hospital names, patient names, doctor names,locations,medication names, and dates mentioned in the patient report\n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            return completion.choices[0].message.content
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(20)

def openai_emotion(patient_report, *, max_retries=5):
    """Predict the dominant emotion of a patient report with "gpt-3.5-turbo".

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 60 second pause before retrying.

    Returns:
        The first of 'Happiness', 'Sadness', 'Anger', 'Fear', 'Surprise',
        'Disgust' (checked in that order) that occurs in the model reply, or
        "No emotion" if none occurs. The label formerly spelled 'Suprise' is
        now returned as 'Surprise'.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    # The option list previously offered the misspelling 'Suprise'; a reply
    # using the correct spelling then matched no label at all.
    prompt = [{"role": "user", "content": "Predict the emotion based on patient report from provided options : [ 'Happiness', 'Sadness', 'Anger', 'Fear', 'Surprise', 'Disgust']\n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            emotion = completion.choices[0].message.content
            break
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(60)
    # Tolerate the old misspelling in case the model still produces it.
    normalized = emotion.replace('Suprise', 'Surprise')
    labels = ('Happiness', 'Sadness', 'Anger', 'Fear', 'Surprise', 'Disgust')
    return next((label for label in labels if label in normalized), "No emotion")

def openai_tone(patient_report, *, max_retries=5):
    """Predict the tone of a patient report with "gpt-3.5-turbo".

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 60 second pause before retrying.

    Returns:
        The first of 'FORMAL', 'INFORMAL', 'OPTIMISTIC', 'HARSH' (checked in
        that order) that occurs as a whole word in the model reply, or
        "No Tone" if none occurs.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": "Predict the Tone based on patient report from provided options : ['FORMAL', 'INFORMAL', 'OPTIMISTIC', 'HARSH']\n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            tone = completion.choices[0].message.content
            break
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(60)
    labels = ('FORMAL', 'INFORMAL', 'OPTIMISTIC', 'HARSH')
    # Whole-word matching: a plain substring test reports "INFORMAL" as
    # FORMAL because 'FORMAL' is contained in 'INFORMAL'.
    return next(
        (label for label in labels
         if re.search(r'\b' + re.escape(label) + r'\b', tone)),
        "No Tone",
    )

def openai_englishmaturity(patient_report, *, max_retries=5):
    """Predict the English maturity of a patient report with "gpt-3.5-turbo".

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 60 second pause before retrying.

    Returns:
        The first of 'AVERAGE', 'MEDIUM', 'PROFICIENT', 'LOW' (checked in that
        order) that occurs in the model reply, or "NA" if none occurs.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": "Predict the English Maturity of the  patient report from provided options : ['AVERAGE', 'MEDIUM', 'PROFICIENT', 'LOW']\n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            maturity = completion.choices[0].message.content
            break
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(60)
    labels = ('AVERAGE', 'MEDIUM', 'PROFICIENT', 'LOW')
    return next((label for label in labels if label in maturity), "NA")


def openai_timeline(patient_report, *, max_retries=5):
    """Ask "gpt-3.5-turbo" for the important events of a patient report.

    The prompt requests time, event type and event description, grouped by
    event type.

    Args:
        patient_report: Full text of the patient report.
        max_retries: Maximum number of attempts (at least one is always made).
            A failed attempt is followed by a 60 second pause before retrying.

    Returns:
        The raw model reply describing the events.

    Raises:
        Exception: The error of the final attempt is re-raised once all
            attempts have failed, instead of retrying forever.
    """
    openai.api_key = openai_api_key
    prompt = [{"role": "user", "content": "List the Important Events with Time,Event Type,Event Description Group By Event type based on the below patient report  \n\n" + patient_report}]
    attempts = max(1, max_retries)
    # A bounded loop replaces the previous unbounded recursive retry.
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=prompt,
                temperature=0)
            return completion.choices[0].message.content
        except Exception as e:
            print("Error:", e)
            if attempt == attempts:
                raise
            time.sleep(60)


# Patient Analysis Using Gemini Pro

In [6]:
"""Gemini"""

def gemini_summary(patient_report):
    """Return the 'gemini-pro' model's summary of the given patient report."""
    instruction = "summarize the below patient report \n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_sentiment(patient_report):
    """Return the 'gemini-pro' model's raw sentiment answer for the report.

    The prompt offers the options Positive, Negative and Neutral; the reply
    text is returned unmodified.
    """
    instruction = "provide the sentiment of  below patient report in any of the below options: Positive,Negative,Neutral\n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_NER(patient_report):
    """Return the 'gemini-pro' model's named-entity listing for the report.

    The prompt asks for the entities as key value pairs; the reply text is
    returned unmodified.
    """
    instruction = "provide the Named Entities such as hospital names, patient names, doctor names,locations,medication names, and dates mentioned in the patient report as key value pairs\n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_emotion(patient_report):
    """Return the 'gemini-pro' model's raw emotion prediction for the report."""
    instruction = "Predict the emotion based on patient report from provided options : [ 'Happiness', 'Sadness', 'Anger', 'Fear', 'Suprise', 'Disgust']\n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_tone(patient_report):
    """Return the 'gemini-pro' model's raw tone prediction for the report."""
    instruction = "Predict the Tone based on patient report from provided options : [ 'FORMAL', 'INFORMAL', 'OPTIMISTIC', 'HARSH']\n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_englishmaturity(patient_report):
    """Return the 'gemini-pro' model's raw English-maturity prediction."""
    instruction = "Predict the English Maturity of the  patient report from provided options : [ 'AVERAGE', 'MEDIUM', 'PROFICIENT', 'LOW']\n\n"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(instruction + patient_report)
    return reply.text

def gemini_determine_sentiment_highlights(patient_report):
    """Return the words/phrases 'gemini-pro' reports as driving the sentiment."""
    preamble = "Given a patient Report \n\n"
    request = "\n\n Provide the key words or phrases that strongly contribute to determining the sentiment of the patient report"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(preamble + patient_report + request)
    return reply.text

def gemini_determine_tone_highlights(patient_report):
    """Return the words/phrases 'gemini-pro' reports as driving the tone."""
    preamble = "Given a patient Report \n\n"
    request = "\n\n Provide the key words or phrases that strongly contribute to determining the tone of the patient report"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(preamble + patient_report + request)
    return reply.text

def gemini_determine_emotion_highlights(patient_report):
    """Return the words/phrases 'gemini-pro' reports as driving the emotion."""
    preamble = "Given a patient Report \n\n"
    request = "\n\n Provide the key words or phrases that strongly contribute to determining the emotion of the patient report"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(preamble + patient_report + request)
    return reply.text

def gemini_determine_englishmaturity_highlights(patient_report):
    """Return the words/phrases 'gemini-pro' reports as driving English maturity."""
    preamble = "Given a patient Report \n\n"
    request = "\n\n Provide the key words or phrases that strongly contribute to determining the English Maturity of the patient report"
    gemini = genai.GenerativeModel('gemini-pro')
    reply = gemini.generate_content(preamble + patient_report + request)
    return reply.text


In [12]:
"""GDrive Files"""
def get_files_from_gdrive(
    credentials_file=r"C:\Users\rakeshvmadmin\Desktop\Summarization\Specific Encounter Examples\client_secret.json",
    token_file='./token.json',
    folder_id='11Y03gG0RGT2ulqL4X_CNf2BQNy4eVhmj',
):
    """List the names of the files inside a Google Drive folder.

    Args:
        credentials_file: Path of the OAuth client-secrets JSON, used only
            when no token file exists yet.
        token_file: Path where the authorized user token is read from and
            written to.
        folder_id: ID of the Drive folder whose files are listed.

    Returns:
        A list with the 'name' of every file returned by the Drive API for
        that folder; an empty list when the folder has no files (previously
        this case failed with an unbound local variable).
    """
    # Read-only metadata access is sufficient for listing file names.
    scopes = ['https://www.googleapis.com/auth/drive.metadata.readonly']

    # Reuse a stored token when present, otherwise run the interactive
    # OAuth flow in a local browser.
    if os.path.exists(token_file):
        credentials = Credentials.from_authorized_user_file(token_file, scopes)
    else:
        flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes)
        credentials = flow.run_local_server(port=0)

    # Save the credentials for future use.
    with open(token_file, 'w') as token:
        token.write(credentials.to_json())

    drive_service = build('drive', 'v3', credentials=credentials)

    # NOTE(review): only the first page of results is read here; confirm the
    # folder never holds more files than one files.list page returns.
    results = drive_service.files().list(q=f"'{folder_id}' in parents", fields="files(name)").execute()
    files = results.get('files', [])

    if not files:
        print('No files found.')
        return []
    print('Files:')
    return [file['name'] for file in files]

In [17]:
def gradio_main(patient_name,Model,len,history):
    """Run the full analysis for one patient report; Gradio callback.

    The report is read from "<current working directory>/Data/<patient_name>"
    and analysed with either the OpenAI or the Gemini helper functions.

    Args:
        patient_name: File name of the report; must end in ".pdf" or ".xml"
            (case-insensitive).
        Model: "openAI" selects the OpenAI helpers; any other value selects
            the Gemini helpers.
        len: Requested summary length from the UI slider. Currently unused.
            (The name shadows the builtin but is kept for compatibility.)
        history: "Include previous history" checkbox value. Currently unused.

    Returns:
        Tuple (summary_text, sentiment, emotion, tone, engmat, timeline).

    Raises:
        ValueError: If no report is selected or the file extension is not
            supported (previously this surfaced as an unbound-variable error).
    """
    if not patient_name:
        raise ValueError("No patient report selected.")

    path = os.path.join(os.getcwd(), "Data", patient_name)
    lowered_name = patient_name.lower()
    if lowered_name.endswith(".pdf"):
        patient_report = pdf_2_txt(path)
    elif lowered_name.endswith(".xml"):
        patient_report = xml_2_text(path)
    else:
        raise ValueError(
            f"Unsupported report type for {patient_name!r}; expected a .pdf or .xml file."
        )

    if Model == "openAI":
        summary_text = openai_summary(patient_report)
        sentiment = openai_sentiment(patient_report)
        emotion = openai_emotion(patient_report)
        tone = openai_tone(patient_report)
        engmat = openai_englishmaturity(patient_report)
        timeline = openai_timeline(patient_report)
    else:
        summary_text = gemini_summary(patient_report)
        sentiment = gemini_sentiment(patient_report)
        emotion = gemini_emotion(patient_report)
        tone = gemini_tone(patient_report)
        engmat = gemini_englishmaturity(patient_report)
        # NOTE(review): the timeline is produced with the OpenAI helper even
        # on the Gemini path; no Gemini timeline helper is visible in this
        # file — confirm this is intended.
        timeline = openai_timeline(patient_report)
    return summary_text,sentiment,emotion,tone,engmat,timeline

# Close any Gradio interfaces that are still running before starting a new one.
gr.close_all()

# Input widgets, in the positional order of gradio_main's parameters.
report_inputs = [
    gr.components.Dropdown(label="Select Patient Report", choices=get_files_from_gdrive()),
    gr.components.Dropdown(label="Select Model", choices=["openAI","Gemini Pro"]),
    gr.Slider(100, 500, step=100, value=100, label="Summarize Length", info="Summarize between 100-500 words"),
    gr.Checkbox(label="Yes", info="Include previous history and trends ?"),
]

# Output widgets, in the order of the tuple returned by gradio_main.
report_outputs = [
    gr.Textbox(label="Patient Report Summary", lines=10, max_lines=7),
    gr.Textbox(label="sentiment", lines=2, max_lines=7),
    gr.Textbox(label="Emotion", lines=2, max_lines=7),
    gr.Textbox(label="Tone", lines=2, max_lines=7),
    gr.Textbox(label="English Maturity", lines=2, max_lines=7),
    gr.Textbox(label="Patient_TIMELINE", lines=10, max_lines=7),
]

demo = gr.Interface(
    fn=gradio_main,
    inputs=report_inputs,
    outputs=report_outputs,
    title="CharmHealth CodeRx Hackathon",
    description="THEME : `Clinical Summary Challenge`",
)
demo.launch()

Closing server running on port: 7861
Closing server running on port: 7861
Closing server running on port: 7861
Closing server running on port: 7861
Files:
Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




Error: Rate limit reached for gpt-3.5-turbo in organization org-OodEu4Y24o1DToD4VZIdNX7S on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.
