# Packages

In [42]:
!pip install openai --quiet
!pip install langchain --quiet
!pip install langchain-cohere
!pip install langchain-community --quiet



In [43]:
import os
from google.colab import userdata
os.environ['OPENAI_API_KEY'] = userdata.get("OPENAI_API_KEY")
os.environ['COHERE_API_KEY'] = userdata.get("COHERE_API_KEY")

In [44]:
from langchain.llms import OpenAI, Cohere
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [45]:
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import TextLoader

### Basic Document/text Loader for fetching patient reports


In [46]:
!wget https://raw.githubusercontent.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/refs/heads/main/patienthistory.md

loader = TextLoader("./patienthistory.md")
loaded_text= loader.load()
print(type(loaded_text))
print(loaded_text)

--2025-01-02 07:39:38--  https://raw.githubusercontent.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/refs/heads/main/patienthistory.md
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1203 (1.2K) [text/plain]
Saving to: ‘patienthistory.md.2’


2025-01-02 07:39:39 (54.2 MB/s) - ‘patienthistory.md.2’ saved [1203/1203]

<class 'list'>
[Document(metadata={'source': './patienthistory.md'}, page_content='Patient’s clinical history:\nPlease also state the source of the information (e.g. from medical records, from the patient, from the applicant etc.).\nMr Tan Ah Kow was accompanied by his son, Mr Tan Ah Beng, for the examination. Mr Tan is a 55 year old man, who is divorced, and unemployed. Mr Tan is currently living with his son, Ah Beng, in 

In [47]:
#llm=OpenAI(temperature=0)
llm=Cohere()

template="""
read the following review and extract the following information:
Name of the patient
Disease present
Diagnosis

review is given here : {input_review}
"""

prompt=PromptTemplate(template=template,
                       input_variables=["input_review"])

chain=LLMChain(llm=llm,
               prompt=prompt)

result=chain.invoke({"input_review":loaded_text})
print(result['text'])

 Sure, the review contains the following information: 

- Name of the patient: Mr. Tan Ah Kow
- Disease present: Hypertension, hyperlipidemia, strokes, cardiomyopathy, cardiac failure, chronic renal disease, and dementia. 
- Diagnosis: The patient has been diagnosed with dementia, confirmed by medical professionals at Blackacre Hospital. 

Please let me know if you would like some more information extracted, and I will be happy to do so. 


# CSV Loader

In [48]:
from langchain_community.document_loaders.csv_loader import CSVLoader
!wget https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/Leads.csv

loader = CSVLoader(file_path="./Leads.csv")
csv_file_data = loader.load()
print(type(csv_file_data))
print(csv_file_data)

--2025-01-02 07:39:43--  https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/Leads.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1493 (1.5K) [text/plain]
Saving to: ‘Leads.csv.1’


2025-01-02 07:39:43 (20.9 MB/s) - ‘Leads.csv.1’ saved [1493/1493]

<class 'list'>
[Document(metadata={'source': './Leads.csv', 'row': 0}, page_content='Week_num: 1\nLeads: 756.48\nPromotion_Budget: 51735.6'), Document(metadata={'source': './Leads.csv', 'row': 1}, page_content='Week_num: 2\nLeads: 878.72\nPromotion_Budget: 64608.6'), Document(metadata={'source': './Leads.csv', 'row': 2}, page_content='Week_num: 3\nLeads: 857.92\nPromotion_Budget: 63833'), Document(metadata={'source': './Leads.csv', 'row': 3}, page_content='Week_num: 4\nLeads: 715.84\nPromot

In [49]:
print(csv_file_data)

[Document(metadata={'source': './Leads.csv', 'row': 0}, page_content='Week_num: 1\nLeads: 756.48\nPromotion_Budget: 51735.6'), Document(metadata={'source': './Leads.csv', 'row': 1}, page_content='Week_num: 2\nLeads: 878.72\nPromotion_Budget: 64608.6'), Document(metadata={'source': './Leads.csv', 'row': 2}, page_content='Week_num: 3\nLeads: 857.92\nPromotion_Budget: 63833'), Document(metadata={'source': './Leads.csv', 'row': 3}, page_content='Week_num: 4\nLeads: 715.84\nPromotion_Budget: 50649.2'), Document(metadata={'source': './Leads.csv', 'row': 4}, page_content='Week_num: 5\nLeads: 772.48\nPromotion_Budget: 60965.8'), Document(metadata={'source': './Leads.csv', 'row': 5}, page_content='Week_num: 6\nLeads: 714.88\nPromotion_Budget: 47608.4'), Document(metadata={'source': './Leads.csv', 'row': 6}, page_content='Week_num: 7\nLeads: 815.04\nPromotion_Budget: 63597.8'), Document(metadata={'source': './Leads.csv', 'row': 7}, page_content='Week_num: 8\nLeads: 691.84\nPromotion_Budget: 4951

In [50]:
#llm=OpenAI(temperature=0)
llm=Cohere()

template="""
read the following data and extract the following information:

What is the average promotional budget ?
What is the average Leads?

data is given here : {input_data}
"""

prompt=PromptTemplate(template=template,
                       input_variables=["input_data"])

chain=LLMChain(llm=llm,
               prompt=prompt)

In [51]:
result=chain.invoke({"input_data":csv_file_data})
print(result['text'])

 Based on the provided data, here is the information you requested:

1. The average promotional budget is approximately 53950.68 USD, calculated by averaging the promotion budget for each week.
2. The average leads are approximately 852.16, calculated by averaging the leads for each week. 

While these values are the average over the provided dataset, it should be noted that the range of values varies significantly, with a minimum of 374.04 leads and a maximum of 1539.44 leads in a week, and a minimum of 3795.6 USD and a maximum of 101213 USD in promotional budget for a week. 


#### However, the loader commonly creates an issue with Large Datasets giving a Max Tokens issue.

#### PyPdf Loader can be used to fetch the MR Report to summarize the data from the file

In [52]:
!pip install pypdf



In [65]:
from langchain.document_loaders import PyPDFLoader

!wget https://github.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/raw/main/SampleMR.pdf
loader=PyPDFLoader(file_path="SampleMR.pdf")
pdf_file_data=loader.load()
print(type(pdf_file_data))
print(pdf_file_data)

--2025-01-02 07:42:39--  https://github.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/raw/main/SampleMR.pdf
Resolving github.com (github.com)... 140.82.112.4
Connecting to github.com (github.com)|140.82.112.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/main/SampleMR.pdf [following]
--2025-01-02 07:42:39--  https://raw.githubusercontent.com/harshitgupta1998/MedChain-Leveraging-AI-for-Medical-Applications/main/SampleMR.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41628 (41K) [application/octet-stream]
Saving to: ‘SampleMR.pdf.2’


2025-01-02 07:42:40 (2.56 MB/s) - ‘SampleMR.pdf.2’ saved [41628/41628

In [66]:
#llm=OpenAI(temperature=0)
llm=Cohere()

template="""
read the following review and extract the following information:
Name of the patient
Disease present
Diagnosis

review is given here : {input_review}
"""

prompt=PromptTemplate(template=template,
                       input_variables=["input_review"])

chain=LLMChain(llm=llm,
               prompt=prompt)

result=chain.invoke({"input_review":pdf_file_data})
print(result['text'])

 ```json
{
 "patient_name":"Tan Ah Kow",
 "present_disease":"Dementia",
 "diagnosis":[
  "Dementia",
  "Stroke"
 ]
}
``` 

The provided review states that the patient, Mr. Tan Ah Kow, has been manifesting behavioral and psychological symptoms secondary to dementia. He has a history of medical conditions, including hypertension and hyperlipidemia since 1990, and suffered several strokes in 2005. He was diagnosed with dementia in 2015 after being transferred to Blackacre Hospital for stroke rehabilitation. His failure to remember basic information such as his age, and the address where he lives, coupled with his inability to understand, retain, use or weigh simple information, due to his memory deficits and cognitive failures, lead to the diagnosis of dementia. 


# Output Parsers

#### Datetime Parser can be used to fetch the date and time from various reports and format.

In [55]:
from langchain.output_parsers import DatetimeOutputParser
output_parser = DatetimeOutputParser()
format_instructions=output_parser.get_format_instructions()
print(format_instructions)

Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 47-05-30T22:36:43.163190Z, 1457-07-26T06:55:54.040596Z, 1255-02-26T16:15:19.203323Z

Return ONLY this string, no other words!


In [56]:
Server_Logs =[
    "[2024-04-01 13:48:11] ERROR: Failed to connect to database. Retrying in 60 seconds.",
    "[2023-08-04 12:01:00 AM - Warning: The system is running low on disk space.",
    "[04-01-2024 13:55:39] CRITICAL: System temperature exceeds safe threshold. Initiating shutdown",
    "[Monday, April 01, 2024 01:55:39 PM] DEBUG: User query executed in 0.45 seconds.",
    "[13:55:39 on 2024-04-01] ERROR: Unable to send email notification. SMTP server not responding."
]

In [57]:
template="""
Read the server log text and extract the date and time
log text is: {log_text}
{format_instructions}
"""
prompt=PromptTemplate(template=template,
                      input_variables=["log_text", "format_instructions"])

#llm=OpenAI(temperature=0)
llm=Cohere()
chain=LLMChain(prompt=prompt,
               llm=llm)

In [58]:
for log_message in Server_Logs:
  result=chain.invoke({"log_text":log_message,
                      "format_instructions":format_instructions })
  print(result["text"],";", log_message )

 2024-04-01T13:48:11.000000Z ; [2024-04-01 13:48:11] ERROR: Failed to connect to database. Retrying in 60 seconds.
 2024-04-01T13:55:39.000000Z ; [04-01-2024 13:55:39] CRITICAL: System temperature exceeds safe threshold. Initiating shutdown
 2024-04-01T13:55:39.000Z ; [Monday, April 01, 2024 01:55:39 PM] DEBUG: User query executed in 0.45 seconds.
 2024-04-01T13:55:39.000Z ; [13:55:39 on 2024-04-01] ERROR: Unable to send email notification. SMTP server not responding.


# Custom Parser - Using Pydantic

Pydantic is a handy tool for making sure the information (or data) your Python program receives is exactly what you expect. With Pydantic, you tell your program what kind of data it should accept (like numbers, text, or dates) using simple rules. If the data matches the rules, your program works smoothly. If not, Pydantic helps by pointing out the problem, making it easier to keep your program safe and error-free.

### EmailResponse App to handle patient complaints in five languages like Spainish, Hindi, Russian, Telugu, and French.

In [59]:
import random as rand
import requests
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

In [60]:
email_location="https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/Customer_Emails/Mail"+str(rand.randint(1,5))+".txt"
print(email_location)

loader = WebBaseLoader(email_location)
loaded_text= loader.load()
print(type(loaded_text))
final_mail=loaded_text[0].page_content
print(final_mail)

https://raw.githubusercontent.com/venkatareddykonasani/Datasets/master/Customer_Emails/Mail2.txt
<class 'list'>
Estimado soporte al cliente,

Me dirijo a ustedes para reportar tres problemas que he encontrado con su servicio:
primero, mi último pedido llegó con retraso; segundo, el producto recibido no coincide con lo que pedí;
y tercero, no he podido contactar con atención al cliente por teléfono. Agradecería su pronta respuesta y una solución a estos inconvenientes.
Mi número de cliente es CFC16785.

Atentamente,
Lynda


In [61]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

class EmailResponse(BaseModel):
  Email_Language: str= Field(description= "The Original Language of the Email")
  Customer_ID: str= Field(description= "The Customer ID mentioned in the mail")
  English_email: str= Field(description= "The email after translating to English")
  Summary: str= Field(description= "A 4 bullets point summary of the email")
  Reply: str= Field(description= "A polite 2 line reply to the email")

custom_output_parser= PydanticOutputParser(pydantic_object=EmailResponse)
print(custom_output_parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"Email_Language": {"description": "The Original Language of the Email", "title": "Email Language", "type": "string"}, "Customer_ID": {"description": "The Customer ID mentioned in the mail", "title": "Customer Id", "type": "string"}, "English_email": {"description": "The email after translating to English", "title": "English Email", "type": "string"}, "Summary": {"description": "A 4 bullets point summary of the email", "title": "Summary", "type": "string"}, "Reply": {"description": "A polite 2 line reply to the email", "title": 

In [62]:
template="""
Take the email as input. Email text is {email}
{format_instructions}
"""
prompt=PromptTemplate(template=template,
                      input_variables=["email","format_instructions"])

llm=Cohere()

chain=LLMChain(prompt=prompt,
               llm=llm)

result=chain.invoke({"email":final_mail,
                     "format_instructions":custom_output_parser.get_format_instructions()})
print(result["text"])

 {
    "properties": {
        "Email_Language": "Spanish",
        "Customer_ID": "CFC16785",
        "English_email": "Dear support team, \n\nI would like to bring to your attention three issues I have encountered while using your service: \n1. My latest order arrived late. \n2. The received product did not match my order. \n3. I was unable to reach customer support by phone. \n\nI would appreciate a quick response and an appropriate solution to these inconveniences. \n\nBest regards, \nLynda (Customer ID: CFC16785)",
        "Summary": "Three issues; late delivery, incorrect product, and no response from phone customer support.",
        "Reply": "Thank you for bringing this to our attention, Lynda. We apologize for the inconvenience you have experienced. We will work to improve our services and ensure this type of situation does not happen again. Please allow us to rectify this immediately by contacting us at the toll-free number on our website. Thank you for being a valued custome