In [2]:
import pandas as pd
import json
import re

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI

from dotenv import find_dotenv, load_dotenv, dotenv_values
load_dotenv('../.env.secret')



True

## # Data

In [3]:

# Load JSON data from a file
with open('transcripts.json', 'r') as file:
    data = json.load(file)

In [63]:
# check data
for i, (k,v) in enumerate(data.items()):
    if i>2:
        break
    
    print('ID:',k,'\ndata:', v)
    print("=========================================")

ID: 2055 
data: During the visit, I examined Mr. Don Hicks, who is 81 years old and presented with a fungal infection. He had dischromic patches, nodal skin eruptions, and skin rash as symptoms. Upon examination, I confirmed the diagnosis of fungal infection. I advised Mr. Hicks to take precautions such as bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, and using clean cloths. I did not prescribe any medication for him.
ID: 291 
data: During the visit, I examined Tina Will, a 69-year-old patient who presented with symptoms of chest pain, vomiting, and breathlessness. After conducting a thorough examination, I determined that Tina was suffering from a heart attack. As a result, I advised her to seek immediate medical attention. Since there were no precautions that could be taken to prevent a heart attack, I did not prescribe any medication. Instead, I recommended that Tina follow up with her primary care physician for ongoing treatment and m

## # Parser

In [5]:
# Creating the structure data which need to scrap in specific format

class Data(BaseModel):
    is_conversation_format: bool = Field(description="is given context in conversation format: yes or no", default='no')
    name: str = Field(description="name of the patient", default='Error')
    age: int = Field(description="age of the patient between 0 to 100", default=-1)
    condition: str = Field(description="patient's condition", default='None')
    symptoms: str = Field(description="What symptoms is the patient experiencing?", default='None')
    precautions: str = Field(description="What precautions did the doctor advise?", default='None')
    medications: str = Field(description="What drugs or medications did the doctor prescribe?", default='None')
    summary: str = Field(description="create summary for doctor to help them understand patient for the next visit", default='None')
    
    # # Custom validator for the 'age' field if needed
    # @validator('age')
    # def age_must_be_between_0_and_100(cls, value: int) -> int:
    #     if not (0 <= value <= 100):
    #         raise ValueError('Age must be between 0 and 100')
    #     return value

In [6]:
parser = PydanticOutputParser(pydantic_object=Data)

In [7]:
# internal pydantic stucture representation
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"is_conversation_format": {"title": "Is Conversation Format", "description": "is given context in conversation format: yes or no", "default": "no", "type": "boolean"}, "name": {"title": "Name", "description": "name of the patient", "default": "Error", "type": "string"}, "age": {"title": "Age", "description": "age of the patient between 0 to 100", "default": -1, "type": "integer"}, "condition": {"title": "Condition", "description": "patient's condition", "default": "None", "type": "string"}, "symptoms": {"title": "Symptoms", "de

## # Prompt

In [8]:
# prompt template
template = """
yor are expert in understading the context of given data to answer the question.

context may be in 2 different format such that you will get summary from doctor or in conversation format between patient and doctor. for the conversation format, for doctor start with "D:" and for patienti "P:". So understand the context best as you can and if you don't know the anwer then say None but don't generate hulistic answer.


<context>
{context}
</context>


answer the question best as you can, it must be from the context provided so don't give hallusunated answers:
is given context conversation format: yes or no
What is the patient's name?:
What is the patient's age?:
What is the patient's condition?:
What symptoms is the patient experiencing?:
What precautions did the doctor advise?:
What drugs or medications did the doctor prescribe?:
create summary for doctor to help them understand patient for the next visit:

give me output in the following format json format:
{format_instructions}
"""

In [9]:
prompt = PromptTemplate(
    template=template,
    input_variables=["context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [11]:
# final prompt for the llm
print(prompt.invoke({'context': 'Past Your context/query here'}).text)



yor are expert in understading the context of given data to answer the question.

context may be in 2 different format such that you will get summary from doctor or in conversation format between patient and doctor. for the conversation format, for doctor start with "D:" and for patienti "P:". So understand the context best as you can and if you don't know the anwer then say None but don't generate hulistic answer.


<context>
Past Your context/query here
</context>


answer the question best as you can, it must be from the context provided so don't give hallusunated answers:
is given context conversation format: yes or no
What is the patient's name?:
What is the patient's age?:
What is the patient's condition?:
What symptoms is the patient experiencing?:
What precautions did the doctor advise?:
What drugs or medications did the doctor prescribe?:
create summary for doctor to help them understand patient for the next visit:

give me output in the following format json format:
The outp

In [12]:
# sample testing query/context
sample_query = data['2055']
print(sample_query)

During the visit, I examined Mr. Don Hicks, who is 81 years old and presented with a fungal infection. He had dischromic patches, nodal skin eruptions, and skin rash as symptoms. Upon examination, I confirmed the diagnosis of fungal infection. I advised Mr. Hicks to take precautions such as bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, and using clean cloths. I did not prescribe any medication for him.


In [16]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
chain = prompt | llm
result = chain.invoke(input={"context": sample_query})
print(result.content)

{
  "is_conversation_format": "no",
  "name": "Don Hicks",
  "age": 81,
  "condition": "Fungal infection",
  "symptoms": "Dischromic patches, nodal skin eruptions, skin rash",
  "precautions": "Bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, using clean cloths",
  "medications": "None",
  "summary": "Mr. Don Hicks, 81 years old, presented with a fungal infection showing symptoms of dischromic patches, nodal skin eruptions, and skin rash. Advised precautions for bathing and keeping the infected area dry. No medication prescribed."
}


In [18]:
result.usage_metadata

{'input_tokens': 718, 'output_tokens': 154, 'total_tokens': 872}

In [21]:
# invoking parser for testing purpose
r = parser.invoke(result)
dict(r)

{'is_conversation_format': False,
 'name': 'Don Hicks',
 'age': 81,
 'condition': 'Fungal infection',
 'symptoms': 'Dischromic patches, nodal skin eruptions, skin rash',
 'precautions': 'Bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, using clean cloths',
 'medications': 'None',
 'summary': 'Mr. Don Hicks, 81 years old, presented with a fungal infection showing symptoms of dischromic patches, nodal skin eruptions, and skin rash. Advised precautions for bathing and keeping the infected area dry. No medication prescribed.'}

## # Data Extraction:

In [22]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
chain = prompt | llm | parser

In [34]:
output = []
# check data
for i, (k,v) in enumerate(data.items()):
    # if i>11:
    #     break
    
    result = chain.invoke(input={"context": v})
    result = dict(result)
    output.append(result)
    result['id'] = k
    result['original_msg'] = v
    
    # sample printing during data extraction. # you can comment it or and set i%x, where x is higher
    if i%3==0:
        print(k, v)
        print('\nResult:\n',result)
        print("=========================================\n\n")

2055 During the visit, I examined Mr. Don Hicks, who is 81 years old and presented with a fungal infection. He had dischromic patches, nodal skin eruptions, and skin rash as symptoms. Upon examination, I confirmed the diagnosis of fungal infection. I advised Mr. Hicks to take precautions such as bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, and using clean cloths. I did not prescribe any medication for him.

Result:
 {'is_conversation_format': False, 'name': 'Don Hicks', 'age': 81, 'condition': 'Fungal infection', 'symptoms': 'Dischromic patches, nodal skin eruptions, skin rash', 'precautions': 'Bathing twice a day, using detol or neem in the bathing water, keeping the infected area dry, using clean cloths', 'medications': 'None', 'summary': 'Mr. Don Hicks, 81 years old, presented with a fungal infection showing symptoms of dischromic patches, nodal skin eruptions, and skin rash. Advised precautions for bathing and keeping the infected ar

In [56]:
output_result = pd.DataFrame(output)
output_result.head(12)

Unnamed: 0,is_conversation_format,name,age,condition,symptoms,precautions,medications,summary,id,original_msg
0,False,Don Hicks,81,Fungal infection,"Dischromic patches, nodal skin eruptions, skin...","Bathing twice a day, using detol or neem in th...",,"Mr. Don Hicks, 81 years old, presented with a ...",2055,"During the visit, I examined Mr. Don Hicks, wh..."
1,False,Tina Will,69,heart attack,"chest pain, vomiting, breathlessness",advised to seek immediate medical attention,,"Tina Will, a 69-year-old patient, presented wi...",291,"During the visit, I examined Tina Will, a 69-y..."
2,True,Tommie,45,Hypertension,"Dizziness, unsteadiness on feet, headaches","Practice meditation, take salt baths, reduce s...",,"Patient Tommie, 45 years old, presenting with ...",102,"D: Good morning Tommie, how can I help you tod..."
3,True,Chris,-1,Jaundice,"Losing weight, abdominal pain, yellow skin","Drink plenty of water, consume milk thistle, e...",,"The patient, Chris, is experiencing symptoms o...",2966,"D: Good morning, Chris. I understand you've be..."
4,True,Ernest,-1,Urinary tract infection (UTI),"Bladder discomfort, continuous feel of urine, ...","Drink plenty of water, increase vitamin C inta...",,Ernest presented with symptoms of UTI includin...,2438,"D: Hi Ernest, I understand you're here for a c..."
5,False,Carlos Murrieta,75,diabetes,"obesity, restlessness, lethargy","following a balanced diet, exercising regularl...",,"Carlos Murrieta, a 75-year-old patient, presen...",4271,"During the visit, I assessed Carlos Murrieta, ..."
6,False,Bernita Smith,35,psoriasis,"silver-like dusting on skin, joint pain, small...","wash hands with warm, soapy water, stop bleedi...",LoKara,"Patient Bernita Smith, 35 years old, diagnosed...",2441,"During the visit, I, the doctor, examined Bern..."
7,False,Garrett Brown,64,Diabetes,"Weight loss, excessive hunger, lethargy","Maintain a balanced diet, exercise regularly, ...",,"Patient Garrett Brown, 64 years old, diagnosed...",1717,"During the visit, I, the doctor, determined th..."
8,True,Nathaniel,-1,gastroenteritis,"diarrhea, vomiting, sunken eyes","stop eating solid food, take small sips of wat...",,Patient Nathaniel is experiencing symptoms of ...,3206,"D: Nathaniel, I understand you've been feeling..."
9,True,Jose,-1,Hypoglycemia,"Feeling tired, irritable, sweating a lot","Lie down on side, check pulse regularly, drink...",,Jose is experiencing symptoms of hypoglycemia ...,742,"D: Good morning, Jose. How are you feeling tod..."


In [57]:
output_result.to_csv('Sample_output.csv', index=False)