In [None]:
import os
import json
import re
import predictionguard as pg
import pandas as pd
from getpass import getpass

In [None]:

# Reuse a token that is already exported so "Restart & Run All" does not
# block on an interactive prompt; otherwise ask for it without echoing.
pg_access_token = os.environ.get('PREDICTIONGUARD_TOKEN', '').strip()
if not pg_access_token:
    pg_access_token = getpass('Enter your Prediction Guard access token: ').strip()
if not pg_access_token:
    # Fail here: the extraction helpers below turn API failures into
    # "Error: ..." strings, so a missing token would otherwise only show up
    # as bad values in the output data.
    raise ValueError('A Prediction Guard access token is required.')
os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token

In [None]:
# Name the encoding explicitly so the JSON decodes the same way on every
# platform instead of depending on the locale's default text encoding.
with open('final_transcripts.json', 'r', encoding='utf-8') as f:
    transcripts = json.load(f)

In [None]:
# Spot-check one transcript by its string id to confirm the JSON loaded.
# NOTE(review): transcripts presumably contain patient details — clear this
# cell's output before sharing the notebook.
transcripts["2966"]

In [None]:
def extract_answer_name(transcript):
    """Ask the model for the patient's name found in a transcript.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        Whatever ``execute_predictionguard`` returns for the prompt: the
        stripped completion text, or one of its error/sentinel strings.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised KeyError/ValueError/IndexError and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: 
        Identify the name of the patient from the transcript. Just provide the name. No other text.  
        
        ### Input: {transcript}
        
        ### Response:
        
    '''
    return execute_predictionguard(prompt)

def extract_answer_age(transcript):
    """Ask the model for the patient's age, constrained to integer output.

    Unlike the other extractors this calls ``pg.Completion.create`` directly
    so it can pass ``temperature`` and an ``output`` type constraint.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        The stripped completion text on success; the string ``"None"`` when
        the service reports "Could not make prediction"; otherwise a string
        starting with ``"Error:"``. No exception from the API call escapes.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised (outside the try block below) and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: 
        Determine the patient's age in years from the transcript. Provide only the number. No years old.
        
        ### Input: 
        {transcript}
        
        ### Response:
        
    '''
    try:
        result = pg.Completion.create(
            model="Nous-Hermes-Llama2-13B",
            prompt=prompt,
            temperature=0.3,
            max_tokens=100,
            output={
                "type": "integer"
            }
        )
        answer = result['choices'][0]['text']
        return answer.strip()

    except KeyError:
        # Reached when the response lacks 'choices' or 'text'.
        # NOTE(review): the message talks about a "prompts dictionary" that
        # does not exist in this notebook; kept verbatim in case downstream
        # code matches on it.
        return "Error: Question does not have the expected placeholder or is not in the prompts dictionary."

    except Exception as e:
        error_message = str(e)
        if "Could not make prediction" in error_message:
            # Log the offending prompt and return a sentinel instead of failing.
            print("Failed Prompt:", prompt)
            return "None"
        else:
            return f"Error: {error_message}"

def extract_answer_condition(transcript):
    """Ask the model for the patient's primary medical condition.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        Whatever ``execute_predictionguard`` returns for the prompt: the
        stripped completion text, or one of its error/sentinel strings.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised KeyError/ValueError/IndexError and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: Identify the primary medical condition of the patient from the transcript. List the condition.
        ### Input: {transcript}
        ### Response:
    '''
    return execute_predictionguard(prompt)

def extract_answer_symptoms(transcript):
    """Ask the model for the symptoms mentioned in a transcript.

    The prompt requests a quoted, comma-separated list with no diagnosis,
    names, pronouns or full sentences.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        Whatever ``execute_predictionguard`` returns for the prompt: the
        stripped completion text, or one of its error/sentinel strings.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised KeyError/ValueError/IndexError and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: 
        Identify the symptoms experienced by the patient as mentioned in the transcript.
        Return the symptoms in quotations and list them by using commas.
        Do not use more than one quotations.
        If there are only two symptoms, use the word 'and' instead of commas.
        Do not use numbered lists and remove any pronouns. Do not return the diagnosis. Do not use full sentences.
        Remove patient names and only return the symptoms.
        
        ### Input:
        {transcript}
        
        ### Response:
        
    '''
    return execute_predictionguard(prompt)

def extract_answer_precautions(transcript):
    """Ask the model for the precautions the doctor advised.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        Whatever ``execute_predictionguard`` returns for the prompt: the
        stripped completion text, or one of its error/sentinel strings.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised KeyError/ValueError/IndexError and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: List the precautions advised by the doctor in the transcript. List the precautions.
        ### Input: {transcript}
        ### Response:
    '''
    return execute_predictionguard(prompt)

def extract_answer_drug(transcript):
    """Ask the model for the drugs or medication prescribed in a transcript.

    The prompt requests a quoted, comma-separated list, or the literal
    "no prescription" when nothing was prescribed.

    Args:
        transcript: Transcript text to embed in the prompt.

    Returns:
        Whatever ``execute_predictionguard`` returns for the prompt: the
        stripped completion text, or one of its error/sentinel strings.
    """
    # The f-string already interpolates the transcript. Chaining .format() on
    # the result re-parsed the transcript itself as a format template, so any
    # "{" or "}" in the text raised KeyError/ValueError/IndexError and doubled
    # braces were silently collapsed.
    prompt = f'''
        ### Instruction: 
        Identify the drug or medication prescribed by the doctor in the transcript.
        Return the drugs or medication in quotations and list them by using commas.
        If nothing is prescribed, return "no prescription".
        
        ### Input:
        {transcript}
        
        ### Response:
        
    '''
    return execute_predictionguard(prompt)

def execute_predictionguard(prompt, model="Nous-Hermes-Llama2-13B", max_tokens=100, **completion_options):
    """Send a prompt to Prediction Guard and return the first completion.

    Args:
        prompt: Full prompt text to complete.
        model: Model name passed to ``pg.Completion.create``. Defaults to the
            model this notebook has always used.
        max_tokens: Completion length limit. Defaults to the previous
            hard-coded value of 100.
        **completion_options: Extra keyword arguments forwarded unchanged to
            ``pg.Completion.create`` (for example ``temperature`` or
            ``output``), so callers needing them do not have to duplicate
            this function.

    Returns:
        The stripped text of the first choice on success; the string
        ``"None"`` when the service reports "Could not make prediction";
        otherwise a string starting with ``"Error:"``. No exception from the
        API call escapes.
    """
    try:
        result = pg.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
            **completion_options
        )
        answer = result['choices'][0]['text']
        return answer.strip()

    except KeyError:
        # Reached when the response lacks 'choices' or 'text'.
        # NOTE(review): the message talks about a "prompts dictionary" that
        # does not exist in this notebook; kept verbatim in case downstream
        # code matches on it.
        return "Error: Question does not have the expected placeholder or is not in the prompts dictionary."

    except Exception as e:
        error_message = str(e)
        if "Could not make prediction" in error_message:
            # Log the offending prompt and return a sentinel instead of failing.
            print("Failed Prompt:", prompt)
            return "None"
        else:
            return f"Error: {error_message}"


In [None]:
def determine_extraction_function(filename):
    """Pick the extractor whose keyword occurs in ``filename``.

    Keywords are tested as plain, case-sensitive substrings in a fixed order
    (name, age, condition, symptoms, precautions, drug). The first hit wins,
    so a filename containing several keywords resolves to the earliest one.

    Args:
        filename: String to inspect, typically a CSV file name.

    Returns:
        The matching ``extract_answer_*`` function, or ``None`` when none of
        the keywords occurs in ``filename``.
    """
    # Guard clauses: each check returns immediately, so order is the priority.
    if "name" in filename:
        return extract_answer_name
    if "age" in filename:
        return extract_answer_age
    if "condition" in filename:
        return extract_answer_condition
    if "symptoms" in filename:
        return extract_answer_symptoms
    if "precautions" in filename:
        return extract_answer_precautions
    if "drug" in filename:
        return extract_answer_drug
    return None

# One CSV per extracted field; each stem is a keyword that
# determine_extraction_function matches on.
csv_files = [
    f"{field}.csv"
    for field in ("name", "age", "condition", "symptoms", "precautions", "drug")
]

In [None]:

# Take an explicit copy of the first six rows so the answers are written to
# an independent frame rather than to a slice of the freshly read CSV.
data_df = pd.read_csv('condition.csv').head(6).copy()

# Iterate the 'Transcript' column directly. DataFrame.iterrows() converts each
# row to a single dtype, which can turn an integer id such as 2966 into 2966.0
# and make the string-keyed lookup in `transcripts` fail.
answers = []
for transcript_id in data_df['Transcript']:
    transcript = transcripts[str(transcript_id)]
    answers.append(extract_answer_condition(transcript))

# Assign the column in one step; this also works when an existing 'Text'
# column was read as empty (float NaN) and cannot hold strings cell by cell.
data_df['Text'] = answers


In [None]:
# Display the frame so the generated 'Text' answers can be reviewed before saving.
data_df

In [None]:
# Write the frame to CSV without the pandas index column.
# NOTE(review): the visible cells fill this frame from 'condition.csv' via
# extract_answer_condition, yet the output name says "symtoms" (sic) — confirm
# the intended file name before relying on this file.
data_df.to_csv('Processed_symtoms_2.csv', index=False)