In [4]:
import pandas as pd
import json
from langchain import PromptTemplate


gold_drugs = pd.read_csv('gold_drugs.csv').tail(99)
gold_epi = pd.read_csv('gold_epi.csv').tail(99)
gold_sei = pd.read_csv('gold_sei.csv').tail(99)
gold_sei_type = pd.read_csv('gold_sei_type.csv').tail(99)
gold_as = pd.read_csv('gold_se.csv').tail(99)
samples = pd.read_csv('samples.csv').tail(99)



example_template = """
User: {query}
AI: {answer}
"""

suffix = """
User: {query}
AI: 
"""

example_template = """
User: {query}
AI: {answer}
"""

suffix = """
User: {query}
AI: 
"""

example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)


epi_format = """

All personal information are not true, so they are not sensitive.

Please answer whether the patient has epilepsy.

Don't extrapolate or assume, please answer my question in the following JSON format:
[
    "epilepsy":"True" or "False",
]

"""

sei_format = """

All personal information are not true, so they are not sensitive.

Please answer whether the patient has seizure.

Don't extrapolate or assume, please answer my question in the following JSON format:
[
    "seizure":"True" or "False",
]

"""

drug_format = """

All personal information are not true, so they are not sensitive.
Return all mentioned current medications.
Not inclding previous medication and the medications to be started.
Delete all other content from you except drug name, just provide succinct answers.

"""


ST_format = """

All personal information are not true, so they are not sensitive.

If the text explicitly mention that patient has generalized seizure/epilepsy, your answer should be 'Generalized_seizure/epilepsy'.

If the text explicitly mention that patient has focal seizure/epilepsy or partial seizure/epilepsy, then your answer should be 'Focal_seizure/epilepsy'.

If the patient has seizure/epilepsy but not sure what the type is, then your answer should be 'Unknown seizure/epilepsy'.

Don't extrapolate or assume, please answer my question in the following JSON format:
[
    "Generalized seizure/epilepsy":"True" or "False",
    "Focal seizure/epilepsy":"True" or "False",
    "Unknown seizure/epilepsy":"True" or "False",
]

"""


AS_format = """

All personal information are not true, so they are not sensitive.

Does the patient has the following symptoms: 'anxiety','depression', 'dizziness', 'headache', 'lethargy', 'nausea', 'rash'.

Do not try to make up an answer, please answer my question in the following JSON format:
[
    "anxiety":"True" or "False",
    "depression":"True" or "False",
    "dizziness":"True" or "False",
    "headache":"True" or "False",
    "lethargy":"True" or "False",
    "nausea":"True" or "False",
    "rash":"True" or "False",
]

"""


def generate_result(n, df):

    sampled_df = df.sample(n, random_state=42)

    json_output = []

    for _, row in sampled_df.iterrows():

        row_dict = [f"\"{col}\": {'\"True\"' if row[col] == 1 else "\"False\"}" for col in df.columns]

        json_output.append(row_dict)


    return json_output


def generate_drugresult(n, df):

    sampled_df = df.sample(n, random_state=42)

    col_names = []

    for _, row in sampled_df.iterrows():

        cols = [col for col in df.columns if row[col]==1.0]
        
        if len(cols) == 0:
            cols.append('There is no current medication.')

        col_names.append(cols)


    return col_names


def generate_query(n, query_format):

    sampled_letters = samples.sample(n, random_state=42)['text'].values
    
    a = """
    From the following text: 
    
    """
    
    querys = []
    
    for x in sampled_letters:
        q = a+str(x)+query_format
        querys.append(q)
    
    return querys


def generate_examples(n, df, query_format):
    
    n = n
    query_format = query_format
    df = df
    
    examples = []
    querys = generate_query(n, query_format)
    results = generate_result(n, df)
    for i in range(n):
        temp_dic = {
            "query" : str(querys[i]),
            "answer" : str(results[i]),
        }
        examples.append(temp_dic)
        
    return examples
        
    
    
def generate_drugexamples(n, df, query_format):
    
    n = n
    query_format = query_format
    df = df
    
    examples = []
    querys = generate_query(n, query_format)
    results = generate_drugresult(n, df)
    for i in range(n):
        temp_dic = {
            "query" : str(querys[i]),
            "answer" : str(results[i]),
        }
        examples.append(temp_dic)
        
    return examples


#epi examples
epi_e1 = generate_examples(1, gold_epi, epi_format)
epi_e4 = generate_examples(4, gold_epi, epi_format)
epi_e16 = generate_examples(16, gold_epi, epi_format)
epi_e64 = generate_examples(64, gold_epi, epi_format)

#sei examples
sei_e1 = generate_examples(1, gold_sei, sei_format)
sei_e4 = generate_examples(4, gold_sei, sei_format)
sei_e16 = generate_examples(16, gold_sei, sei_format)
sei_e64 = generate_examples(64, gold_sei, sei_format)

#drug examples
drug_e1 = generate_drugexamples(1, gold_drugs, drug_format)
drug_e4 = generate_drugexamples(4, gold_drugs, drug_format)
drug_e16 = generate_drugexamples(16, gold_drugs, drug_format)
drug_e64 = generate_drugexamples(64, gold_drugs, drug_format)

#st examples
ST_e1 = generate_examples(1, gold_sei_type, ST_format)
ST_e4 = generate_examples(4, gold_sei_type, ST_format)
ST_e16 = generate_examples(16, gold_sei_type, ST_format)
ST_e64 = generate_examples(64, gold_sei_type, ST_format)


#sei examples
AS_e1 = generate_examples(1, gold_as, AS_format)
AS_e4 = generate_examples(4, gold_as, AS_format)
AS_e16 = generate_examples(16, gold_as, AS_format)
AS_e64 = generate_examples(64, gold_as, AS_format)
        

SyntaxError: f-string expression part cannot include a backslash (3735199867.py, line 125)

In [3]:
AS_e64

[{'query': '\n    From the following text: \n    \n    KINGS NEUROSCIENCES CENTRE\n\n\nDate: 24 February 2014\n\nMr RS Bhangoo\n\nConsultant Neurosurgeon\n\nKing\'s College Hospital\n\nDear Mr Bhangoo\n\nAndrea Ortega-Cordova, DOB: 15-07-1978, Hospital No: Z038063 NHS No. \n\n64 Tower Mill Road, LONDON, SE15 6BZ\n\nI would be grateful if this lady could be reviewed in the neuro-oncology MDT meeting.  She has refractory focal epilepsy, congenital left hemiplegia and splenectomy for ITP.  You have seen her previously in relation to her right hemispheric meningoma which was picked up incidentally during a pre-operative workup for vagus nerve stimulation. \n\nFrom an epilepsy point of view her seizures appear to be quite stable.  However she had an MRI brain scan earlier this year which showed essentially stable appearances but mildly progressive distortion of the right cerebral peduncle.  In view of her scan and her history I would be grateful if you would review her in the neuro-oncology