In [None]:
import os

import openai
import pandas as pd

In [None]:
# Load the Task A validation split. Later cells read its `section_header`,
# `section_text` and `dialogue` columns.
valA = pd.read_csv('TaskA-ValidationSet.csv')

In [None]:
# Rough heuristic used to convert a character count into a token budget.
CHARS_PER_TOKEN = 4

sections = valA['section_header'].unique().tolist()

# Mean reference-summary length (in characters) for every section header,
# kept in order of first appearance. Grouping skips rows whose header is
# missing, and dropna() removes sections that have no reference text at all;
# either case would otherwise produce NaN and crash the int() conversion.
mean_chars_by_section = (
    valA['section_text'].str.len()
    .groupby(valA['section_header'], sort=False)
    .mean()
    .dropna()
)

# Per-section generation budget. Clamp to at least one token so that very
# short sections never request a zero-token completion.
max_tokens = {
    section: max(1, int(round(mean_chars / CHARS_PER_TOKEN)))
    for section, mean_chars in mean_chars_by_section.items()
}
print(max_tokens)

In [None]:
# Maps each `section_header` code found in the dataset to the section title
# that is inserted into the prompt.
# NOTE(review): despite the name, the mapping goes abbreviation -> full name.
name_to_abb = {
    'GENHX': 'HISTORY of PRESENT ILLNESS',
    'ROS': 'REVIEW OF SYSTEMS',
    'PASTMEDICALHX': 'PAST MEDICAL HISTORY',
    'MEDICATIONS': 'Medications',
    'CC': 'CHIEF COMPLAINT',
    'PASTSURGICAL': 'PAST SURGICAL HISTORY',
    'FAM/SOCHX': 'FAMILY HISTORY/SOCIAL HISTORY',
    'DISPOSITION': 'Disposition',
    'DIAGNOSIS': 'Diagnosis',
    'EDCOURSE': 'EMERGENCY DEPARTMENT COURSE',
    'PLAN': 'Plan',
    'LABS': 'Labs',
    'ASSESSMENT': 'Assessment',
    'ALLERGY': 'Allergy',
    'GYNHX': 'GYNECOLOGIC HISTORY',
    'EXAM': 'Exam',
    'OTHER_HISTORY': 'Other history',
    'PROCEDURES': 'Procedures',
    'IMAGING': 'Imaging',
    'IMMUNIZATIONS': 'Immunizations'
}

In [None]:
# Read the key from the environment so it is never saved inside the notebook.
# Set OPENAI_API_KEY before starting the kernel; a missing variable raises
# KeyError here instead of failing later on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
def get_pred_inst(prompt, model, max_tokens):
  """Return a single completion from the (non-chat) completions endpoint.

  Args:
    prompt: Full prompt text sent to the model.
    model: Model identifier; forwarded to the API as `engine`.
    max_tokens: Upper bound on the number of generated tokens.

  Returns:
    The text of the first returned choice with surrounding whitespace removed.
  """
  # NOTE(review): in the pre-1.0 openai SDK the HTTP timeout is reportedly
  # `request_timeout`, not `timeout` -- confirm that 45 bounds what is intended.
  request = dict(
      engine=model,
      prompt=prompt,
      max_tokens=max_tokens,
      n=1,
      stop=None,
      temperature=0.2,
      timeout=45,
  )
  completion = openai.Completion.create(**request)
  first_choice = completion.choices[0]
  return first_choice.text.strip()

In [None]:
def get_pred_chat(prompt, model, max_tokens):
  """Return a single completion from the chat completions endpoint.

  The prompt is sent as the user message, preceded by a fixed system message
  that frames the model as a doctor / medical scribe.

  Args:
    prompt: Full prompt text, sent as the user message.
    model: Chat model identifier.
    max_tokens: Upper bound on the number of generated tokens.

  Returns:
    The content of the first returned choice with surrounding whitespace
    removed, matching what `get_pred_inst` returns for completion models.
  """
  # NOTE(review): in the pre-1.0 openai SDK the HTTP timeout is reportedly
  # `request_timeout`, not `timeout` -- confirm that 45 bounds what is intended.
  response = openai.ChatCompletion.create(
              model=model,
              messages=[
                  {"role": "system", "content": "You are a smart doctor as well as a professional medical scribe with a lot of experience."},
                  {"role": "user", "content": prompt},
              ],
              temperature=0.2,
              max_tokens=max_tokens,
              timeout=45,
              n=1,
              stop=None
          )

  # Strip so chat outputs are normalised the same way as completion outputs.
  return response['choices'][0]['message']['content'].strip()

In [None]:
def get_prompt(section, dia):
  """Build the generic summarisation prompt for one dialogue.

  Args:
    section: Section title to summarise; it appears twice in the prompt.
    dia: The doctor-patient dialogue text.

  Returns:
    The prompt string, ending in "<section> Summary:" so the model continues
    with the summary itself.
  """
  instruction = (
      f'Generate a summary for the section "{section}" in a SOAP'
      ' note based on the provided doctor-patient dialogue.'
  )
  dialogue_part = f'Dialogue:\n{dia}'
  cue = f'{section} Summary:'
  return instruction + '\n\n' + dialogue_part + '\n\n' + cue

In [None]:
# Models to evaluate; each one becomes an output column of predictions.
# "gpt-3.5-turbo" and "gpt-4" are called through get_pred_chat, the others
# through get_pred_inst.
models = ["text-ada-001", "text-babbage-001", "text-curie-001", "text-davinci-003", "gpt-3.5-turbo", "gpt-4"]

In [None]:
# Models that must be called through the chat endpoint; every other entry in
# `models` goes through the completions endpoint.
CHAT_MODELS = ("gpt-3.5-turbo", "gpt-4")


def _predict_row(row):
  """Generate one summary per model in `models` for a single dataset row.

  Args:
    row: A row of `valA`; its `section_header` and `dialogue` fields are read.

  Returns:
    A pd.Series indexed by model name holding each model's generated summary.
  """
  section = row['section_header']
  # The prompt and token budget depend only on the row, so build them once
  # instead of once per model.
  prompt = get_prompt(name_to_abb[section], row["dialogue"])
  budget = max_tokens[section]
  return pd.Series({
      mod: (get_pred_chat if mod in CHAT_MODELS else get_pred_inst)(prompt, mod, budget)
      for mod in models
  })


new_columns = valA.apply(_predict_row, axis=1)

# Merge the new columns back into the original DataFrame. Prediction columns
# left over from a previous run of this cell are dropped first, so re-running
# the cell replaces them instead of appending duplicates.
valA = pd.concat([valA.drop(columns=models, errors='ignore'), new_columns], axis=1)

In [None]:
# Save the validation data together with the per-model prediction columns.
# No `index` argument is passed, so pandas' default of also writing the
# DataFrame index as the first column applies.
valA.to_csv("gpt_generic_prompt.csv")