### Part 3 -> Multiturn Generation

In [1]:
import json
from tqdm import tqdm
from multiturn_modul import MultiturnStyle, get_multiturn_style
import pandas as pd
import random

In [4]:
# Create situations

# Path for input data and to store prompts
initial_prompts_name = "specialty_x_domain"
initial_prompts_path = "../results/parsed_prompts_" + initial_prompts_name + ".json"
output_path = "../results/situations_" + initial_prompts_name + ".jsonl"


# store prompts in a list
with open(initial_prompts_path, 'r') as file:
    initial_prompts_list = json.load(file)  # Load JSON data from file

# Load countries necessary to give the medical setting
countries = pd.read_csv("../resources/countries_by_income_category.csv")


print("Creating list of situations and initial prompts")

with open(output_path, 'w') as file:
    for initial_prompt in tqdm(initial_prompts_list):
        sampled_country = random.choice(countries.iloc[:, 0].tolist())
        id = initial_prompt["id"]
        if initial_prompts_name == "specialty_x_domain":
            profession, specialty, domain = initial_prompt["context"].lower().split(",")
            multiturn_style = get_multiturn_style(id=id, sampled_country=sampled_country, profession=profession, specialty=specialty, domain=domain)
        else:
            multiturn_style = get_multiturn_style(id=id, sampled_country=sampled_country)
        system_prompt_chatbot = multiturn_style.system_prompt_chatbot()
        system_prompt_user = multiturn_style.system_prompt_user()
        nbr_of_turns = multiturn_style.number_of_turns
        line = {
            "id": id,
            "nbr_of_turns": nbr_of_turns,
            "initial_prompt": initial_prompt["prompt"],
            "system_prompt_chatbot": system_prompt_chatbot,
            "system_prompt_user": system_prompt_user,
            "multiturn_style_parameters": multiturn_style.get_all_system_prompts_atributes()
        }
        file.write(json.dumps(line) + '\n')

print(f"Up to {id} prompts with situations saved to {output_path}")

Creating list of situations and initial prompts


100%|██████████| 6321/6321 [00:01<00:00, 4430.15it/s]

Up to 20-6320 prompts with situations saved to ../results/situations_specialty_x_domain.jsonl





In [None]:
# Create batch of prompts

# Specify which model should be used to answer prompts
gpt_model = "gpt-4o"

# Path for input data and to store prompts
situations_name = "specialty_x_domain"
situations_path = "../results/situations_" + situations_name + ".jsonl"
output_path = "../results/batched_prompts_" + situations_name + ".jsonl"


# store prompts in a list
situations = []
with open(situations_path, 'r') as file:
    for i, line in enumerate(file):
        if i >= 100:
            break
        situations.append(json.loads(line))

print("Creating a batch of prompts")
print(f"that can be processed by {gpt_model} in batch mode")

with open(output_path, 'w') as file:
    nbr_of_prompts = 0
    for situation in tqdm(situations):
        line = {
            "custom_id": situation["id"],
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": gpt_model,
                "messages": [
                    {"role": "system", "content": situation["system_prompt_chatbot"]},
                    {"role": "user", "content": situation["initial_prompt"]}
                ],
                "max_tokens": 1000
            }
        }
        file.write(json.dumps(line) + '\n')
        nbr_of_prompts += 1

print(f"batch of {nbr_of_prompts} prompts saved to {output_path}")

Creating a batch of prompts
that can be processed by gpt-4o in batch mode


100%|██████████| 100/100 [00:00<00:00, 49461.13it/s]

batch of 100 prompts saved to ../results/batched_prompts_specialty_x_domain.jsonl



