In [379]:
import pandas as pd
import random
import kagglehub
import os
import json
import re

In [380]:
from conv_generation_gpt_api import generate_natural_conversation_step1, generate_natural_conversation_step2, generate_natural_conversation_step3, generate_natural_conversation_step4

## 1. set the elements for conversation template

In [3]:
# 从名字脚本里面随机选择四个名字
# Download latest version
path = kagglehub.dataset_download("ryanburnsworth/popular-names-by-birth-year-1880-2022")
print("Path to dataset files:", path)

# List all files in the directory to find the correct CSV file
# files = os.listdir(path)
# print("Files in directory:", files)

# Load the CSV file into a pandas DataFrame
file_path = os.path.join(path, 'names_by_birth_year.csv')
df = pd.read_csv(file_path)
print(df.head())

df_sorted = df.sort_values(by="Count", ascending=False) # 1. 按 Count 降序排序
top_20_count = int(len(df_sorted) * 0.2)
df_top_20 = df_sorted.iloc[:top_20_count]  

Path to dataset files: /Users/liuyiwei/.cache/kagglehub/datasets/ryanburnsworth/popular-names-by-birth-year-1880-2022/versions/1
   Year       Name Gender  Count
0  1883       Mary      F   8012
1  1883       Anna      F   3306
2  1883       Emma      F   2367
3  1883  Elizabeth      F   2255
4  1883     Minnie      F   2035


In [4]:
# 1. 将所有离开原因写成一个列表
leave_reasons = [
    "bathroom break",
    "coffee break",
    "forgot something important",
    "forgot to print some documents",
    "forgot to recieve a package",
    "forgot to return a package",
    "forgot to run errands",
    "forgot to submit documents",
    "have a meeting starting soon that I need to prepare for",
    "have a previous engagement that I need to attend to quickly",
    "have a work-related emergency that requires my immediate attention",
    "have an unexpected visitor at my door",
    "have errands to run",
    "have to attend to someone who just walked in",
    "have to check on something",
    "have to go to the restroom",
    "have to pick up a prescription",
    "have to pick up dry cleaning",
    "have to print or scan documents",
    "have to receive a delivery",
    "have to recharge laptop",
    "have to return a borrowed item",
    "have to take care of a family matter",
    "have to take care of an unexpected task",
    "have unexpected visitor",
    "his/her pet needs attention",
    "his/her family is calling",
    "incoming delivery",
    "must respond to a phone call",
    "need to check on a friend or family member who needs assistance",
    "need to finish a task that’s time-sensitive",
    "need to get a phone call",
    "need to get some coffee",
    "need to go to the toilet",
    "need to grab a snack or a drink",
    "need to have a quick chat with someone else",
    "need to make a phone call",
    "need to make a quick trip to the drug store",
    "need to make a quick trip to the grocery store",
    "need to pick up a package",
    "need to receive a parcel",
    "need to recharge cellphone",
    "need to register for an event",
    "need to schedule a haircut or salon appointment",
    "need to schedule another appointment",
    # "need to step away for a moment to stretch and clear my mind",
    "need to step out for a moment",
    "need to submit some papers",
    "need to take care of some paperwork or documents",
    "need to take care of some personal matters",
    # "need to take care of something related to my health",
    "need to take care of something urgent",
    "need to troubleshoot something",
    "parking meter expiring",
    "remembered something that needs to be taken care of",
    "remembered to receive a package",
    "remembered to submit some papers",
    "remembered to take care of some paperwork or documents",
    "remembered to take care of some personal matters",
    "remembered to take care of something urgent",
    "want to go grab a drink",
    "want to go grab a coffee",
    "want to go take some fresh air",
    "want to go to the bathroom",
    # 下面是让gpt加的
    "need to move my car",
    "have to take an urgent call from my boss",
    "need to check my emails quickly",
    "have to respond to an important message",
    "need to restart my computer",
    "have to take a quick medication",
    "need to handle a minor household emergency",
    # "have to answer the doorbell",
    "have to refill my water bottle",
    "have to feed my pet",
    "have to water my plants",
    # "have to address a security alert at home",
    "have to take a brief walk to clear my mind",
    "need to step outside briefly to meet someone",
    "have to adjust the thermostat",
    "need to quickly tidy up my workspace",
    "need to quickly verify something important",
    # "have to quickly fix something technical",
    # "need to step away briefly due to loud background noise",
    "have to quickly arrange something for a later meeting",
    "need to briefly step out to confirm travel arrangements",
    "have to take care of an urgent email",
    "have to briefly assist a coworker",
    "need to briefly leave to verify appointment details",
    # "have to check on an appliance or device",
    "have to briefly tend to something outside",
    "have to quickly reschedule an upcoming meeting",
    "need to briefly attend to my child",
    # "have to briefly check the weather due to upcoming plans",
    # "need to quickly confirm my availability",
    "need to grab a quick snack",
    "need to quickly stretch my legs",
    "have to briefly troubleshoot my internet connection",
    "have to briefly step out for privacy reasons",
    "need to quickly tidy the room before another meeting",
    # "have to briefly step away to manage background distractions",
    "have to quickly update someone about my status",
    "need to briefly review notes or materials",
    "have to briefly leave to answer an urgent text",
    "have to briefly assist someone else in the household",
    # "need to briefly rest my eyes due to strain",
    "have to quickly plug in my device to charge",
    "need to briefly leave to retrieve an important item",
    "have to step away briefly to close a window or door",
    "need to quickly ensure I turned off an appliance",
    "have to briefly step away for a personal reason",
    "need to briefly step away due to allergy or health symptoms"
]

# # 3. 测试输出
# print("leave_reason_B:", leave_reason_B)
# print("leave_reason_D:", leave_reason_D)

In [5]:
# Function to populate template with variables
def populate_template(template, scenario, relationship, situation, lie_objective, real_reason, the_lie, the_fact, the_emotion_to_avoid, A_name, B_name, C_name, D_name):
    return template.replace("{{Topic for the scenario}}", scenario)\
                   .replace("{{relationship descriptor}}", relationship)\
                   .replace("{{A: the liar name}}", A_name)\
                   .replace("{{B: the target name}}", B_name)\
                   .replace("{{C: the accomplice name}}", C_name)\
                   .replace("{{D: the observer name}}", D_name)\
                   .replace("{{leave reason B}}", leave_reason_B)\
                   .replace("{{leave reason D_1}}", leave_reason_D_1)\
                   .replace("{{leave reason D_2}}", leave_reason_D_2)\
                   .replace("{{real_reason_c}}", real_reason_c)\
                   .replace("{{truth_c}}", truth_c if truth_c is not None else "")\
                   .replace("{{the lie}}", lie_c)\
                   .replace("{{situation_topic}}", situation_topic)\
                   .replace("{{situation}}", situation)\
                   .replace("{{lie_objective}}", lie_objective)

# def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str ) -> str:
#     """
#     将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
#     同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}"。
#     """
#     result = text.replace("A", f"{A_name}")
#     result = result.replace("B", f"{B_name}")
#     result = result.replace("C", f"{C_name}")
#     result = result.replace("D", f"{D_name}")
#     return result

In [6]:
def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str) -> str:
    """
    将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
    同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}" 等。
    采用一次性替换，避免替换后的内容被再次修改。
    """
    # 定义替换映射
    mapping = {
        "A": A_name,
        "B": B_name,
        "C": C_name,
        "D": D_name
    }
    # 构建正则表达式，先匹配形如 A's 的情况，再匹配单个字母
    pattern = re.compile(r"(A's|B's|C's|D's|A|B|C|D)")
    
    def replacer(match):
        token = match.group(0)
        if token.endswith("'s"):
            # 处理 A's、B's、C's、D's 的情况
            letter = token[0]
            return mapping[letter] + "'s"
        else:
            return mapping[token]
    
    return pattern.sub(replacer, text)

In [7]:
# list for situation
# concert
# shopping
# painting
# ice skating

## 2. template

### 2.0 import elements from the conv_element.json

In [1432]:
import json
with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_0.json", "r", encoding="utf-8") as f:
    data_list = json.load(f)

In [1433]:
data = data_list[5]
data

{'set_id': '0-2-0-2',
 'lie_id': '0-2',
 'conv_id': 2,
 'truth_id': 1,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': 'music taste',
  'situation_topic': 'the plan for today after work',
  'situation': 'Berthal is inviting Angelik to attend a classic rock live show tonight',
  'lie_objective': 'decline the invitation',
  'leave_reason_B': 'have to go to the restroom',
  'leave_reason_D_1': 'need to make a quick trip to the grocery store',
  'leave_reason_D_2': 'want to go grab a drink'},
 'relationship': 'coworkers',
 'multiple_liar': False,
 'real_reason_type': True,
 'characters': {'liar': 'Angelik',
  'target': 'Berthal',
  'accomplice': 'Gryphon',
  'observer': 'Courtney'},
 'lie': {'real_reason_c': 'Angelik takes great pride in their music taste and considers it superior to others, and dislike Berthal’s preferred style.',
  'lie_c': 'Angelik already has some plans.',
  'truth_c': 'Angelik will go home after work.',
  'falsification': False},
 'full

In [1437]:
# 最外层直接取值
set_id = data["set_id"]
lie_id = data["lie_id"]
conv_id = data["conv_id"]
truth_id = data["truth_id"]
lie_type = data["lie_type"]
emotion = data["emotion"]

# 取 topic 下面的嵌套字段
scenario = data["topic"]["scenario"]
situation_topic = data["topic"]["situation_topic"]
situation = data["topic"]["situation"]
lie_objective = data["topic"]["lie_objective"]
leave_reason_B = data["topic"]["leave_reason_B"]
leave_reason_D_1 = data["topic"]["leave_reason_D_1"]
leave_reason_D_2 = data["topic"]["leave_reason_D_2"]

relationship = data["relationship"]

# 取 characters 下面的嵌套字段
A_name = data["characters"]["liar"]
B_name = data["characters"]["target"]
C_name = data["characters"]["accomplice"]
D_name = data["characters"]["observer"]

# 取 lie 下面的嵌套字段
real_reason_c = data["lie"]["real_reason_c"]
lie_c = data["lie"]["lie_c"]
truth_c = data["lie"]["truth_c"]
truth_c = "A hasn't decided what to do yet"
falsification = data["lie"]["falsification"]

# 最外层的上下文
# part_1 = data["part_1"]
# part_2 = data["part_2"]
# part_3 = data["part_3"]
# part_4 = data["part_4"]
# full_context = data["full_context"]
# short_context = data["short_context"]

In [1203]:
# Print basic information
print("Basic Information:")
print(f"set_id: {set_id}")
print(f"lie_id: {lie_id}")
print(f"conv_id: {conv_id}")
print(f"truth_id: {truth_id}")
print(f"lie_type: {lie_type}")
print(f"emotion: {emotion}")
print("\n")

# Print topic information
print("Topic Information:")
print(f"scenario: {scenario}")
print(f"situation_topic: {situation_topic}")
print(f"situation: {situation}")
print(f"lie_objective: {lie_objective}")
print(f"leave_reason_B: {leave_reason_B}")
print(f"leave_reason_D_1: {leave_reason_D_1}")
print(f"leave_reason_D_2: {leave_reason_D_2}")
print("\n")

# Print relationship
print("Relationship:")
print(f"relationship: {relationship}")
print("\n")

# Print character names
print("Character Names:")
print(f"A_name (liar): {A_name}")
print(f"B_name (target): {B_name}")
print(f"C_name (accomplice): {C_name}")
print(f"D_name (observer): {D_name}")
print("\n")

# Print lie information
print("Lie Information:")
print(f"real_reason_c: {real_reason_c}")
print(f"lie_c: {lie_c}")
print(f"truth_c: {truth_c}")
print(f"falsification: {falsification}")

Basic Information:
set_id: 2-10-2-1
lie_id: 0-1
conv_id: 10
truth_id: 1
lie_type: altruistic_white_lies
emotion: sad


Topic Information:
scenario: the family's pet has gone missing yesterday, and talk about how to find it
situation_topic: talking about what happened to the missing pet
situation: Catherine is very upset that the cat is lost
lie_objective: comfort Catherine's feeling
leave_reason_B: use the bathroom
leave_reason_D_1: have a urgent call
leave_reason_D_2: use the bathroom


Relationship:
relationship: families (parents, one kid, aunt/uncle)


Character Names:
A_name (liar): Jupiter
B_name (target): Catherine
C_name (accomplice): Jackie
D_name (observer): Petra


Lie Information:
real_reason_c: the child will be heartbroken if hearing that the pet has passed away or is lost forever
lie_c: parent Jupiter and Jackie telling the young kid Catherine that the pet has gone to live on a big, happy farm in the countryside
truth_c: Petra just got a call that the missing pet is foun

### 2.1 set the elements for prompt

In [1145]:
leave_reason_B, leave_reason_D_1, leave_reason_D_2 = random.sample(leave_reasons, 3)
print(leave_reason_B)
print(leave_reason_D_1)
print(leave_reason_D_2)

have to briefly leave to answer an urgent text
forgot to run errands
have to water my plants


In [1204]:
leave_reason_B = 'use the bathroom'
leave_reason_D_1 = 'have a urgent call'
leave_reason_D_2 = 'feed the dog'
# # leave_reason_B = 'feed the dog'
# # leave_reason_D_1 = 'need to quickly stretch my legs'
# # leave_reason_D_2 = 'have to briefly assist someone else in the household'

In [1396]:
random_names = df_top_20['Name'].sample(n=4).to_list()
A_name, B_name, C_name, D_name = random_names
print(f"A_name = {A_name}")
print(f"B_name = {B_name}")
print(f"C_name = {C_name}")
print(f"D_name = {D_name}")

A_name = Kalyn
B_name = Jim
C_name = Leroy
D_name = Mitzi


In [1418]:
lie_type = "altruistic_white_lies"
emotion = "sad"

# Set your variables
set_id = "2-10-5-2"
lie_id = "5-2"
conv_id = 10
########
# truth_id = 0
# falsification = None  # 赋值为空值（null）
########
truth_id = 1
# falsification = True
falsification = False
########

muiltiple_liar = True
# muiltiple_liar = False

# step 1
scenario = "the family's pet rabbit has gone missing yesterday, and talk about how to find it"
relationship = "families (parents, one kid, aunt/uncle)"

# step 2
# real_reason是否是针对target的
# true是针对target的 && false不是针对target的
real_reason_type = True # 这个是只对B好
# real_reason_type = False # 这个是对A和B都好1

# lie的三元组理论上不能有重叠比如realson不要出现truth
real_reason_c = "the child A will be heartbroken if hearing that the pet has passed away or is lost forever"
real_reason_q = "the child A will be heartbroken if hearing that the pet has passed away or is lost forever"

# step 3
# situation_topic是在step3里面用来自然引出truth_c的
situation_topic = "talking about what happened to the missing pet"
# # # truth有时候是关于AC对于B的
# falsified = true
# 因为去掉了avoid repeating exactly the same word这里的话c要比q长一些
# truth_c = "D just got a call that the missing pet is found already dead due to a car crush"
# truth_q = "the missing pet is found already dead due to a car crush"

# # falsified = false
truth_c = "the pet has likely gone missing for good, and there is little chance it will be found or return home"
truth_q = "the pet probably won’t be found"


# situation是在step4里面用来引出lie的
lie_objective = "comfort B's feeling"
situation = "the kid B is devastated about the lost cat, crying nonstop and saying the pet must be starving or getting bullied outside, which makes the parents decide to lie"
lie_c = "Parent A and C told the child B that they just got the message the pet was chosen by the Starlight Fairy to be her special companion in the enchanted forest, where all animals live forever young"
lie_q = "the pet became the Starlight Fairy's companion"


#下面这几句话不能有大写的ABCD
real_reason_c = replace_ABCD_with_name(real_reason_c, A_name, B_name, C_name, D_name)
lie_c = replace_ABCD_with_name(lie_c, A_name, B_name, C_name, D_name)
truth_c = replace_ABCD_with_name(truth_c, A_name, B_name, C_name, D_name)
# 简化lie的三元组可以直接用于question generation
real_reason_q = replace_ABCD_with_name(real_reason_q, A_name, B_name, C_name, D_name)
lie_q = replace_ABCD_with_name(lie_q, A_name, B_name, C_name, D_name)
truth_q = replace_ABCD_with_name(truth_q, A_name, B_name, C_name, D_name)
situation = replace_ABCD_with_name(situation, A_name, B_name, C_name, D_name)
lie_objective = replace_ABCD_with_name(lie_objective, A_name, B_name, C_name, D_name)
situation_topic = replace_ABCD_with_name(situation_topic, A_name, B_name, C_name, D_name)
scenario = replace_ABCD_with_name(scenario, A_name, B_name, C_name, D_name)

In [1419]:
real_reason_c

'the child Kalyn will be heartbroken if hearing that the pet has passed away or is lost forever'

### 2.2 put elements into generation prompt

#### 2.2.1 Single liar

##### 2.2.1.1 provide truth

In [1438]:
########## 提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion要删掉
step_1 = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2 = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# but do not propose any plans with others in the conversation之后不是invitation了要删掉
#  without directly repeating these exact words
# 提供truth
step_3 = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
The conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation. 
And then '{{truth_c}}' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# In response, {{A: the liar name}} tells a white lie that '{{the lie}}', in order to '{{lie_objective}}'.
step_4 = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally {{situation}}. In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}', in order to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When {{A: the liar name}} decline invitation, should simply decline without suggesting alternative plans or future possibilities.”
"""

# Populate template
filled_step_1 = populate_template(step_1, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing music taste while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Angelik, Berthal, Gryphon and Courtney.
They are coworkers.

Now, Berthal leaves the conversation because of the reason: have to go to the restroom, while Courtney leaves the conversation because of the reason: need to make a quick trip to the grocery store.
With Berthal and Courtney absent, Angelik and Gryphon continue their conversation, shifting the topic to 'Angelik takes great pride in their music taste and considers it superior to others, and dislike Berthal’s preferred style.'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Berthal and Courtney are leaving the conversation.

Now, only Courtney returns and rejoins the conversation after after previously leaving due to: need to make a quick trip to the grocer

##### 2.2.1.2 Do not provide truth

In [1176]:
########## 不提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion
step_1_notruth = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
# but do not propose any plans with others in the conversation
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# with {{situation_topic}}. Make sure the dialogue flows naturally without directly repeating these exact words.
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues, and then {{situation}} naturally unfolds. In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}' to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""
# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing casual chatting about their recent cooking and baking experiences while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Francis, Pearl, Ashari and Miriam.
They are colleagues.

Now, Pearl leaves the conversation because of the reason: have to briefly leave to answer an urgent text, while Miriam leaves the conversation because of the reason: forgot to run errands.
With Pearl and Miriam absent, Francis and Ashari continue their conversation, shifting the topic to 'Francis is extremely pikcy about their belongings, making it hard for others to choose gifts that align with their exacting standards'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Pearl and Miriam are leaving the conversation.

Now, only Miriam returns and rejoins the conversation after after previously leaving due to

#### 2.2.2 Mutiple liars

##### 2.2.2.1 provide truth

In [1420]:
########## step3是situation_topic,step4是situation ################
########## 提供truth!! ################
step_1_truth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}, {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
"""


step_2_truth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 提供truth:
#  due to: {{leave reason D_1}}
# !!!!!!!!!
# Ensure the dialogue flows naturally without directly repeating these exact words.
step_3_truth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
And then '{{truth_c}}' is natrually revealed in the conversation. 
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# because of the reason{{leave reason B}}
step_4_truth = """Now {{B: the target name}} returns to the conversation after leaving the conversation. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Kalyn, Jim, Leroy and Mitzi.
Start the conversation with all participants discussing the family's pet rabbit has gone missing yesterday, and talk about how to find it.
They are families (parents, one kid, aunt/uncle), Kalyn and Leroy are parents, Jim is kid, and Mitzi is aunt/uncle.

Now, Jim leaves the conversation because of the reason: use the bathroom, while Mitzi leaves the conversation because of the reason: have a urgent call.
With Jim and Mitzi absent, Kalyn and Leroy continue their conversation, shifting the topic to 'the child Kalyn will be heartbroken if hearing that the pet has passed away or is lost forever'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Jim and Mitzi are leaving the conversation.

Now, only Mitzi returns and rejoins the conversation after after previously leaving. Note that Jim has not returned to the conversation yet.
Then the conversat

##### 2.2.2.2 Do not provide truth

In [1377]:
########## 不提供truth!! ################
########## step3是situation_topic,step4是situation_topic + situation ################
step_1_notruth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}, {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
# due to: {{leave reason D_1}}
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# {{leave reason B}}
# because of the reason: have to go to the restroom
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues with {{situation_topic}}. And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Homer, Norene, Danielle and Lina.
Start the conversation with all participants discussing the family's pet rabbit has gone missing yesterday, and talk about how to find it.
They are families (parents, one kid, aunt/uncle), Homer and Danielle are parents, Norene is kid, and Lina is aunt/uncle.

Now, Norene leaves the conversation because of the reason: use the bathroom, while Lina leaves the conversation because of the reason: have a urgent call.
With Norene and Lina absent, Homer and Danielle continue their conversation, shifting the topic to 'the child Homer will be heartbroken if hearing that the pet has passed away or is lost forever'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Norene and Lina are leaving the conversation.

Now, only Lina returns and rejoins the conversation after after previously leaving. Note that Norene has not returned to the conversation ye

### 2.3 generation

#### step 1

In [1440]:
result_1 = generate_natural_conversation_step1(filled_step_1)
part_1 = result_1.choices[0].message['content']
print(part_1)

Angelik: So, what type of music do you all listen to while working? Personally, I find classical music helps me focus.

Berthal: Classical, huh? That’s interesting, Angelik. I usually go for some good old rock and roll. It keeps me energized, especially when I’m handling tedious tasks.

Gryphon: Rock is great, Berthal! I lean more towards electronic music, though. The beats help me stay concentrated for longer periods.

Courtney: I love electronic too, Gryphon! But for me, it's more of a mix. On some days, it's jazz, especially when I’m in a creative rut. It’s like it unblocks my thoughts.

Angelik: Jazz is nice! Do you guys have any favorite artists? I’ve been into a lot of Tchaikovsky lately—it’s so calming.

Berthal: Favorite artist? Hmm, lately I can't stop listening to Led Zeppelin. Their music never gets old for me.

Gryphon: Oh, Led Zeppelin! Good choice. I’m really into Daft Punk right now. Their sound is unique and invigorating.

Courtney: Daft Punk is legendary! As for jazz, 

In [None]:
# situation = "Berthal is inviting Angelik to attend a classic rock live show tonight",

#### step 2

In [1441]:
input_2 = filled_step_2 + "\n"+ part_1
print(input_2)

Now, Berthal leaves the conversation because of the reason: have to go to the restroom, while Courtney leaves the conversation because of the reason: need to make a quick trip to the grocery store.
With Berthal and Courtney absent, Angelik and Gryphon continue their conversation, shifting the topic to 'Angelik takes great pride in their music taste and considers it superior to others, and dislike Berthal’s preferred style.'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Berthal and Courtney are leaving the conversation.

Angelik: So, what type of music do you all listen to while working? Personally, I find classical music helps me focus.

Berthal: Classical, huh? That’s interesting, Angelik. I usually go for some good old rock and roll. It keeps me energized, especially when I’m handling tedious tasks.

Gryphon: Rock is great, Berthal! I lean more towards electronic music, though. The beats help me stay concentrated for long

In [1449]:
result_2 = generate_natural_conversation_step2(input_2)
part_2 = result_2.choices[0].message['content']
print(part_2)

Courtney: Well, speaking of daily grind, I've got to make a quick trip to the grocery store. Catch you all later!

Berthal: Looks like we're all stepping out for a bit. I have to run to the restroom, but I'll be back soon. Don’t have too much fun without us!

Angelik: We’ll try to save the best part of the music chat for you both. See you!

Gryphon: Take care, Courtney and Berthal! Now, Angelik, you mentioned Tchaikovsky earlier—quite the sophisticated choice.

Angelik: I do think my taste leans towards the refined. I appreciate the complexity and emotion that comes with classical compositions. It’s just a notch above other styles, in my opinion.

Gryphon: There's definitely a depth to classical music that other genres might not hit. Personally, I prefer electronic because of its modern twist, but I can see why you hold classical in such high esteem. How do you feel about rock, though?

Angelik: Rock is, well, not really my cup of tea. I respect others' preferences, but I find it a bit

#### step 3

In [1443]:
input_3 = filled_step_3 + "\n" + part_1 + "\n\n" + part_2
print(input_3)

Now, only Courtney returns and rejoins the conversation after after previously leaving due to: need to make a quick trip to the grocery store. Note that Berthal has not returned to the conversation yet.
The conversation continues and naturally shifts to the plan for today after work. Do NOT catch up or recap details from the previous conversation. 
And then 'A hasn't decided what to do yet' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally.
After engaging briefly , Courtney leaves the conversation again because of the reason: want to go grab a drink.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that Courtney first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.

Angelik: So, what type of music do you all listen to while working? Personally, I

In [1444]:
# 检查step3生成的第一句！！！不能重复step2里面realreason的部分（因为不能让observer知道这部分）！！！！
# 检查没有target的名字
result_3 = generate_natural_conversation_step3(input_3)
part_3 = result_3.choices[0].message['content']
print(part_3)

Courtney: Hey, I'm back! Just in time to catch the end of this interesting music chat. 

Gryphon: Welcome back, Courtney! We're just diving into how music influences our work routines. Got anything else on your mind?

Courtney: Well, after work, I'm thinking about unwinding a bit. The day’s been hectic, and there's nothing like relaxing to some tunes.

Angelik: That sounds great, Courtney! Have any specific plans in mind for after work, or just going with the flow?

Courtney: Honestly, I haven’t decided what to do yet. Might just play it by ear and see where the evening takes me.

Gryphon: That can be refreshing. Sometimes the best moments are the unplanned ones.

Courtney: True! Anyway, I need to leave again. I’m going to grab a drink. Catch you all later!


#### step 4

In [1445]:
input_4 = filled_step_4+ "\n" + part_1 + "\n\n" + part_2 + "\n\n" +part_3
print(input_4)

Now Berthal returns to the conversation after leaving the conversation because of the reason: have to go to the restroom. 
First have Berthal explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally Berthal is inviting Angelik to attend a classic rock live show tonight. In response, Angelik tells Berthal that 'Angelik already has some plans.', in order to 'decline the invitation'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When Angelik decline invitation, should simply decline without suggesting alternative plans or future possibilities.”

Angelik: So, what type of music do you all listen to while working? Personally, I find classical music helps me focus.

Berthal: Classical, huh? That’s interesting, Angelik. I usually go for some good old rock and roll. It keeps me energized, especially when I’m

In [1446]:
result_4 = generate_natural_conversation_step4(input_4)
part_4 = result_4.choices[0].message['content']
print(part_4)

Berthal: Hey, I'm back! Hope I didn't miss too much. You know, speaking of music, there's a classic rock live show tonight. Angelik, would you like to join me?

Angelik: Thanks for the invite, Berthal, but I already have some plans for tonight.

Berthal: No worries! Maybe some other time then. Hope you enjoy whatever you're up to.

Gryphon: A classic rock show sounds awesome, though. Who's playing?

Berthal: It's a local band but they've got that authentic old-school vibe. I think they usually play covers of the big names like Deep Purple and Pink Floyd.

Gryphon: Sounds like a blast! I might check them out in the future. Enjoy the show, Berthal!


#### combine the conversation

In [1407]:
# [Crawford and Adrian leave the conversation.]
# 需要去掉这样的东西

In [1447]:
# 10
# 6
# 6
# 6
full_context = part_1 + part_2 + part_3 + part_4
short_context = part_2 + part_3 + part_4
# 最外层的上下文
full_context = "\n\n".join([part_1, part_2, part_3, part_4])
short_context = "\n\n".join([part_2, part_3, part_4])

import tiktoken
# 以 GPT-4 模型为例，选择对应的模型编码器
encoding = tiktoken.encoding_for_model("gpt-4o")
full_context_tokens = encoding.encode(full_context)
short_context_tokens = encoding.encode(short_context)

# 分别打印长度（字符数）和token数
print(f"Full context length (tokens): {len(full_context_tokens)}")
print(f"Short context length (tokens): {len(short_context_tokens)}")

Full context length (tokens): 892
Short context length (tokens): 581


In [1448]:
# # Print the full conversation context

# # Read the JSON file
# with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json", "r", encoding="utf-8") as f:
#     data_list = json.load(f)

# # Find and update the entry with set_id "4-13-0-0"
# for entry in data_list:
#     if entry["set_id"] == "4-13-0-0":
#         entry["short_context"] = short_context
#         break

# # Write the updated data back to the file
# with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json", "w", encoding="utf-8") as f:
#     json.dump(data_list, f, ensure_ascii=False, indent=4)

## 3. json

In [1451]:
# 将所有字段分组放进一个字典
data_dict = {
    "set_id": set_id,
    "lie_id": lie_id,
    "conv_id": conv_id,
    "truth_id": truth_id,
    "lie_type": lie_type,
    "emotion": emotion,
    "topic": {
        "scenario": scenario,
        "situation_topic": situation_topic,
        "situation": situation,
        "lie_objective": lie_objective,
        "leave_reason_B": leave_reason_B,
        "leave_reason_D_1": leave_reason_D_1,
        "leave_reason_D_2": leave_reason_D_2,
    },
    "relationship": relationship,
    "muiltiple_liar": muiltiple_liar,
    "real_reason_type": real_reason_type,
    "characters": {
        "liar": A_name,
        "target": B_name,
        "accomplice": C_name,
        "observer": D_name
    },
    "lie": {
        "real_reason_c": real_reason_c,
        "lie_c": lie_c,
        "truth_c": truth_c,
        "real_reason_q": real_reason_q,
        "lie_q": lie_q,
        "truth_q": truth_q,
        "falsification": falsification
    },
    "part_1": part_1,
    "part_2": part_2,
    "part_3": part_3,
    "part_4": part_4,
    "full_context": full_context,
    "short_context": short_context,
    "full_context_tokens": len(full_context_tokens),
    "Short_context_tokens": len(short_context_tokens)
}

In [1452]:
# 写一个判断如果 'falsification' = null不写入 'truth_c'
if data_dict.get('truth_id') == 0:
    data_dict['lie']['truth_c'] = ''
# 记得写入json
append_data_to_json(data_dict, "/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_0.json")
data_dict

{'set_id': '0-2-0-2',
 'lie_id': '0-2',
 'conv_id': 2,
 'truth_id': 1,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': 'music taste',
  'situation_topic': 'the plan for today after work',
  'situation': 'Berthal is inviting Angelik to attend a classic rock live show tonight',
  'lie_objective': 'decline the invitation',
  'leave_reason_B': 'have to go to the restroom',
  'leave_reason_D_1': 'need to make a quick trip to the grocery store',
  'leave_reason_D_2': 'want to go grab a drink'},
 'relationship': 'coworkers',
 'muiltiple_liar': True,
 'real_reason_type': True,
 'characters': {'liar': 'Angelik',
  'target': 'Berthal',
  'accomplice': 'Gryphon',
  'observer': 'Courtney'},
 'lie': {'real_reason_c': 'Angelik takes great pride in their music taste and considers it superior to others, and dislike Berthal’s preferred style.',
  'lie_c': 'Angelik already has some plans.',
  'truth_c': "A hasn't decided what to do yet",
  'real_reason_q': 'the child Kaly

In [1289]:
# # 记得写入json
# append_data_to_json(data_dict, "/Users/liuyiwei/Desktop/TactfulToM/dataset/Tactful_conv_element.json")

In [39]:
def append_data_to_json(data_dict, filename="/Users/liuyiwei/Desktop/whiteToM/dataset/Tactful_conv_element.json"):
    # 1. 先尝试读取已有的 JSON 文件，如果不存在则创建一个空列表
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data_list = json.load(f)
                # 如果文件不是一个列表，可能需要根据实际情况做容错处理
                if not isinstance(data_list, list):
                    data_list = []
            except json.JSONDecodeError:
                # 如果文件不是合法的 JSON 格式，就初始化为空列表
                data_list = []
    else:
        data_list = []
    
    # 2. 向列表中追加新的字典
    data_list.append(data_dict)
    
    # 3. 将更新后的列表重新写回 JSON 文件
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data_list, f, ensure_ascii=False, indent=4)

In [55]:
# # gpt rewrite 一下real_reason_c
# real_reason_q = "Alice’s social battery is really running dry these days, and she thinks she needs more time to be alone."
# lie_q = "Alice has a meeting in the afternoon, so she won’t be able to join for studying."
# truth_q = "Alice is actually free for the whole afternoon."