In [265]:
import pandas as pd
import random
import kagglehub
import os
import json

In [269]:
from conv_generation_gpt_api import generate_natural_conversation_step1, generate_natural_conversation_step2, generate_natural_conversation_step3, generate_natural_conversation_step4

## 1. set the elements for conversation template

In [1434]:
# 从名字脚本里面随机选择四个名字
# Download latest version
path = kagglehub.dataset_download("ryanburnsworth/popular-names-by-birth-year-1880-2022")
print("Path to dataset files:", path)

# List all files in the directory to find the correct CSV file
# files = os.listdir(path)
# print("Files in directory:", files)

# Load the CSV file into a pandas DataFrame
file_path = os.path.join(path, 'names_by_birth_year.csv')
df = pd.read_csv(file_path)
print(df.head())

df_sorted = df.sort_values(by="Count", ascending=False) # 1. 按 Count 降序排序
top_30_count = int(len(df_sorted) * 0.3)
df_top_30 = df_sorted.iloc[:top_30_count]  

Path to dataset files: /Users/liuyiwei/.cache/kagglehub/datasets/ryanburnsworth/popular-names-by-birth-year-1880-2022/versions/1
   Year       Name Gender  Count
0  1883       Mary      F   8012
1  1883       Anna      F   3306
2  1883       Emma      F   2367
3  1883  Elizabeth      F   2255
4  1883     Minnie      F   2035


In [1021]:
# 1. 将所有离开原因写成一个列表
leave_reasons = [
    "bathroom break",
    "coffee break",
    "forgot something important",
    "forgot to print some documents",
    "forgot to recieve a package",
    "forgot to return a package",
    "forgot to run errands",
    "forgot to submit documents",
    "have a meeting starting soon that I need to prepare for",
    "have a previous engagement that I need to attend to quickly",
    "have a work-related emergency that requires my immediate attention",
    "have an unexpected visitor at my door",
    "have errands to run",
    "have to attend to someone who just walked in",
    "have to check on something",
    "have to go to the restroom",
    "have to pick up a prescription",
    "have to pick up dry cleaning",
    "have to print or scan documents",
    "have to receive a delivery",
    "have to recharge laptop",
    "have to return a borrowed item",
    "have to take care of a family matter",
    "have to take care of an unexpected task",
    "have unexpected visitor",
    "his/her pet needs attention",
    "his/her family is calling",
    "incoming delivery",
    "must respond to a phone call",
    "need to check on a friend or family member who needs assistance",
    "need to finish a task that’s time-sensitive",
    "need to get a phone call",
    "need to get some coffee",
    "need to go to the toilet",
    "need to grab a snack or a drink",
    "need to have a quick chat with someone else",
    "need to make a phone call",
    "need to make a quick trip to the drug store",
    "need to make a quick trip to the grocery store",
    "need to pick up a package",
    "need to receive a parcel",
    "need to recharge cellphone",
    "need to register for an event",
    "need to schedule a haircut or salon appointment",
    "need to schedule another appointment",
    # "need to step away for a moment to stretch and clear my mind",
    "need to step out for a moment",
    "need to submit some papers",
    "need to take care of some paperwork or documents",
    "need to take care of some personal matters",
    # "need to take care of something related to my health",
    "need to take care of something urgent",
    "need to troubleshoot something",
    "parking meter expiring",
    "remembered something that needs to be taken care of",
    "remembered to receive a package",
    "remembered to submit some papers",
    "remembered to take care of some paperwork or documents",
    "remembered to take care of some personal matters",
    "remembered to take care of something urgent",
    "want to go grab a drink",
    "want to go grab a coffee",
    "want to go take some fresh air",
    "want to go to the bathroom",
    # 下面是让gpt加的
    "need to move my car",
    "have to take an urgent call from my boss",
    "need to check my emails quickly",
    "have to respond to an important message",
    "need to restart my computer",
    "have to take a quick medication",
    "need to handle a minor household emergency",
    # "have to answer the doorbell",
    "have to refill my water bottle",
    "have to feed my pet",
    "have to water my plants",
    # "have to address a security alert at home",
    "have to take a brief walk to clear my mind",
    "need to step outside briefly to meet someone",
    "have to adjust the thermostat",
    "need to quickly tidy up my workspace",
    "need to quickly verify something important",
    # "have to quickly fix something technical",
    # "need to step away briefly due to loud background noise",
    "have to quickly arrange something for a later meeting",
    "need to briefly step out to confirm travel arrangements",
    "have to take care of an urgent email",
    "have to briefly assist a coworker",
    "need to briefly leave to verify appointment details",
    # "have to check on an appliance or device",
    "have to briefly tend to something outside",
    "have to quickly reschedule an upcoming meeting",
    "need to briefly attend to my child",
    # "have to briefly check the weather due to upcoming plans",
    # "need to quickly confirm my availability",
    "need to grab a quick snack",
    "need to quickly stretch my legs",
    "have to briefly troubleshoot my internet connection",
    "have to briefly step out for privacy reasons",
    "need to quickly tidy the room before another meeting",
    # "have to briefly step away to manage background distractions",
    "have to quickly update someone about my status",
    "need to briefly review notes or materials",
    "have to briefly leave to answer an urgent text",
    "have to briefly assist someone else in the household",
    # "need to briefly rest my eyes due to strain",
    "have to quickly plug in my device to charge",
    "need to briefly leave to retrieve an important item",
    "have to step away briefly to close a window or door",
    "need to quickly ensure I turned off an appliance",
    "have to briefly step away for a personal reason",
    "need to briefly step away due to allergy or health symptoms"
]

# # 3. 测试输出
# print("leave_reason_B:", leave_reason_B)
# print("leave_reason_D:", leave_reason_D)

In [388]:
# Function to populate template with variables
def populate_template(template, scenario, relationship, situation, lie_objective, real_reason, the_lie, the_fact, the_emotion_to_avoid, A_name, B_name, C_name, D_name):
    return template.replace("{{Topic for the scenario}}", scenario)\
                   .replace("{{relationship descriptor}}", relationship)\
                   .replace("{{A: the liar name}}", A_name)\
                   .replace("{{B: the target name}}", B_name)\
                   .replace("{{C: the accomplice name}}", C_name)\
                   .replace("{{D: the observer name}}", D_name)\
                   .replace("{{leave reason B}}", leave_reason_B)\
                   .replace("{{leave reason D_1}}", leave_reason_D_1)\
                   .replace("{{leave reason D_2}}", leave_reason_D_2)\
                   .replace("{{real_reason_c}}", real_reason_c)\
                   .replace("{{truth_c}}", truth_c if truth_c is not None else "")\
                   .replace("{{the lie}}", lie_c)\
                   .replace("{{situation_topic}}", situation_topic)\
                   .replace("{{situation}}", situation)\
                   .replace("{{lie_objective}}", lie_objective)

# def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str ) -> str:
#     """
#     将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
#     同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}"。
#     """
#     result = text.replace("A", f"{A_name}")
#     result = result.replace("B", f"{B_name}")
#     result = result.replace("C", f"{C_name}")
#     result = result.replace("D", f"{D_name}")
#     return result

In [636]:
import re

def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str) -> str:
    """
    将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
    同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}" 等。
    采用一次性替换，避免替换后的内容被再次修改。
    """
    # 定义替换映射
    mapping = {
        "A": A_name,
        "B": B_name,
        "C": C_name,
        "D": D_name
    }
    # 构建正则表达式，先匹配形如 A's 的情况，再匹配单个字母
    pattern = re.compile(r"(A's|B's|C's|D's|A|B|C|D)")
    
    def replacer(match):
        token = match.group(0)
        if token.endswith("'s"):
            # 处理 A's、B's、C's、D's 的情况
            letter = token[0]
            return mapping[letter] + "'s"
        else:
            return mapping[token]
    
    return pattern.sub(replacer, text)


Denishia, Denishia's, Shiela, Shiela's, Alvin, Alvin's, Olvin, Olvin's


In [386]:
# list for situation
# concert
# shopping
# painting
# ice skating

## 2. template

### 2.0 import elements from the conv_element.json

In [659]:
import json
with open("/Users/liuyiwei/Desktop/TactfulToM/dataset/Tactful_conv_element.json", "r", encoding="utf-8") as f:
    data_list = json.load(f)

In [660]:
data = data_list[20]
data

{'set_id': '0-4-3-2',
 'lie_id': '3-2',
 'conv_id': 4,
 'truth_id': 1,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': 'having a cozy home is important for maintaining a good mood',
  'situation_topic': 'renew home decoration goods for summer mood',
  'situation': 'Fleta invites Adama and Gwyndolyn to go to Ikea together this weekend',
  'lie_objective': 'decline the invitation',
  'leave_reason_B': 'need to troubleshoot something',
  'leave_reason_D_1': 'have to briefly assist a coworker',
  'leave_reason_D_2': 'need to grab a snack or a drink'},
 'relationship': 'coworkers',
 'muiltiple_liar': True,
 'muiltiple_target': False,
 'real_reason_type': True,
 'characters': {'liar': 'Adama',
  'target': 'Fleta',
  'accomplice': 'Gwyndolyn',
  'observer': 'Paxtyn'},
 'lie': {'real_reason_c': "Adama and Gwyndolyn don't like Fleta's decoration style for home, so they do not want to buy home stuff with Fleta because it would be rude to say that to Fleta's face",

In [43]:
# 最外层直接取值
set_id = data["set_id"]
lie_id = data["lie_id"]
conv_id = data["conv_id"]
truth_id = data["truth_id"]
lie_type = data["lie_type"]
emotion = data["emotion"]

# 取 topic 下面的嵌套字段
scenario = data["topic"]["scenario"]
situation_topic = data["topic"]["situation_topic"]
situation = data["topic"]["situation"]
lie_objective = data["topic"]["lie_objective"]
leave_reason_B = data["topic"]["leave_reason_B"]
leave_reason_D_1 = data["topic"]["leave_reason_D_1"]
leave_reason_D_2 = data["topic"]["leave_reason_D_2"]

relationship = data["relationship"]

# 取 characters 下面的嵌套字段
A_name = data["characters"]["liar"]
B_name = data["characters"]["target"]
C_name = data["characters"]["accomplice"]
D_name = data["characters"]["observer"]

# 取 lie 下面的嵌套字段
real_reason_c = data["lie"]["real_reason_c"]
lie_c = data["lie"]["lie_c"]
truth_c = data["lie"]["truth_c"]
falsification = data["lie"]["falsification"]

# 最外层的上下文
full_context = data["full_context"]
short_context = data["short_context"]


### 2.1 set the elements for prompt

In [1695]:
leave_reason_B, leave_reason_D_1, leave_reason_D_2 = random.sample(leave_reasons, 3)
print(leave_reason_B)
print(leave_reason_D_1)
print(leave_reason_D_2)

have an unexpected visitor at my door
have to refill my water bottle
need to quickly verify something important


In [1696]:
# leave_reason_B = 'use the bathroom'
# leave_reason_D_1 = 'have a urgent call'
# leave_reason_D_2 = 'use the bathroom'
leave_reason_B = 'feed the dog'
leave_reason_D_1 = 'need to quickly verify something important'
leave_reason_D_2 = 'have an unexpected visitor at the door'

In [1697]:
random_names = df_top_30['Name'].sample(n=4).to_list()
A_name, B_name, C_name, D_name = random_names
print(f"A_name = {A_name}")
print(f"B_name = {B_name}")
print(f"C_name = {C_name}")
print(f"D_name = {D_name}")

A_name = Drucilla
B_name = Eugenia
C_name = Marycruz
D_name = Katey


In [1698]:
lie_type = "altruistic_white_lies"
emotion = "sad"

# Set your variables
set_id = "3-14-1-1"
lie_id = "1-1"
conv_id = 14
########
# truth_id = 0
# falsification = None  # 赋值为空值（null）
########
truth_id = 1
falsification = True

# falsification = False
########

muiltiple_liar = True
# muiltiple_liar = False

# muiltiple_target = True
muiltiple_target = False

scenario = "chatting casually about B's summer plan to learn piano"

# real_reason是否是针对target的
# true是针对个人的
# false不是针对个人的
real_reason_type = True
# real_reason_type = False
real_reason_c = "parent A and C want support B's confidence and encourage B to stay interested in music, even though b isn't very good at it"

lie_objective = "Boost B's confidence and enthusiasm"

situation_topic = "B is taking violion courses previously"
truth_c = "parent A and C actually believe that B isn’t musically talented at all"

situation = "B is excited to share the dreaming of becoming an musician"
lie_c = "parent A and C smile and say B is very talented"


relationship = "families (parents, one kid, aunt/uncle)"

#下面这几句话不能有大写的ABCD
real_reason_c = replace_ABCD_with_name(real_reason_c, A_name, B_name, C_name, D_name)
lie_c = replace_ABCD_with_name(lie_c, A_name, B_name, C_name, D_name)
truth_c = replace_ABCD_with_name(truth_c, A_name, B_name, C_name, D_name)
situation = replace_ABCD_with_name(situation, A_name, B_name, C_name, D_name)
lie_objective = replace_ABCD_with_name(lie_objective, A_name, B_name, C_name, D_name)
situation_topic = replace_ABCD_with_name(situation_topic, A_name, B_name, C_name, D_name)
scenario = replace_ABCD_with_name(scenario, A_name, B_name, C_name, D_name)

### 2.2 put elements into generation prompt

#### 2.2.1 Single liar

##### 2.2.1.1 provide truth

In [1701]:
########## 提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion要删掉
step_1 = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2 = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# but do not propose any plans with others in the conversation之后不是invitation了要删掉
# 提供truth
step_3 = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
The conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation. 
And then '{{truth_c}}' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally without directly repeating these exact words.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# In response, {{A: the liar name}} tells a white lie that '{{the lie}}', in order to '{{lie_objective}}'.
step_4 = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally {{situation}}. In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}', in order to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When {{A: the liar name}} decline invitation, should simply decline without suggesting alternative plans or future possibilities.”
"""

# Populate template
filled_step_1 = populate_template(step_1, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing chatting casually about Eugenia's summer plan to learn piano while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Drucilla, Eugenia, Marycruz and Katey.
They are families (parents, one kid, aunt/uncle).

Now, Eugenia leaves the conversation because of the reason: feed the dog, while Katey leaves the conversation because of the reason: need to quickly verify something important.
With Eugenia and Katey absent, Drucilla and Marycruz continue their conversation, shifting the topic to 'parent Drucilla and Marycruz want support Eugenia's confidence and encourage Eugenia to stay interested in music, even though b isn't very good at it'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Eugenia and Katey are leaving the conversation.

Now, only Katey returns and rejoins the conversation a

##### 2.2.1.2 Do not provide truth

In [1700]:
########## 不提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion
step_1_notruth = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
# but do not propose any plans with others in the conversation
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}, but do not propose any plans with others in the conversation. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues with {{situation_topic}}. Make sure the dialogue flows naturally without directly repeating these exact words.
And then {{situation}} naturally unfolds.
In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}' to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""
# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing chatting casually about Eugenia's summer plan to learn piano while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Drucilla, Eugenia, Marycruz and Katey.
They are families (parents, one kid, aunt/uncle).

Now, Eugenia leaves the conversation because of the reason: feed the dog, while Katey leaves the conversation because of the reason: need to quickly verify something important.
With Eugenia and Katey absent, Drucilla and Marycruz continue their conversation, shifting the topic to 'parent Drucilla and Marycruz want support Eugenia's confidence and encourage Eugenia to stay interested in music, even though b isn't very good at it'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Eugenia and Katey are leaving the conversation.

Now, only Katey returns and rejoins the conversation a

#### 2.2.2 Mutiple liars

##### 2.2.2.1 provide truth

In [1682]:
########## 不提供truth改成提供truth ################
step_1_notruth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}. {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 提供truth:
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
And then '{{truth_c}}' is natrually revealed in the conversation. Ensure the dialogue flows naturally without directly repeating these exact words.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# {{leave reason B}}
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: have to go to the restroom. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Belinda, Ayra, Leo and Mia.
Start the conversation with all participants discussing chatting casually about Ayra's hobbies and recent school life.
They are families (parents, one kid, aunt/uncle). Belinda and Leo are parents, Ayra is kid, and Mia is aunt/uncle.

Now, Ayra leaves the conversation because of the reason: feed the cat, while Mia leaves the conversation because of the reason: have to quickly arrange something for a later meeting.
With Ayra and Mia absent, Belinda and Leo continue their conversation, shifting the topic to 'parent Belinda and Leo want support Ayra's confidence and encourage Ayra to stay interested in their hobby even when Ayra is not good at it'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Ayra and Mia are leaving the conversation.

Now, only Mia returns and rejoins the conversation after after previously leaving due to: have to quickly ar

In [1618]:
# ########## 提供truth!! ################
# step_1 = """Start the conversation with all participants discussing {{Topic for the scenario}}.
# In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
# They are {{relationship descriptor}}. {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
# """

# step_2 = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
# With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
# Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
# """

# # 提供truth
# step_3 = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
# The conversation continues with {{Topic for the scenario}}.Do NOT catch up or recap details from the previous conversation. 
# And then '{{truth_c}}' is natrually revealed in the conversation. Ensure the dialogue flows naturally without directly repeating these exact words.
# After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
# IMPORTANT:
# - Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
# - Do NOT use third-person narrative or stage directions.
# - Avoid mentioning details from previous conversations.
# """

# # mention it's family trip
# # In response, {{A: the liar name}} tells a white lie that '{{the lie}}', in order to '{{lie_objective}}'.
# step_4 = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
# First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
# Then the conversation naturally shifts to {{situation_topic}}, where natrurally {{situation}}. mention it's family trip. In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}', in order to '{{lie_objective}}'. 
# IMPORTANT:
# - Do NOT use third-person narrative or stage directions.
# - Avoid mentioning details from previous conversations.
# """

# # Populate template
# filled_step_1 = populate_template(step_1, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
# filled_step_2 = populate_template(step_2, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
# filled_step_3 = populate_template(step_3, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
# filled_step_4 = populate_template(step_4, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
# print(filled_step_1)
# print(filled_step_2)
# print(filled_step_3)
# print(filled_step_4)

##### 2.2.2.2 Do not provide truth

In [1607]:
########## 不提供truth!! ################
step_1_notruth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}. {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# {{leave reason B}}
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: have to go to the restroom. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues with {{situation_topic}}. And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Easter, Leora, Marleen and Wayne.
Start the conversation with all participants discussing talking about Leora's recent life in kindergarten.
They are families (parents, one kid, aunt/uncle). Easter and Marleen are parents, Leora is kid, and Wayne is aunt/uncle.

Now, Leora leaves the conversation because of the reason: use the bathroom, while Wayne leaves the conversation because of the reason: have a urgent call.
With Leora and Wayne absent, Easter and Marleen continue their conversation, shifting the topic to 'the kid Leora often feels scared and insecure about many things at night'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Leora and Wayne are leaving the conversation.

Now, only Wayne returns and rejoins the conversation after after previously leaving due to: have a urgent call. Note that Leora has not returned to the conversation yet.
Then the conversation co

### 2.3 generation

#### step 1

In [1705]:
result_1 = generate_natural_conversation_step1(filled_step_1)
part_1 = result_1.choices[0].message['content']
print(part_1)

**Eugenia:** Hey everyone! Guess what? I’ve finally decided to learn how to play the piano this summer!

**Drucilla:** Oh, that's wonderful, Eugenia! It's such a beautiful instrument. What made you want to learn it?

**Eugenia:** I've always loved the sound, but recently I've been inspired by listening to some classical pieces. Plus, I think it will be a perfect summer project.

**Marycruz:** That’s such an exciting plan! I remember when I tried playing the piano as a child. It's harder than it looks, but so rewarding once you get the hang of it.

**Katey:** Mom, remember the time you tried teaching me piano, and all I could play was "Chopsticks"? I guess music didn't exactly run in the family, huh?

**Drucilla:** Haha, those were some fun times, Katey. But who knows? Maybe Eugenia will discover a hidden talent we never knew existed.

**Eugenia:** I hope so! And who knows, maybe a few lessons and I’ll be able to play something more sophisticated than "Chopsticks."

**Marycruz:** Don’t 

#### step 2

In [1706]:
input_2 = filled_step_2 + "\n"+ part_1
print(input_2)

Now, Eugenia leaves the conversation because of the reason: feed the dog, while Katey leaves the conversation because of the reason: need to quickly verify something important.
With Eugenia and Katey absent, Drucilla and Marycruz continue their conversation, shifting the topic to 'parent Drucilla and Marycruz want support Eugenia's confidence and encourage Eugenia to stay interested in music, even though b isn't very good at it'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Eugenia and Katey are leaving the conversation.

**Eugenia:** Hey everyone! Guess what? I’ve finally decided to learn how to play the piano this summer!

**Drucilla:** Oh, that's wonderful, Eugenia! It's such a beautiful instrument. What made you want to learn it?

**Eugenia:** I've always loved the sound, but recently I've been inspired by listening to some classical pieces. Plus, I think it will be a perfect summer project.

**Marycruz:** That’s such a

In [1707]:
result_2 = generate_natural_conversation_step2(input_2)
part_2 = result_2.choices[0].message['content']
print(part_2)

**Eugenia:** Thanks, everyone! Speaking of which, I should go feed Max before he decides to take matters into his own paws. Talk to you later!

**Katey:** And I just realized I need to verify something really important. Bye for now, everyone!

**Drucilla:** Bye, Eugenia and Katey! So, Marycruz, what do you think we can do to help encourage Eugenia's musical journey?

**Marycruz:** Well, I think we need to focus on making the process enjoyable. Maybe we can find some engaging music apps or suggest joining a local group for beginners. It’s important she feels supported and that progress is celebrated, no matter how small.

**Drucilla:** That's a good idea. Perhaps we could attend a few performances together, too. Seeing live music can be quite inspiring and show her what she’s working towards.

**Marycruz:** Definitely! And maybe we can make some family nights where Eugenia showcases what she's learned. It doesn’t matter if it’s perfect; it’s about supporting her effort and growth.

**Dr

#### step 3

In [1708]:
input_3 = filled_step_3 + "\n" + part_1 + "\n\n" + part_2
print(input_3)

Now, only Katey returns and rejoins the conversation after after previously leaving due to: need to quickly verify something important. Note that Eugenia has not returned to the conversation yet.
The conversation continues and naturally shifts to Eugenia is taking violion courses previously. Do NOT catch up or recap details from the previous conversation. 
And then 'parent Drucilla and Marycruz actually believe that Eugenia isn’t musically talented at all' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally without directly repeating these exact words.
After engaging briefly , Katey leaves the conversation again because of the reason: have an unexpected visitor at the door.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that Katey first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentionin

In [1709]:
result_3 = generate_natural_conversation_step3(input_3)
part_3 = result_3.choices[0].message['content']
print(part_3)

**Katey:** Hey, I’m back! Sorry for ducking out earlier, but I'm ready to catch up now.

**Marycruz:** Welcome back, Katey! We were just talking about Eugenia’s plan to learn piano.

**Katey:** Oh, that sounds great! I remember she did take violin lessons before, right? How did that go for her?

**Marycruz:** Yeah, she did give the violin a shot for a while. But I think she enjoys piano more. It seems to align better with her interests now.

**Drucilla:** It's true. The violin was a bit of a challenge for her, and some folks didn't think it was her forte. But who knows? Maybe this time with the piano, things will click.

**Katey:** Hopefully! Sometimes it's just about finding the right fit. Anyway, it sounds like you all have some good plans for supporting her.

**Drucilla:** We’re trying. The main thing is to keep it enjoyable and low-pressure so she can really thrive.

**Katey:** That’s the way to do it. Oh, hold on, someone’s at my door. I have to leave again now. Hopefully, it's no

#### step 4

In [1710]:
input_4 = filled_step_4+ "\n" + part_1 + "\n\n" + part_2 + "\n\n" +part_3
print(input_4)

Now Eugenia returns to the conversation after leaving the conversation because of the reason: feed the dog. 
First have Eugenia explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally Eugenia is excited to share the dreaming of becoming an musician. In response, Drucilla tells Eugenia that 'parent Drucilla and Marycruz smile and say Eugenia is very talented', in order to 'Eugeniaoost Eugenia's confidence and enthusiasm'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When Drucilla decline invitation, should simply decline without suggesting alternative plans or future possibilities.”

**Eugenia:** Hey everyone! Guess what? I’ve finally decided to learn how to play the piano this summer!

**Drucilla:** Oh, that's wonderful, Eugenia! It's such a beautiful instrument. What made you want to learn it?

**

In [1711]:
result_4 = generate_natural_conversation_step4(input_4)
part_4 = result_4.choices[0].message['content']
print(part_4)

**Eugenia:** I'm back! Max has been fed, and I'm ready to dive back into our chat.

**Marycruz:** Welcome back, Eugenia! We were just discussing all the exciting ways to support you in your piano journey.

**Eugenia:** I'm so excited about it! I’ve been dreaming of becoming a musician for ages. Just imagining playing my favorite songs is thrilling.

**Drucilla:** Parent Drucilla and Marycruz smile and say Eugenia is very talented. We believe in you, Eugenia. You have so much potential, and we know you'll be amazing.

**Marycruz:** Absolutely, Eugenia. You've always had a great sense of rhythm and an ear for music. This is the perfect path for you.

**Eugenia:** Thank you so much! Your encouragement means the world to me. I can’t wait to get started and make you all proud with what I learn!


In [1600]:
# result_41 = generate_natural_conversation_step4(input_4)
# part_41 = result_41.choices[0].message['content']
# print(part_41)

In [1601]:
# part_4 = part_41

#### combine the conversation

In [1712]:
# 10
# 6
# 6
# 6
full_context = part_1 + part_2 + part_3 + part_4
short_context = part_2 + part_3 + part_4

In [1713]:
import tiktoken
# 以 GPT-4 模型为例，选择对应的模型编码器
encoding = tiktoken.encoding_for_model("gpt-4o")
full_context_tokens = encoding.encode(full_context)
short_context_tokens = encoding.encode(short_context)
# 分别打印长度（字符数）和token数
print(f"Full context length (tokens): {len(full_context_tokens)}")
print(f"Short context length (tokens): {len(short_context_tokens)}")

Full context length (tokens): 1069
Short context length (tokens): 740


## 3. json

In [1714]:
# 将所有字段分组放进一个字典
data_dict = {
    "set_id": set_id,
    "lie_id": lie_id,
    "conv_id": conv_id,
    "truth_id": truth_id,
    "lie_type": lie_type,
    "emotion": emotion,
    "topic": {
        "scenario": scenario,
        "situation_topic": situation_topic,
        "situation": situation,
        "lie_objective": lie_objective,
        "leave_reason_B": leave_reason_B,
        "leave_reason_D_1": leave_reason_D_1,
        "leave_reason_D_2": leave_reason_D_2,
    },
    "relationship": relationship,
    "muiltiple_liar": muiltiple_liar,
    "muiltiple_target": muiltiple_target,
    "real_reason_type": real_reason_type,
    "characters": {
        "liar": A_name,
        "target": B_name,
        "accomplice": C_name,
        "observer": D_name
    },
    "lie": {
        "real_reason_c": real_reason_c,
        "lie_c": lie_c,
        "truth_c": truth_c,
        "falsification": falsification
    },
    "part_1": part_1,
    "part_2": part_2,
    "part_3": part_3,
    "part_4": part_4,
    "full_context": full_context,
    "short_context": short_context,
    "full_context_tokens": len(full_context_tokens),
    "Short_context_tokens": len(short_context_tokens)
}

In [1715]:
# 写一个判断如果 'falsification' = null不写入 'truth_c'
if data_dict.get('truth_id') == 0:
    data_dict['lie']['truth_c'] = ''
# 记得写入json
append_data_to_json(data_dict, "/Users/liuyiwei/Desktop/TactfulToM/dataset/Tactful_conv_element.json")
data_dict

{'set_id': '3-14-1-1',
 'lie_id': '1-1',
 'conv_id': 14,
 'truth_id': 1,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': "chatting casually about Eugenia's summer plan to learn piano",
  'situation_topic': 'Eugenia is taking violion courses previously',
  'situation': 'Eugenia is excited to share the dreaming of becoming an musician',
  'lie_objective': "Eugeniaoost Eugenia's confidence and enthusiasm",
  'leave_reason_B': 'feed the dog',
  'leave_reason_D_1': 'need to quickly verify something important',
  'leave_reason_D_2': 'have an unexpected visitor at the door'},
 'relationship': 'families (parents, one kid, aunt/uncle)',
 'muiltiple_liar': True,
 'muiltiple_target': False,
 'real_reason_type': True,
 'characters': {'liar': 'Drucilla',
  'target': 'Eugenia',
  'accomplice': 'Marycruz',
  'observer': 'Katey'},
 'lie': {'real_reason_c': "parent Drucilla and Marycruz want support Eugenia's confidence and encourage Eugenia to stay interested in music, 

In [1289]:
# # 记得写入json
# append_data_to_json(data_dict, "/Users/liuyiwei/Desktop/TactfulToM/dataset/Tactful_conv_element.json")

In [1290]:
def append_data_to_json(data_dict, filename="/Users/liuyiwei/Desktop/whiteToM/dataset/Tactful_conv_element.json"):
    # 1. 先尝试读取已有的 JSON 文件，如果不存在则创建一个空列表
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data_list = json.load(f)
                # 如果文件不是一个列表，可能需要根据实际情况做容错处理
                if not isinstance(data_list, list):
                    data_list = []
            except json.JSONDecodeError:
                # 如果文件不是合法的 JSON 格式，就初始化为空列表
                data_list = []
    else:
        data_list = []
    
    # 2. 向列表中追加新的字典
    data_list.append(data_dict)
    
    # 3. 将更新后的列表重新写回 JSON 文件
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data_list, f, ensure_ascii=False, indent=4)

In [55]:
# # gpt rewrite 一下real_reason_c
# real_reason_q = "Alice’s social battery is really running dry these days, and she thinks she needs more time to be alone."
# lie_q = "Alice has a meeting in the afternoon, so she won’t be able to join for studying."
# truth_q = "Alice is actually free for the whole afternoon."