In [379]:
import pandas as pd
import random
import kagglehub
import os
import json
import re

In [380]:
from conv_generation_gpt_api import generate_natural_conversation_step1, generate_natural_conversation_step2, generate_natural_conversation_step3, generate_natural_conversation_step4

## 1. set the elements for conversation template

In [3]:
# 从名字脚本里面随机选择四个名字
# Download latest version
path = kagglehub.dataset_download("ryanburnsworth/popular-names-by-birth-year-1880-2022")
print("Path to dataset files:", path)

# List all files in the directory to find the correct CSV file
# files = os.listdir(path)
# print("Files in directory:", files)

# Load the CSV file into a pandas DataFrame
file_path = os.path.join(path, 'names_by_birth_year.csv')
df = pd.read_csv(file_path)
print(df.head())

df_sorted = df.sort_values(by="Count", ascending=False) # 1. 按 Count 降序排序
top_20_count = int(len(df_sorted) * 0.2)
df_top_20 = df_sorted.iloc[:top_20_count]  

Path to dataset files: /Users/liuyiwei/.cache/kagglehub/datasets/ryanburnsworth/popular-names-by-birth-year-1880-2022/versions/1
   Year       Name Gender  Count
0  1883       Mary      F   8012
1  1883       Anna      F   3306
2  1883       Emma      F   2367
3  1883  Elizabeth      F   2255
4  1883     Minnie      F   2035


In [4]:
# 1. 将所有离开原因写成一个列表
leave_reasons = [
    "bathroom break",
    "coffee break",
    "forgot something important",
    "forgot to print some documents",
    "forgot to recieve a package",
    "forgot to return a package",
    "forgot to run errands",
    "forgot to submit documents",
    "have a meeting starting soon that I need to prepare for",
    "have a previous engagement that I need to attend to quickly",
    "have a work-related emergency that requires my immediate attention",
    "have an unexpected visitor at my door",
    "have errands to run",
    "have to attend to someone who just walked in",
    "have to check on something",
    "have to go to the restroom",
    "have to pick up a prescription",
    "have to pick up dry cleaning",
    "have to print or scan documents",
    "have to receive a delivery",
    "have to recharge laptop",
    "have to return a borrowed item",
    "have to take care of a family matter",
    "have to take care of an unexpected task",
    "have unexpected visitor",
    "his/her pet needs attention",
    "his/her family is calling",
    "incoming delivery",
    "must respond to a phone call",
    "need to check on a friend or family member who needs assistance",
    "need to finish a task that’s time-sensitive",
    "need to get a phone call",
    "need to get some coffee",
    "need to go to the toilet",
    "need to grab a snack or a drink",
    "need to have a quick chat with someone else",
    "need to make a phone call",
    "need to make a quick trip to the drug store",
    "need to make a quick trip to the grocery store",
    "need to pick up a package",
    "need to receive a parcel",
    "need to recharge cellphone",
    "need to register for an event",
    "need to schedule a haircut or salon appointment",
    "need to schedule another appointment",
    # "need to step away for a moment to stretch and clear my mind",
    "need to step out for a moment",
    "need to submit some papers",
    "need to take care of some paperwork or documents",
    "need to take care of some personal matters",
    # "need to take care of something related to my health",
    "need to take care of something urgent",
    "need to troubleshoot something",
    "parking meter expiring",
    "remembered something that needs to be taken care of",
    "remembered to receive a package",
    "remembered to submit some papers",
    "remembered to take care of some paperwork or documents",
    "remembered to take care of some personal matters",
    "remembered to take care of something urgent",
    "want to go grab a drink",
    "want to go grab a coffee",
    "want to go take some fresh air",
    "want to go to the bathroom",
    # 下面是让gpt加的
    "need to move my car",
    "have to take an urgent call from my boss",
    "need to check my emails quickly",
    "have to respond to an important message",
    "need to restart my computer",
    "have to take a quick medication",
    "need to handle a minor household emergency",
    # "have to answer the doorbell",
    "have to refill my water bottle",
    "have to feed my pet",
    "have to water my plants",
    # "have to address a security alert at home",
    "have to take a brief walk to clear my mind",
    "need to step outside briefly to meet someone",
    "have to adjust the thermostat",
    "need to quickly tidy up my workspace",
    "need to quickly verify something important",
    # "have to quickly fix something technical",
    # "need to step away briefly due to loud background noise",
    "have to quickly arrange something for a later meeting",
    "need to briefly step out to confirm travel arrangements",
    "have to take care of an urgent email",
    "have to briefly assist a coworker",
    "need to briefly leave to verify appointment details",
    # "have to check on an appliance or device",
    "have to briefly tend to something outside",
    "have to quickly reschedule an upcoming meeting",
    "need to briefly attend to my child",
    # "have to briefly check the weather due to upcoming plans",
    # "need to quickly confirm my availability",
    "need to grab a quick snack",
    "need to quickly stretch my legs",
    "have to briefly troubleshoot my internet connection",
    "have to briefly step out for privacy reasons",
    "need to quickly tidy the room before another meeting",
    # "have to briefly step away to manage background distractions",
    "have to quickly update someone about my status",
    "need to briefly review notes or materials",
    "have to briefly leave to answer an urgent text",
    "have to briefly assist someone else in the household",
    # "need to briefly rest my eyes due to strain",
    "have to quickly plug in my device to charge",
    "need to briefly leave to retrieve an important item",
    "have to step away briefly to close a window or door",
    "need to quickly ensure I turned off an appliance",
    "have to briefly step away for a personal reason",
    "need to briefly step away due to allergy or health symptoms"
]

# # 3. 测试输出
# print("leave_reason_B:", leave_reason_B)
# print("leave_reason_D:", leave_reason_D)

In [5]:
# Function to populate template with variables
def populate_template(template, scenario, relationship, situation, lie_objective, real_reason, the_lie, the_fact, the_emotion_to_avoid, A_name, B_name, C_name, D_name):
    return template.replace("{{Topic for the scenario}}", scenario)\
                   .replace("{{relationship descriptor}}", relationship)\
                   .replace("{{A: the liar name}}", A_name)\
                   .replace("{{B: the target name}}", B_name)\
                   .replace("{{C: the accomplice name}}", C_name)\
                   .replace("{{D: the observer name}}", D_name)\
                   .replace("{{leave reason B}}", leave_reason_B)\
                   .replace("{{leave reason D_1}}", leave_reason_D_1)\
                   .replace("{{leave reason D_2}}", leave_reason_D_2)\
                   .replace("{{real_reason_c}}", real_reason_c)\
                   .replace("{{truth_c}}", truth_c if truth_c is not None else "")\
                   .replace("{{the lie}}", lie_c)\
                   .replace("{{situation_topic}}", situation_topic)\
                   .replace("{{situation}}", situation)\
                   .replace("{{lie_objective}}", lie_objective)

# def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str ) -> str:
#     """
#     将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
#     同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}"。
#     """
#     result = text.replace("A", f"{A_name}")
#     result = result.replace("B", f"{B_name}")
#     result = result.replace("C", f"{C_name}")
#     result = result.replace("D", f"{D_name}")
#     return result

In [6]:
def replace_ABCD_with_name(text: str, A_name: str, B_name: str, C_name: str, D_name: str) -> str:
    """
    将文本中的 "A's" 替换为 "{A_name}'s"；"A" 替换为 "{A_name}"；
    同理 "B's" 替换为 "{B_name}'s"；"B" 替换为 "{B_name}" 等。
    采用一次性替换，避免替换后的内容被再次修改。
    """
    # 定义替换映射
    mapping = {
        "A": A_name,
        "B": B_name,
        "C": C_name,
        "D": D_name
    }
    # 构建正则表达式，先匹配形如 A's 的情况，再匹配单个字母
    pattern = re.compile(r"(A's|B's|C's|D's|A|B|C|D)")
    
    def replacer(match):
        token = match.group(0)
        if token.endswith("'s"):
            # 处理 A's、B's、C's、D's 的情况
            letter = token[0]
            return mapping[letter] + "'s"
        else:
            return mapping[token]
    
    return pattern.sub(replacer, text)

In [7]:
# list for situation
# concert
# shopping
# painting
# ice skating

## 2. template

### 2.0 import elements from the conv_element.json

In [824]:
import json
with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json", "r", encoding="utf-8") as f:
    data_list = json.load(f)

In [826]:
data = data_list[4]
data

{'set_id': '4-13-0-0',
 'lie_id': '0-0',
 'conv_id': 13,
 'truth_id': 0,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': 'casual chat about summer approaching and vacation expectations',
  'situation_topic': "chatting about birthday wishes and reflections on aging, because Kaleo and Ricki's birthday are this week",
  'situation': 'Kaleo gave Doyle a fresh flower bouquet for his birthday yesterday as a surpurise birthday gift',
  'lie_objective': 'avoid making Kaleo feel bad about the ill-chosen gift',
  'leave_reason_B': 'have a urgent call',
  'leave_reason_D_1': 'use the bathroom',
  'leave_reason_D_2': 'feed the dog'},
 'relationship': 'friends',
 'muiltiple_liar': False,
 'real_reason_type': False,
 'characters': {'liar': 'Doyle',
  'target': 'Kaleo',
  'accomplice': 'Ricki',
  'observer': 'Blake'},
 'lie': {'real_reason_c': 'Doyle tends to be particular about gifts and rarely likes what others choose, and Doyle is seldom satisfied with gifts from ot

In [827]:
# 最外层直接取值
set_id = data["set_id"]
lie_id = data["lie_id"]
conv_id = data["conv_id"]
truth_id = data["truth_id"]
lie_type = data["lie_type"]
emotion = data["emotion"]

# 取 topic 下面的嵌套字段
scenario = data["topic"]["scenario"]
situation_topic = data["topic"]["situation_topic"]
situation = data["topic"]["situation"]
lie_objective = data["topic"]["lie_objective"]
leave_reason_B = data["topic"]["leave_reason_B"]
leave_reason_D_1 = data["topic"]["leave_reason_D_1"]
leave_reason_D_2 = data["topic"]["leave_reason_D_2"]

relationship = data["relationship"]

# 取 characters 下面的嵌套字段
A_name = data["characters"]["liar"]
B_name = data["characters"]["target"]
C_name = data["characters"]["accomplice"]
D_name = data["characters"]["observer"]

# 取 lie 下面的嵌套字段
real_reason_c = data["lie"]["real_reason_c"]
lie_c = data["lie"]["lie_c"]
truth_c = data["lie"]["truth_c"]
falsification = data["lie"]["falsification"]

# 最外层的上下文
part_1 = data["part_1"]
part_2 = data["part_2"]
part_3 = data["part_3"]
part_4 = data["part_4"]
# full_context = data["full_context"]
# short_context = data["short_context"]

### 2.1 set the elements for prompt

In [899]:
leave_reason_B, leave_reason_D_1, leave_reason_D_2 = random.sample(leave_reasons, 3)
print(leave_reason_B)
print(leave_reason_D_1)
print(leave_reason_D_2)

have to take care of an unexpected task
have to briefly tend to something outside
have to quickly plug in my device to charge


In [873]:
# leave_reason_B = 'have a urgent call'
# leave_reason_D_1 = 'use the bathroom'
# leave_reason_D_2 = 'feed the dog'
# # leave_reason_B = 'feed the dog'
# # leave_reason_D_1 = 'need to quickly stretch my legs'
# # leave_reason_D_2 = 'have to briefly assist someone else in the household'

In [897]:
random_names = df_top_20['Name'].sample(n=4).to_list()
A_name, B_name, C_name, D_name = random_names
print(f"A_name = {A_name}")
print(f"B_name = {B_name}")
print(f"C_name = {C_name}")
print(f"D_name = {D_name}")

A_name = Shauna
B_name = Everest
C_name = Dana
D_name = Syncere


In [905]:
lie_type = "altruistic_white_lies"
emotion = "sad"

# Set your variables
set_id = "4-13-3-2"
lie_id = "3-2"
conv_id = 13
########
# truth_id = 0
# falsification = None  # 赋值为空值（null）
########
truth_id = 1
# falsification = True
falsification = False
########

# muiltiple_liar = True
muiltiple_liar = False

# step 1
scenario = "casual chat about approaching vacation expectations for summer lake session"
relationship = "friends"

# step 2
# real_reason是否是针对target的
# true是针对target的 && false不是针对target的
# real_reason_type = True # 这个是只对B好
real_reason_type = False # 这个是对A和B都好1

# lie的三元组理论上不能有重叠比如realson不要出现truth
real_reason_c = "A feels genuine distress when given decor gifts, as incorporating mismatched items would disrupt their carefully planned living space"
real_reason_q = "A doesn't like receive home decor that may not fit their design vision"


# step 3
# situation_topic是在step3里面用来自然引出truth_c的
situation_topic = "chatting A will move into a new house with the lake view it's really exciting "
# # # truth有时候是关于AC对于B的
# falsified = true
# 因为去掉了avoid repeating exactly the same word这里的话c要比q长一些
# truth_c = "A exclusively furnishes their home with one-of-a-kind vintage finds from thrift stores, and dislikes mass-produced decor items"
# truth_q = "A only uses secondhand unique items in A's home decor"
truth_c = "A has been methodically sourcing each decor item for their new home"
truth_q = "A treats home decoration with precision" 
# falsified = false


# situation是在step4里面用来引出lie的
lie_objective = "avoid making B feel bad about the ill-chosen gift"
situation = "B unveils a vividly patterned boho rug with tassels from Ikea, completely opposite to A's minimalist style, and B is asking how A likes it"
lie_c = "A says, Thank you so much, I like it."
lie_q = "A likes the rug"


#下面这几句话不能有大写的ABCD
real_reason_c = replace_ABCD_with_name(real_reason_c, A_name, B_name, C_name, D_name)
lie_c = replace_ABCD_with_name(lie_c, A_name, B_name, C_name, D_name)
truth_c = replace_ABCD_with_name(truth_c, A_name, B_name, C_name, D_name)
# 简化lie的三元组可以直接用于question generation
real_reason_q = replace_ABCD_with_name(real_reason_q, A_name, B_name, C_name, D_name)
lie_q = replace_ABCD_with_name(lie_q, A_name, B_name, C_name, D_name)
truth_q = replace_ABCD_with_name(truth_q, A_name, B_name, C_name, D_name)
situation = replace_ABCD_with_name(situation, A_name, B_name, C_name, D_name)
lie_objective = replace_ABCD_with_name(lie_objective, A_name, B_name, C_name, D_name)
situation_topic = replace_ABCD_with_name(situation_topic, A_name, B_name, C_name, D_name)
scenario = replace_ABCD_with_name(scenario, A_name, B_name, C_name, D_name)

In [906]:
real_reason_c

'Shauna feels genuine distress when given decor gifts, as incorporating mismatched items would disrupt their carefully planned living space'

### 2.2 put elements into generation prompt

#### 2.2.1 Single liar

##### 2.2.1.1 provide truth

In [907]:
########## 提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion要删掉
step_1 = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2 = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# but do not propose any plans with others in the conversation之后不是invitation了要删掉
#  without directly repeating these exact words
# 提供truth
step_3 = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
The conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation. 
And then '{{truth_c}}' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# In response, {{A: the liar name}} tells a white lie that '{{the lie}}', in order to '{{lie_objective}}'.
step_4 = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally {{situation}}. In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}', in order to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When {{A: the liar name}} decline invitation, should simply decline without suggesting alternative plans or future possibilities.”
"""

# Populate template
filled_step_1 = populate_template(step_1, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing casual chat about approaching vacation expectations for summer lake session while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Shauna, Everest, Dana and Syncere.
They are friends.

Now, Everest leaves the conversation because of the reason: have to take care of an unexpected task, while Syncere leaves the conversation because of the reason: have to briefly tend to something outside.
With Everest and Syncere absent, Shauna and Dana continue their conversation, shifting the topic to 'Shauna feels genuine distress when given decor gifts, as incorporating mismatched items would disrupt their carefully planned living space'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Everest and Syncere are leaving the conversation.

Now, only Syncere returns and rejoins the conversation after

##### 2.2.1.2 Do not provide truth

In [853]:
########## 不提供truth!! ################
# while do not propose any plans for gathering together, just talk about everyone's opinion
step_1_notruth = """Start the conversation with all participants discussing {{Topic for the scenario}} while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
They are {{relationship descriptor}}.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
# but do not propose any plans with others in the conversation
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving due to: {{leave reason D_1}}. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# with {{situation_topic}}. Make sure the dialogue flows naturally without directly repeating these exact words.
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation because of the reason: {{leave reason B}}. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues, and then {{situation}} naturally unfolds. In response, {{A: the liar name}} tells {{B: the target name}} that '{{the lie}}' to '{{lie_objective}}'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""
# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)

Start the conversation with all participants discussing casual chat about approaching vacation expectations for summer lake session while do not propose any plans for gathering together, just talk about everyone's opinion.
In this conversation, four characters are participating: Amara, Pearl, Gerardo and Fern.
They are friends.

Now, Pearl leaves the conversation because of the reason: need to briefly review notes or materials, while Fern leaves the conversation because of the reason: have to return a borrowed item.
With Pearl and Fern absent, Amara and Gerardo continue their conversation, shifting the topic to 'Amara feels genuine distress when given decor gifts, as incorporating mismatched items would disrupt their carefully planned living space'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Pearl and Fern are leaving the conversation.

Now, only Fern returns and rejoins the conversation after after previously leaving due

#### 2.2.2 Mutiple liars

##### 2.2.2.1 provide truth

In [555]:
########## step3是situation_topic,step4是situation ################
########## 提供truth!! ################
step_1_truth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}. 
"""
# {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.


step_2_truth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 提供truth:
#  due to: {{leave reason D_1}}
step_3_truth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
And then '{{truth_c}}' is natrually revealed in the conversation. Ensure the dialogue flows naturally without directly repeating these exact words.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# because of the reason{{leave reason B}}
step_4_truth = """Now {{B: the target name}} returns to the conversation after leaving the conversation. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_truth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Atara, Esmeralda, Easton and Lucky.
Start the conversation with all participants discussing chatting casually about plan for chiristmas vacation.
They are friends. 

Now, Esmeralda leaves the conversation because of the reason: need to go to the toilet, while Lucky leaves the conversation because of the reason: have to feed my pet.
With Esmeralda and Lucky absent, Atara and Easton continue their conversation, shifting the topic to 'Atara and Easton want to encourage Esmeralda to whether it taste good or not after seeing how hard Esmeralda tried with the christmas dinner '. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Esmeralda and Lucky are leaving the conversation.

Now, only Lucky returns and rejoins the conversation after after previously leaving. Note that Esmeralda has not returned to the conversation yet.
Then the conversation continues and naturally shifts to 

##### 2.2.2.2 Do not provide truth

In [409]:
########## 不提供truth!! ################
########## step3是situation_topic,step4是situation_topic + situation ################
#  {{A: the liar name}} and {{C: the accomplice name}} are parents, {{B: the target name}} is kid, and {{D: the observer name}} is aunt/uncle.
step_1_notruth = """
In this conversation, four characters are participating: {{A: the liar name}}, {{B: the target name}}, {{C: the accomplice name}} and {{D: the observer name}}.
Start the conversation with all participants discussing {{Topic for the scenario}}.
They are {{relationship descriptor}}.
"""

step_2_notruth = """Now, {{B: the target name}} leaves the conversation because of the reason: {{leave reason B}}, while {{D: the observer name}} leaves the conversation because of the reason: {{leave reason D_1}}.
With {{B: the target name}} and {{D: the observer name}} absent, {{A: the liar name}} and {{C: the accomplice name}} continue their conversation, shifting the topic to '{{real_reason_c}}'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that {{B: the target name}} and {{D: the observer name}} are leaving the conversation.
"""
# 不提供truth:
# due to: {{leave reason D_1}}
step_3_notruth = """Now, only {{D: the observer name}} returns and rejoins the conversation after after previously leaving. Note that {{B: the target name}} has not returned to the conversation yet.
Then the conversation continues and naturally shifts to {{situation_topic}}. Do NOT catch up or recap details from the previous conversation.
After engaging briefly , {{D: the observer name}} leaves the conversation again because of the reason: {{leave reason D_2}}.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that {{D: the observer name}} first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# , in order to '{{lie_objective}}'
# {{leave reason B}}
# because of the reason: have to go to the restroom
step_4_notruth = """Now {{B: the target name}} returns to the conversation after leaving the conversation. 
First have {{B: the target name}} explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then, the conversation continues with {{situation_topic}}. And then {{situation}} naturally unfolds. Make sure the dialogue flows naturally without directly repeating these exact words.
In response, {{A: the liar name}} and {{C: the accomplice name}} tells {{B: the target name}} that '{{the lie}}'.
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
"""

# Populate template
filled_step_1 = populate_template(step_1_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_2 = populate_template(step_2_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_3 = populate_template(step_3_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
filled_step_4 = populate_template(step_4_notruth, scenario, relationship, situation, lie_objective, real_reason_c, lie_c, truth_c, emotion, A_name, B_name, C_name, D_name)
print(filled_step_1)
print(filled_step_2)
print(filled_step_3)
print(filled_step_4)


In this conversation, four characters are participating: Chevy, Kathleen, Lowell and Glen.
Start the conversation with all participants discussing chatting casually about plan for the thanksgiving vacation.
They are friends.

Now, Kathleen leaves the conversation because of the reason: remembered to take care of some paperwork or documents, while Glen leaves the conversation because of the reason: need to check on a friend or family member who needs assistance.
With Kathleen and Glen absent, Chevy and Lowell continue their conversation, shifting the topic to 'Chevy and Lowell want to encourage Kathleen keeps holiday spirit after noticing how hard Kathleen tried to contribute to Thanksgiving dinner, despite Kathleen's history of culinary mishaps'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Kathleen and Glen are leaving the conversation.

Now, only Glen returns and rejoins the conversation after after previously leaving. N

In [392]:
# A_name = "Chevy"
# B_name = "Kathleen"
# # C_name = Lowell
# # D_name = Glen

### 2.3 generation

#### step 1

In [903]:
result_1 = generate_natural_conversation_step1(filled_step_1)
part_1 = result_1.choices[0].message['content']
print(part_1)

**Shauna:** Hey everyone! With summer just around the corner, I can't stop daydreaming about those lazy days by the lake. What about you all?

**Everest:** Ah, those lazy lake days are the best. I can’t wait to just kick back with a good book and soak up the sun. What are you most looking forward to, Dana?

**Dana:** Honestly, just dipping my feet in the cool water sounds like heaven. I always love getting that little tingle from the chill. What about you, Syncere?

**Syncere:** For me, it's all about fishing. Something about casting a line and just sitting in peace is so therapeutic. Plus, if I catch something, it just means fresh dinner!

**Shauna:** That sounds so relaxing, Syncere. I'm partial to early morning walks along the shore when everything’s still and quiet. The lake feels so magical at dawn.

**Everest:** Oh, I love those quiet mornings too, Shauna. And then, of course, there are those inevitable barbecue evenings. There's nothing like grilling by the lakeside.

**Dana:** 

#### step 2

In [904]:
input_2 = filled_step_2 + "\n"+ part_1
print(input_2)

Now, Everest leaves the conversation because of the reason: have to take care of an unexpected task, while Syncere leaves the conversation because of the reason: have to briefly tend to something outside.
With Everest and Syncere absent, Shauna and Dana continue their conversation, shifting the topic to 'Shauna feels genuine distress when given decor gifts, as incorporating mismatched items would disrupt their carefully planned living space'. Ensure the dialogue flows naturally without directly repeating these exact words.
Remember to indicate that Everest and Syncere are leaving the conversation.

**Shauna:** Hey everyone! With summer just around the corner, I can't stop daydreaming about those lazy days by the lake. What about you all?

**Everest:** Ah, those lazy lake days are the best. I can’t wait to just kick back with a good book and soak up the sun. What are you most looking forward to, Dana?

**Dana:** Honestly, just dipping my feet in the cool water sounds like heaven. I alwa

In [884]:
result_2 = generate_natural_conversation_step2(input_2)
part_2 = result_2.choices[0].message['content']
print(part_2)

**Shaquana:** Oh, it seems someone's just walked in. I have to go attend to that. Enjoy planning your summer fun, everyone!

**Kenyon:** And I've got to help out a coworker for a bit. Catch you all later—let me know how the paddleboarding goes, Helene!

**Tyrique:** Sure thing, Shaquana and Kenyon! We'll fill you in later. Helene, speaking of paddleboarding and the lake, it got me thinking about how I'm always hesitant with home décor gifts. I love thoughtful presents, but I'm so particular about my space that it can be more stressful than you'd think.

**Helene:** I get it, Tyrique. It's like balancing uniqueness with your personal aesthetic, especially if everything has its own place and theme in your home. It must be challenging to incorporate pieces that don't fit just right.

**Tyrique:** Exactly! I appreciate the sentiment, of course, but I spend so much time ensuring my home reflects my style that when a new item clashes, it feels like my whole vibe is thrown off.

**Helene:** I

#### step 3

In [908]:
input_3 = filled_step_3 + "\n" + part_1 + "\n\n" + part_2
print(input_3)

Now, only Syncere returns and rejoins the conversation after after previously leaving due to: have to briefly tend to something outside. Note that Everest has not returned to the conversation yet.
The conversation continues and naturally shifts to chatting Shauna will move into a new house with the lake view it's really exciting . Do NOT catch up or recap details from the previous conversation. 
And then 'Shauna has been methodically sourcing each decor item for their new home' is natrually revealed in the conversation, but do not propose any plans with others in the conversation. Ensure the dialogue flows naturally.
After engaging briefly , Syncere leaves the conversation again because of the reason: have to quickly plug in my device to charge.
IMPORTANT:
- Clearly indicate through dialogue only (e.g., "I'm back," or "I have to leave again now.") that Syncere first returns and later departs again.
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from

In [909]:
# 检查step3生成的第一句！！！不能重复step2里面realreason的部分（因为不能让observer知道这部分）！！！！
# 检查没有target的名字
result_3 = generate_natural_conversation_step3(input_3)
part_3 = result_3.choices[0].message['content']
print(part_3)

**Syncere:** Hey everyone, I’m back! Just had to step out briefly. What did I miss?

**Shauna:** Hey Syncere! We were just talking about summer plans and I mentioned I'm moving into a new house with a lake view! It's so exciting, I can't wait to decorate it all.

**Syncere:** That sounds incredible, Shauna! A lake view sounds so peaceful and just perfect for those summer days.

**Shauna:** It really is. I've been methodically sourcing each decor item for the new place. I want everything to feel just right, you know? It’s both thrilling and a bit overwhelming.

**Syncere:** I can imagine! But it'll be worth it when everything comes together. By the way, I have to leave again—my device is about to die, and I need to charge it. Talk later!

**Shauna:** No worries, Syncere. Good luck with the charging. Let us know how the fishing goes next time!

**Dana:** Catch you later, Syncere! And Shauna, I can't wait to see how your place turns out. It sounds lovely.


#### step 4

In [910]:
input_4 = filled_step_4+ "\n" + part_1 + "\n\n" + part_2 + "\n\n" +part_3
print(input_4)

Now Everest returns to the conversation after leaving the conversation because of the reason: have to take care of an unexpected task. 
First have Everest explicitly indicate the return through dialogue. Do NOT catch up or recap details from the previous conversation. 
Then the conversation continues, where natrurally Everest unveils a vividly patterned boho rug with tassels from Ikea, completely opposite to Shauna's minimalist style, and Everest is asking how Shauna likes it. In response, Shauna tells Everest that 'Shauna says, Thank you so much, I like it.', in order to 'avoid making Everest feel bad about the ill-chosen gift'. 
IMPORTANT:
- Do NOT use third-person narrative or stage directions.
- Avoid mentioning details from previous conversations.
- When Shauna decline invitation, should simply decline without suggesting alternative plans or future possibilities.”

**Shauna:** Hey everyone! With summer just around the corner, I can't stop daydreaming about those lazy days by the l

In [912]:
result_4 = generate_natural_conversation_step4(input_4)
part_4 = result_4.choices[0].message['content']
print(part_4)

**Everest:** I'm back, everyone! Sorry for ducking out quickly earlier. I had to deal with something unexpected. Anyway, Shauna, I've brought something that I hope you'd like. It's a boho rug with tassels from Ikea. I thought it might add a splash of color to your new place. What do you think?

**Shauna:** Thank you so much, Everest. I like it.

**Everest:** I'm glad to hear that! It's such a vibrant pattern, isn't it? I thought it would be fun and lively for the new space.

**Shauna:** It definitely has a lot of character. I think it could find a nice spot somewhere. I appreciate your thoughtfulness.

**Everest:** I'm relieved to hear that, Shauna. I just wanted to bring a bit of warmth and coziness to your new home.

**Shauna:** Absolutely. The thought really counts, Everest. Thanks again!


#### combine the conversation

In [889]:
# [Crawford and Adrian leave the conversation.]
# 需要去掉这样的东西

In [913]:
# 10
# 6
# 6
# 6
# full_context = part_1 + part_2 + part_3 + part_4
# short_context = part_2 + part_3 + part_4
# 最外层的上下文
full_context = "\n\n".join([part_1, part_2, part_3, part_4])
short_context = "\n\n".join([part_2, part_3, part_4])

In [914]:
# # Print the full conversation context

# # Read the JSON file
# with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json", "r", encoding="utf-8") as f:
#     data_list = json.load(f)

# # Find and update the entry with set_id "4-13-0-0"
# for entry in data_list:
#     if entry["set_id"] == "4-13-0-0":
#         entry["short_context"] = short_context
#         break

# # Write the updated data back to the file
# with open("/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json", "w", encoding="utf-8") as f:
#     json.dump(data_list, f, ensure_ascii=False, indent=4)

In [916]:
import tiktoken
# 以 GPT-4 模型为例，选择对应的模型编码器
encoding = tiktoken.encoding_for_model("gpt-4o")
full_context_tokens = encoding.encode(full_context)
short_context_tokens = encoding.encode(short_context)
# 分别打印长度（字符数）和token数
print(f"Full context length (tokens): {len(full_context_tokens)}")
print(f"Short context length (tokens): {len(short_context_tokens)}")

Full context length (tokens): 1132
Short context length (tokens): 766


## 3. json

In [917]:
# 将所有字段分组放进一个字典
data_dict = {
    "set_id": set_id,
    "lie_id": lie_id,
    "conv_id": conv_id,
    "truth_id": truth_id,
    "lie_type": lie_type,
    "emotion": emotion,
    "topic": {
        "scenario": scenario,
        "situation_topic": situation_topic,
        "situation": situation,
        "lie_objective": lie_objective,
        "leave_reason_B": leave_reason_B,
        "leave_reason_D_1": leave_reason_D_1,
        "leave_reason_D_2": leave_reason_D_2,
    },
    "relationship": relationship,
    "muiltiple_liar": muiltiple_liar,
    "real_reason_type": real_reason_type,
    "characters": {
        "liar": A_name,
        "target": B_name,
        "accomplice": C_name,
        "observer": D_name
    },
    "lie": {
        "real_reason_c": real_reason_c,
        "lie_c": lie_c,
        "truth_c": truth_c,
        "real_reason_q": real_reason_q,
        "lie_q": lie_q,
        "truth_q": truth_q,
        "falsification": falsification
    },
    "part_1": part_1,
    "part_2": part_2,
    "part_3": part_3,
    "part_4": part_4,
    "full_context": full_context,
    "short_context": short_context,
    "full_context_tokens": len(full_context_tokens),
    "Short_context_tokens": len(short_context_tokens)
}

In [918]:
# 写一个判断如果 'falsification' = null不写入 'truth_c'
if data_dict.get('truth_id') == 0:
    data_dict['lie']['truth_c'] = ''
# 记得写入json
append_data_to_json(data_dict, "/Users/liuyiwei/python/tactful-tom/dataset/Tactful_conv_element_4.json")
data_dict

{'set_id': '4-13-3-2',
 'lie_id': '3-2',
 'conv_id': 13,
 'truth_id': 1,
 'lie_type': 'altruistic_white_lies',
 'emotion': 'sad',
 'topic': {'scenario': 'casual chat about approaching vacation expectations for summer lake session',
  'situation_topic': "chatting Shauna will move into a new house with the lake view it's really exciting ",
  'situation': "Everest unveils a vividly patterned boho rug with tassels from Ikea, completely opposite to Shauna's minimalist style, and Everest is asking how Shauna likes it",
  'lie_objective': 'avoid making Everest feel bad about the ill-chosen gift',
  'leave_reason_B': 'have to take care of an unexpected task',
  'leave_reason_D_1': 'have to briefly tend to something outside',
  'leave_reason_D_2': 'have to quickly plug in my device to charge'},
 'relationship': 'friends',
 'muiltiple_liar': False,
 'real_reason_type': False,
 'characters': {'liar': 'Shauna',
  'target': 'Everest',
  'accomplice': 'Dana',
  'observer': 'Syncere'},
 'lie': {'real

In [1289]:
# # 记得写入json
# append_data_to_json(data_dict, "/Users/liuyiwei/Desktop/TactfulToM/dataset/Tactful_conv_element.json")

In [39]:
def append_data_to_json(data_dict, filename="/Users/liuyiwei/Desktop/whiteToM/dataset/Tactful_conv_element.json"):
    # 1. 先尝试读取已有的 JSON 文件，如果不存在则创建一个空列表
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            try:
                data_list = json.load(f)
                # 如果文件不是一个列表，可能需要根据实际情况做容错处理
                if not isinstance(data_list, list):
                    data_list = []
            except json.JSONDecodeError:
                # 如果文件不是合法的 JSON 格式，就初始化为空列表
                data_list = []
    else:
        data_list = []
    
    # 2. 向列表中追加新的字典
    data_list.append(data_dict)
    
    # 3. 将更新后的列表重新写回 JSON 文件
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data_list, f, ensure_ascii=False, indent=4)

In [55]:
# # gpt rewrite 一下real_reason_c
# real_reason_q = "Alice’s social battery is really running dry these days, and she thinks she needs more time to be alone."
# lie_q = "Alice has a meeting in the afternoon, so she won’t be able to join for studying."
# truth_q = "Alice is actually free for the whole afternoon."