-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.py
276 lines (226 loc) · 9.11 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import os
import re
import time
import yaml
import openai
import multiprocessing
# Read the OpenAI API key from the OPENAI_KEY environment variable.
# The key is a secret, so it is deliberately NOT echoed to stdout (printing it
# would leak the credential into terminals, CI logs and redirected output).
openai.api_key = os.getenv("OPENAI_KEY")
# Prompt used to classify one individual customer's decision. It has two
# str.format placeholders: {process} (the customer's extracted thought process)
# and {role_desc} (the customer's role description). The model is asked to
# answer with one or more of the option numbers 1-6 listed below.
prompt_template = """
You are outstanding data analysts. Now you need to analyze the reason of customer choices. Next is the thought process of customer:
{process}
This customer role description is
{role_desc}
Please choose single or multi options:
1: Meet Core Needs (Align with diet restriction or taste)
2: Brand Loyalty (Customer previous experience)
3: Public Praise (High Score or Positive comments)
4: More Affordable or Reasonable
5: Unique or Signature Dish
6: Explore New Dining Experience or Flavor
Only output all answer (1 - 6)
"""
# Prompt used to classify a decision when no role description is supplied.
# It has a single str.format placeholder, {process}, and offers the same six
# answer options (1-6) as the per-customer prompt.
prompt_template_group = """
You are outstanding data analysts. Now you need to analyze the reason of customer choices. Next is the thought process of customer:
{process}
Please choose single or multi options:
1: Meet Core Needs (Align with diet restriction or taste)
2: Brand Loyalty (Customer previous experience)
3: Public Praise (High Score or Positive comments)
4: More Affordable or Reasonable
5: Unique or Signature Dish
6: Explore New Dining Experience or Flavor
Only output all answer (1 - 6)
"""
def get_gpt_response(prompt, model="gpt-4-1106-preview", temperature=0.7,
                     max_tokens=2000, delay=5):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text sent as the content of the only (user) message.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature passed to the API.
        max_tokens: Completion token limit passed to the API.
        delay: Seconds to sleep after the response arrives and before
            returning (presumably a crude rate limiter -- confirm). A value
            of zero or less skips the pause.

    Returns:
        The message content of the first returned choice, with surrounding
        whitespace stripped.
    """
    messages = [{'role': 'user', 'content': prompt}]
    completion = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    response = completion.choices[0]['message']['content'].strip()
    # Guard the sleep so callers can disable the pause (time.sleep rejects
    # negative values).
    if delay > 0:
        time.sleep(delay)
    return response
def _extract_reason_texts(content):
    """Return the text that follows every ``"reason"`` key in ``content``.

    Each capture runs from just after ``"reason"`` up to the next ``}`` and
    may span several lines. Captures containing ``Only compare`` are dropped
    (presumably the echoed format instructions rather than a real answer --
    confirm against the log files).
    """
    captures = re.findall(r"\"reason\"(.*?)}", content, re.S)
    return [text.strip() for text in captures if "Only compare" not in text]


def single_reason(path='./logs'):
    """Count the reasons behind each individual customer's choices.

    For every analysed customer and every experiment folder under ``path``
    whose name contains ``single`` or ``group``, the customer's ``dine_<index>``
    file is read, each recorded reason is sent to GPT for classification into
    options 1-6, and the returned option digits are tallied. Progress is
    appended to ``log_single.txt`` and the running totals are printed.

    Args:
        path: Directory that contains one sub-folder per experiment.

    Returns:
        None. Results are written to the log file and printed.
    """
    with open('competeai/examples/group.yaml', 'r') as f:
        config = yaml.safe_load(f)

    # The first two configured players are skipped; indices below are
    # relative to the remaining list.
    players = config['players'][2:]

    # NOTE(review): the original comment spoke of eight customers remaining
    # after excluding Oscar and Umar, but only these three are analysed --
    # confirm which set is intended.
    customers = ['Jack', 'Xena', 'Bob']

    # Map each player name to its position and to its role description.
    player2idx = {}
    player2role = {}
    for i, player in enumerate(players):
        player2idx[player['name']] = i
        player2role[player['name']] = player['role_desc']

    # In the second scene's player list, entries that are plain strings are
    # individual customers; remember the position of each one.
    group2idx = {
        player: i
        for i, player in enumerate(config['scenes'][1]['players'])
        if isinstance(player, str)
    }

    # Experiment folders to analyse.
    exps = [exp for exp in os.listdir(path) if 'single' in exp or 'group' in exp]

    # Per-customer totals across all experiments.
    customers_reason = {}
    with open('log_single.txt', 'a') as log:
        for customer in customers:
            log.write(f'Customer: {customer}\n')
            role_desc = player2role[customer]
            customer_reason = {}
            for exp in exps:
                log.write(f'Experiment: {exp}\n')
                exp_path = os.path.join(path, exp)
                # 'single' experiments index by player order, the others by
                # position in the second scene's player list.
                index = player2idx[customer] if 'single' in exp else group2idx[customer]
                file_name = f'dine_{index}'
                with open(os.path.join(exp_path, file_name), 'r') as dine_file:
                    content = dine_file.read()

                # Ask GPT which reasons appear in each recorded decision.
                cnt = {}
                for process in _extract_reason_texts(content):
                    prompt = prompt_template.format(process=process, role_desc=role_desc)
                    ans = get_gpt_response(prompt)
                    print(ans)
                    # Keep only the option digits (1-6) from the reply.
                    ans = re.findall(r"[1-6]", ans)
                    print(ans)
                    ans_str = ','.join(ans)
                    log.write(f'Reason: {ans_str}\n')
                    # Update both the customer-wide and per-experiment tallies.
                    for r in ans:
                        customer_reason[r] = customer_reason.get(r, 0) + 1
                        cnt[r] = cnt.get(r, 0) + 1
                # Record this customer's reason counts for this experiment.
                log.write(f'one exp reason dict: {cnt}\n')
            log.write(f'{customer}: {customer_reason}\n')
            customers_reason[customer] = customer_reason
            print(customers_reason)
    print(customers_reason)
def _extract_summary_texts(content):
    """Return the text that follows every ``"summary"`` key in ``content``.

    Each capture runs from just after ``"summary"`` up to the next ``}`` and
    may span several lines. Captures containing ``including why this`` are
    dropped (presumably the echoed format instructions rather than a real
    answer -- confirm against the log files).
    """
    captures = re.findall(r"\"summary\"(.*?)}", content, re.S)
    return [text.strip() for text in captures if "including why this" not in text]


def group_reason(path='./logs'):
    """Count the reasons behind each customer group's choices.

    For group ids 9-14 and every experiment folder under ``path`` whose name
    contains ``group``, the file ``group_dine_<10 + id>`` is read, each
    recorded summary is sent to GPT for classification into options 1-6, and
    the returned option digits are tallied. Progress is appended to
    ``log_group.txt`` and the running totals are printed.

    Args:
        path: Directory that contains one sub-folder per experiment.

    Returns:
        None. Results are written to the log file and printed.
    """
    # NOTE(review): an earlier comment described 13 groups numbered 2-14, but
    # only ids 9-14 are processed here -- confirm the intended range.
    groups = list(range(9, 15))

    # Experiment folders to analyse.
    exps = [exp for exp in os.listdir(path) if 'group' in exp]

    # Per-group totals across all experiments.
    groups_reason = {}
    with open('log_group.txt', 'a') as log:
        for group in groups:
            log.write(f'group id: {group}\n')
            reason_counts = {}
            for exp in exps:
                log.write(f'Experiment: {exp}\n')
                exp_path = os.path.join(path, exp)
                # NOTE(review): file index is the group id offset by 10 --
                # verify against the log folder layout.
                index = 10 + int(group)
                file_name = f'group_dine_{index}'
                with open(os.path.join(exp_path, file_name), 'r') as dine_file:
                    content = dine_file.read()

                # Ask GPT which reasons appear in each recorded decision.
                cnt = {}
                for process in _extract_summary_texts(content):
                    prompt = prompt_template_group.format(process=process)
                    ans = get_gpt_response(prompt)
                    print(ans)
                    # Keep only the option digits (1-6) from the reply.
                    ans = re.findall(r"[1-6]", ans)
                    print(ans)
                    ans_str = ','.join(ans)
                    log.write(f'Reason: {ans_str}\n')
                    # Update both the group-wide and per-experiment tallies.
                    for r in ans:
                        reason_counts[r] = reason_counts.get(r, 0) + 1
                        cnt[r] = cnt.get(r, 0) + 1
                # Record this group's reason counts for this experiment.
                log.write(f'one exp reason dict: {cnt}\n')
            log.write(f'group_{group}: {reason_counts}\n')
            groups_reason[group] = reason_counts
            print(groups_reason)
    print(groups_reason)
# Script entry point: only the group analysis is currently run; the
# single-customer analysis and the two-process variant are left commented out.
if __name__ == "__main__":
    # get current path
    # print(os.getcwd())
    # Run the group and single analyses in parallel
    # single_reason('./logs')
    group_reason('./logs')
    # Create two processes that run the two functions and pass their arguments
    # process1 = multiprocessing.Process(target=single_reason)
    # process2 = multiprocessing.Process(target=group_reason)
    # # Start the processes
    # process1.start()
    # process2.start()
    # # Wait for both processes to finish
    # process1.join()
    # process2.join()
    # print("Both functions have finished.")