## 套件與資料載入

In [1]:
# Import packages
import sys
sys.path.append('..')
# sys.path.append('.')

from tqdm import tqdm
import re

import ast
from units import find_item, find_index

import json
import os
from langchain.callbacks import get_openai_callback

# if __name__ == '__main__':
#   import argparse
    
#     parser = argparse.ArgumentParser(description='')
#     parser.add_argument('--config_path', dest='config_path')
#     parameter_args = parser.parse_args()

#     config_path = parameter_args.config_path

# Location of the evaluation settings (API key, dataset paths, per-GPT
# options). Hard-coded here in place of the commented-out --config_path option.
config_path = '../config/evaluation_config.json'

# Every JSON file is read explicitly as UTF-8 so that loading does not depend
# on the platform's default locale encoding.
with open(config_path, 'r', encoding='utf-8') as f:
    config = json.load(f)
# Publish the key through the environment
# (presumably where the OpenAI client looks for it -- confirm).
os.environ['OPENAI_API_KEY'] = config['OpenAI_api_key']

# Test set: a dict with at least 'data_list' and 'only_for_classification'.
test_dataset_path = config['test_dataset_path']
with open(test_dataset_path, 'r', encoding='utf-8') as f:
    test_set = json.load(f)

# Database file configured for the Retrieve GPT.
recommend_path = config['GPTs']['Retrieve']['database_path']
with open(recommend_path, 'r', encoding='utf-8') as f:
    recommend_data = json.load(f)

only_for_classification = test_set['only_for_classification']

In [23]:
from chains import make_chain, make_memory, Chain_manager

# Prompt strings configured for the SQL and Retrieve GPTs.
SQL_PROMPT = config['GPTs']['SQL']['prompt']
RECOMMEND_PROMPT = config['GPTs']['Retrieve']['prompt']

# A single manager builds every chain from the loaded config. make_chain
# returns a pair; the second element is discarded for every chain except
# 'Consultant', where it is kept as `memory`.
chain_manager = Chain_manager(config)
classifier_chain, _ = chain_manager.make_chain('Classifier')
general_chain, _ = chain_manager.make_chain('Netizen') # answering chain under evaluation; the original note names 'Netizen' and 'General' (presumably the two selectable chain names -- confirm)
db_chain, _ = chain_manager.make_chain('SQL')
retrieve_chain, _ = chain_manager.make_chain('Retrieve')
consultant_chain, memory = chain_manager.make_chain('Consultant')

# Chains used to evaluate the generated answers.
comment_chain, _ = chain_manager.make_chain('Comment')
score_chain, _ = chain_manager.make_chain('General_user_score')
recommend_eva_chain, _ = chain_manager.make_chain('Recommend_evaluation')
consultant_comment_chain, _ = chain_manager.make_chain('Consultant_Comment')

## 預測

In [24]:
# Split the test questions into batches.
#
# A batch holds at most `batch_size` consecutive questions that share the same
# class: a new batch starts whenever the class changes or the current batch is
# full. Each batch is a dict with:
#   'data_list'   - list of {"question": <str>} entries, in test-set order
#   'class'       - the class shared by every question in the batch
#   'group_index' - 1-based position of the batch in `questions_group`
# An empty test set yields an empty `questions_group` instead of failing.
from itertools import groupby

batch_size = 10

questions_group = []

# groupby merges only *adjacent* records with equal class, so the original
# order of the test set is preserved and a class that reappears later opens
# new batches rather than being merged with its earlier run.
for collect_class, class_run in groupby(test_set['data_list'], key=lambda data: data['class']):
    class_run = list(class_run)

    # Cut the run into slices of at most batch_size records.
    for start in range(0, len(class_run), batch_size):
        group = {
            'data_list': [
                {"question": f"{data['question']}"}
                for data in class_run[start:start + batch_size]
            ],
            'class': collect_class,
            'group_index': len(questions_group) + 1,
        }
        questions_group.append(group)

In [25]:
import time
time1 = time.time()

# First decimal digit in the classifier's reply; raw string so that `\d` is a
# regex token rather than an (invalid) string escape.
first_digit = re.compile(r'\d')

# `cb` is read after this cell for the total token count and cost of the run.
with get_openai_callback() as cb:

    # Running position in test_set['data_list']; the batches were built from
    # that list in order, so a flat counter lines the two up.
    data_index = 0
    for group in tqdm(questions_group, desc='模型預測'):

        # One batched call per chain for the whole group of questions.
        predict_list = general_chain.batch(group['data_list'])
        predict_class_list = classifier_chain.batch(group['data_list'])

        for item, predict_class, predict in zip(group['data_list'], predict_class_list, predict_list):

            # The answer is attached to the batch entry itself.
            item['answer'] = predict

            # A reply without any digit is recorded as None (which never
            # equals a real class label) instead of aborting the whole run.
            digit = first_digit.search(predict_class)
            test_set['data_list'][data_index]['predict_class'] = digit.group() if digit else None

            data_index += 1

time2 = time.time()
process_time = time2 - time1
print('index search time:', process_time)

模型預測:   0%|          | 0/10 [00:00<?, ?it/s]

模型預測: 100%|██████████| 10/10 [01:43<00:00, 10.39s/it]

index search time: 103.93152523040771





In [26]:
# Grade every generated answer with the scoring chain and store the numeric
# score next to the answer in each batch entry.

# First number in the grader's reply, including an optional fractional part,
# so a reply such as "7.5" is not truncated to 7.
score_pattern = re.compile(r'\d+(?:\.\d+)?')

for group in tqdm(questions_group, desc='模型預測'):

    score_list = score_chain.batch(group['data_list'])
    for item, score in zip(group['data_list'], score_list):
        found = score_pattern.search(score)
        if found is None:
            # There is no sensible numeric fallback for a missing score, so
            # fail with the offending reply rather than a bare IndexError.
            raise ValueError(f'No numeric score found in grader reply: {score!r}')

        item['score'] = float(found.group())

模型預測:   0%|          | 0/10 [00:00<?, ?it/s]

模型預測: 100%|██████████| 10/10 [00:11<00:00,  1.16s/it]


In [27]:
# Aggregate the evaluation: copy each answer/score back into the test set,
# compute the average classifier and answer scores, and record run statistics.
total_classifier_score = 0
total_general_score = 0
data_num = 0

# Flatten the batches; they were built from test_set['data_list'] in order, so
# the n-th flattened entry belongs to the n-th test-set record.
batched_results = [data for group in questions_group for data in group['data_list']]

for record, data in zip(test_set['data_list'], batched_results):

    total_general_score += data['score']
    data_num += 1

    record['answer'] = data['answer']
    record['score'] = data['score']

    # Each record is judged on its own prediction and label; a correct
    # classification is worth 10 points.
    if record['predict_class'] == record['class']:
        total_classifier_score += 10

# Guard against an empty test set instead of dividing by zero.
classifier_average = total_classifier_score / data_num if data_num else 0.0
general_average = total_general_score / data_num if data_num else 0.0
print(f'Classifier score: {classifier_average}')
print(f'General score: {general_average}')

# Sum of len() over all answers (total characters, assuming answers are strings).
total_response_count = sum(len(data['answer']) for data in test_set['data_list'])
print(f'Response count: {total_response_count}')

# Run statistics stored alongside the per-question results.
test_set['Response count'] = total_response_count
test_set['token'] = cb.total_tokens
test_set['cost'] = cb.total_cost
test_set['Inference time'] = process_time
print(f'Cost: {round(cb.total_cost, 15)}')
print(f'Token: {cb.total_tokens}')
print('Inference time', process_time)

Classifier score: 10.0
General score: 7.96
Total response count: 14436
Cost: 0.418304500000001
Token: 80307
Inference time 103.93152523040771


In [7]:
# # General
# Classifier score: 10.0
# General score: 8.01
# Response count: 25097
# Cost: 0.167917
# Token: 101056
# Inference time 120.15399718284607

# # Netizen
# Classifier score: 10.0
# General score: 7.96
# Response count: 14436
# Cost: 0.418304500000001
# Token: 80307
# Inference time 103.93152523040771


## 紀錄儲存

In [8]:
from units import get_time_text

# Save the annotated test set as a JSON report named after the evaluation time.
_, evaluation_datetime = get_time_text()
test_set['Evaluation datetime'] = evaluation_datetime
report_path = config['report_path']
# Make sure the target directory exists so the results of a finished run are
# not lost to a missing folder.
if report_path:
    os.makedirs(report_path, exist_ok=True)
evaluation_json_path = f'{report_path}/{evaluation_datetime}.json'

# ensure_ascii=False writes non-ASCII text verbatim, so the file must be
# opened as UTF-8 rather than with the platform's default encoding.
with open(evaluation_json_path, 'w', encoding='utf-8') as rm:
    json.dump(test_set, rm, ensure_ascii=False, indent=4)

print(f'Report path : {evaluation_json_path}')

Report path : ../report/2024-03-20 13-32-21.json
