In [2]:
'''
use retrieval augmented multi-role multi-expert collaboration (RAM2C) framework to generate responses
2024.9.15
'''
import re, json
import numpy as np
from t3_generate_db import get_vec_db, rewritings_rrf

def get_answers(path: str = '/home/hhy/RAM2C/generated_data/answers_20240915.txt') -> list[str]:
    '''
    Load the generated answers from a plain-text file.

    The whole file is read as UTF-8 and split wherever one or more digits are
    followed by a single space. Whatever precedes the first such marker is
    dropped, and the remaining pieces are returned in file order. Trailing
    newlines are kept as they appear in the file.

    NOTE: the split pattern matches anywhere in the text, so an answer that
    itself contains "<digits><space>" is split at that position as well.

    Args:
        path: location of the answers file; defaults to the 2024-09-15 dump.

    Returns:
        The list of text pieces found after each numeric marker (empty list
        if the file contains no marker).
    '''
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    return re.split(r'\d+ ', text)[1:]

def get_questions(path: str = '/home/hhy/RAM2C/generated_data/questions_20240915.txt') -> list[str]:
    '''
    Load the generated questions from a plain-text file.

    The whole file is read as UTF-8 and split on every marker of the form
    "<one or more digits><space><one digit><space>". Whatever precedes the
    first marker is dropped, and the remaining pieces are returned in file
    order. Trailing newlines are kept as they appear in the file.

    NOTE: the split pattern matches anywhere in the text, so a question that
    itself contains such a marker is split at that position as well.

    Args:
        path: location of the questions file; defaults to the 2024-09-15 dump.

    Returns:
        The list of text pieces found after each marker (empty list if the
        file contains no marker).
    '''
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    return re.split(r'\d+ \d ', text)[1:]

def get_questions_answers(path: str = '/home/hhy/RAM2C/evaluations/Q_A_eval.json') -> list[tuple[str, str]]:
    '''
    Load (question, answer) pairs from a JSON evaluation file.

    The file must hold a JSON object whose keys are the questions and whose
    values are indexable collections; the first element of each value is taken
    as the answer. The default evaluation file is noted by the original author
    as containing 136 questions and answers.

    Args:
        path: location of the JSON file; defaults to the project's Q_A_eval.json.

    Returns:
        One (question, first_answer) tuple per key, in the order the keys
        appear in the file.
    '''
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return [(question, candidates[0]) for question, candidates in data.items()]
# Notebook-wide evaluation set: a list of (question, answer) tuples, loaded
# once when this cell runs (reads the JSON file from disk).
QA = get_questions_answers()

1. retrieve documents
2. group reflection on documents
3. proactive analysis of filtered documents
4. generate response based on analysis and question-answer pair
5. integrate all experts' responses to generate final response

# 1. retrieve documents

In [3]:
# Open the vector store used for retrieval. Only the novel store is loaded;
# the record and education-theory stores are left disabled below.
NOVEL_DB_PATH = '/home/hhy/RAM2C/textdata/novel_db'

print("load vector db...")
# record_db = get_vec_db(path='/home/hhy/RAM2C/textdata/record_db')
# edu_theory_db = get_vec_db(path='/home/hhy/RAM2C/textdata/edu_theory_db')
novel_db = get_vec_db(path=NOVEL_DB_PATH)
print("vector db loaded.")

load vector db...
Load existing db from /home/hhy/RAM2C/textdata/novel_db
vector db loaded.


# 2. group reflection on documents

In [28]:
import importlib
import random

import t4_group_reflection

# Reload BEFORE the from-import: importlib.reload() re-executes the module but
# does not rebind names that were previously pulled in with `from ... import`,
# so importing first would leave `fields_7` / `group_reflection` pointing at
# the stale, pre-reload objects.
importlib.reload(t4_group_reflection)
from t4_group_reflection import fields_7, group_reflection

In [32]:
# Re-execute `prompts` so edits to the prompt templates are picked up without
# restarting the kernel. The reload must stay ahead of the from-import below,
# otherwise the template names would be bound to the pre-reload objects.
# NOTE: `importlib` is not imported in this cell; it has to be in the kernel
# namespace already (it is imported in an earlier cell).
import prompts
importlib.reload(prompts)
from chat_models import ChatvLLM
from prompts import teacher_analyze_ref_prompt, teacher_response_prompt

In [None]:
# Local directory of the model checkpoint handed to the project's ChatvLLM
# wrapper (presumably vLLM-backed -- confirm in chat_models). The resulting
# `model` object is the one later cells call via `model.completion(...)`.
model_name = "/mnt/d/LLM/MiniCPM-2B-dpo-fp16/"
model = ChatvLLM(model_path=model_name)

In [27]:
# Persona shared by both teacher-model calls (analysis and response).
TEACHER_SYSTEM_PROMPT = 'You are a talented Chinese language teacher with a deep understanding of literature teaching.'
# A reference is kept when at least this fraction of the entries in `fields_7` voted "yes".
MIN_APPROVAL_RATE = 0.5
# Number of approved references sampled for each question.
REFS_PER_QUESTION = 3

# question index -> list of generated response texts (one per sampled reference).
# Kept so the responses are available afterwards instead of being discarded.
expert_responses = {}

# NOTE: random.sample below is unseeded, so the chosen references differ between runs.
for i, (ques, ans) in enumerate(QA):
    # ---- 1. retrieve candidate references for this question/answer pair ----
    refs = []
    novel_reranked = rewritings_rrf(db=novel_db, query=ques+ans)
    # theory_reranked = rewritings_rrf(db=edu_theory_db, query=ques)
    # record_reranked = rewritings_rrf(db=record_db, query=ques)
    refs.extend(novel_reranked.keys())
    # refs.extend(theory_reranked.keys())
    # refs.extend(record_reranked.keys())

    # ---- 2. group reflection: every field votes on every reference ----
    cumulative_decisions = np.zeros((len(refs)))
    for field in fields_7:
        decisions = t4_group_reflection.group_reflection(ques, ans, refs, field)
        for j, label in enumerate(decisions):
            # Case-insensitive so 'Yes', 'YES', 'yes' (and other casings) all count.
            if 'yes' in label.lower():
                cumulative_decisions[j] += 1
    # Turn vote counts into the fraction of fields that approved each reference.
    cumulative_decisions /= len(fields_7)
    print(cumulative_decisions)
    references = [ref for ref, rate in zip(refs, cumulative_decisions) if rate >= MIN_APPROVAL_RATE]

    if len(references) >= REFS_PER_QUESTION:
        for ref in random.sample(references, REFS_PER_QUESTION):
            ##################################################################################
            ######################     3. proactive       analysis      ######################
            ##################################################################################
            analysis = model.completion(
                prompt=teacher_analyze_ref_prompt.format(
                    question=ques,
                    answer=ans,
                    reference=ref),
                system_prompt=TEACHER_SYSTEM_PROMPT)

            ##################################################################################
            ######################     4. generate        response      ######################
            ##################################################################################
            response = model.completion(
                prompt=teacher_response_prompt.format(
                    question=ques,
                    # `ans` is a string (it is concatenated with `ques` above), so the
                    # previous `ans[0]` passed only its first character to the prompt.
                    answer=ans,
                    reference=ref+'\n'+analysis.choices[0].message.content),
                system_prompt=TEACHER_SYSTEM_PROMPT)

            expert_responses.setdefault(i, []).append(response.choices[0].message.content)
    else:
        print(i, 'not enough references')

['1. "鲁滨逊在荒岛上的生存技巧：如何利用树木、动物和自然资源"', '2. "小朋友们学习鲁滨逊荒岛生存：树木、动物和其他自然资源的利用方法"', '小朋友们，我们知道鲁滨逊在荒岛上生活了很长时间，他不仅要解决吃和住的问题，还要学会怎么自救。这个故事告诉我们，一个人要有多么强大的生存能力啊！那么，你们觉得鲁滨逊是怎么利用岛上的资源，比如树木、动物和其他自然资源来保持生活的呢？\n']


100%|██████████| 4/4 [00:01<00:00,  2.94it/s]


['no', 'no', 'no', 'no']


100%|██████████| 4/4 [00:01<00:00,  2.79it/s]


['no', 'no', 'No', 'no']


100%|██████████| 4/4 [00:01<00:00,  3.01it/s]


['no', 'No', 'no', 'No']


100%|██████████| 4/4 [00:01<00:00,  2.98it/s]


['no', 'yes', 'no', 'no']


100%|██████████| 4/4 [00:02<00:00,  1.75it/s]


['no', 'no', 'no', 'no']


100%|██████████| 4/4 [00:01<00:00,  3.06it/s]


['no', 'no', 'no', 'no']


100%|██████████| 4/4 [00:01<00:00,  2.87it/s]


['no', 'yes', 'no', 'no']
[0.         0.28571429 0.         0.        ]
['1. "如果我是鲁滨逊，如何用创意改变命运"', '2. "鲁滨逊漂流记 创意生存方法 让岛上生活更精彩"', '同学们，如果你们是鲁滨逊，你们会怎么用创造性的想法来改变自己的命运或者说，你们会想出什么有趣的方法来让自己在岛上的生活变得更加精彩和快乐呢？\n']


 60%|██████    | 3/5 [00:01<00:00,  2.80it/s]


KeyboardInterrupt: 