In [1]:
from huggingface_hub import InferenceClient
from tqdm import tqdm
import os
import json

In [2]:
from concurrent.futures import ThreadPoolExecutor, as_completed

generate_kwargs = dict(
    temperature=1.0,
    max_new_tokens=4096,
    top_p=0.95,
    repetition_penalty=1.0,
    do_sample=True,
)

In [3]:
client = InferenceClient(
    "", timeout = 120
)


def format_prompt(message, history):
  prompt = "<s>"
  for user_prompt, bot_response in history:
    prompt += f"[INST] {user_prompt} [/INST]"
    prompt += f" {bot_response}</s> "
  prompt += f"[INST] {message} [/INST]"
  return prompt

In [5]:
from glob import glob

files = glob('mixtral-rag-question-factually-wrong/*.json')
files.extend(glob('mixtral-rag-question-factually-wrong-textbook/*.json'))
len(files)

146548

In [6]:
questions = []
for f in tqdm(files):
    with open(f) as fopen:
        data = json.load(fopen)
    output = data[-1]
    splitted = output.strip().split('\n')
    splitted = [s for s in splitted if '.' if s and '?' in s]
    splitted = ['.'.join(s.split('.')[1:]).strip() for s in splitted]
    splitted = [s for s in splitted if len(s) > 3]
    splitted = [s[1:] if s[0] == '"' else s for s in splitted]
    q = [s[:-1] if s[-1] == '"' else s for s in splitted]
    q = [(data[0], s) for s in q if len(s) > 10]
    questions.extend(q)
    
# questions = list(set(questions))
len(questions)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 146548/146548 [00:04<00:00, 32703.95it/s]


1281938

In [7]:
exists = set()
actual_questions = []
for q in tqdm(questions):
    if q[1] not in exists:
        exists.add(q[1])
        actual_questions.append(q)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 1281938/1281938 [00:00<00:00, 3199995.76it/s]


In [8]:
len(actual_questions)

1265220

In [10]:
!mkdir answer-factually-wrong
# !rm answer-factually-wrong/*.json

mkdir: cannot create directory ‘answer-factually-wrong’: File exists


In [11]:
def answer(q, i):
    filename = f'answer-factually-wrong/{i}.json'
    if os.path.exists(filename):
        return
    
    
    for _ in range(3):
        try:
            prompt = q[1]
            formatted_prompt = format_prompt(prompt, [])
            stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=False, details=True, return_full_text=False)
            output = stream.generated_text
            splitted = output.split()
            
            if len(splitted) < 20:
                continue
                
            with open(filename, 'w') as fopen:
                json.dump((q[0], output), fopen)
            break
        except Exception as e:
            # print(e)
            pass

In [12]:
actual_questions[0]

('Last weekend,\xa0Realme just had their first ever Realme 3 sale roadshow. It showcases all of the Realme products that are available in the market – like the Realme 3 which we love and also merchandises like the\xa0earbuds, tumbler, bagpacks and many more.\nSince the sale roadshow is held in a mall,\xa0visitors also won cash prizes up to RM1,000 by competing in a mini mobile game tournament at the roadshow.\n\n\n\nIt happened at two different places –\xa0Sunway Pyramid Shopping Mall and Sunway Velocity Mall. At these two venues, there were fans from all ages and from multiple different knowledge of Realme itself. Here are a few highlights from the Realme fans.\nDharani a/p Kaliappan\nDharani a/p Kaliappan, a 23-year-old Realme fan and now an owner of the Realme 3 smartphone has said that she liked the specs and design of the phone, and can’t believe that she’s able to own one for herself. The price is one of the main highlights on why she chooses this phone.\nLim Tiak Cheong\nNext we

In [13]:
prompts = []
for t in actual_questions:
    prompt = f'{t[0]}\n\n{t[1]}'
    prompts.extend([(t, prompt)] * 1)
    
len(prompts)

1265220

In [14]:
prompts[0]

(('Last weekend,\xa0Realme just had their first ever Realme 3 sale roadshow. It showcases all of the Realme products that are available in the market – like the Realme 3 which we love and also merchandises like the\xa0earbuds, tumbler, bagpacks and many more.\nSince the sale roadshow is held in a mall,\xa0visitors also won cash prizes up to RM1,000 by competing in a mini mobile game tournament at the roadshow.\n\n\n\nIt happened at two different places –\xa0Sunway Pyramid Shopping Mall and Sunway Velocity Mall. At these two venues, there were fans from all ages and from multiple different knowledge of Realme itself. Here are a few highlights from the Realme fans.\nDharani a/p Kaliappan\nDharani a/p Kaliappan, a 23-year-old Realme fan and now an owner of the Realme 3 smartphone has said that she liked the specs and design of the phone, and can’t believe that she’s able to own one for herself. The price is one of the main highlights on why she chooses this phone.\nLim Tiak Cheong\nNext w

In [15]:
answer(prompts[1], 1)

In [16]:
def consumer(queue, name):
    while True:
        if queue.qsize() == 0:
            break
        item = queue.get()
        answer(*item)
    print(f'consumer {name} done')

In [17]:
urls = [(q, no) for no, q in enumerate(prompts)]

In [18]:
from threading import Thread
from queue import Queue

queue = Queue()
for u in urls:
    queue.put(u)
    
ori_size = queue.qsize()

In [19]:
max_worker = 512
consumers = [Thread(target=consumer, args=(queue,i)) for i in range(max_worker)]
for i in range(len(consumers)):
    consumers[i].start()
    
pbar = tqdm(total=ori_size)
last_size = 0
while True:
    size = queue.qsize()
    if size == 0:
        break
    left = ori_size - size
    minus = left - last_size
    if minus > 0:
        pbar.update(minus)
        last_size += minus

pbar.close()

  1%|█▏                                                                                                       | 14465/1265220 [21:02<23:40:39, 14.67it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 57%|█████████████████████████████████████████████████████████▎                                           | 717236/1265220 [18:59:13<36:28:15,  4.17it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 64%|███████████████████████████████████████