In [None]:
import os
import logging
import openai
import pandas as pd
import time
from tqdm import tqdm
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

@retry(wait=wait_random_exponential(min=2, max=60), stop=stop_after_attempt(6))
def make_completion_request(model_name, prompt, question):
    # start_sequence = "\nA:"
    # restart_sequence = "\n\nQ: "

    input = prompt.replace("{question}", question)
    # logging.info(f"input: {input}")

    response = openai.Completion.create(
        model=model_name,
        prompt=input,
        temperature=0,
        max_tokens=50,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=["\n"],
    )

    return response


def get_davinci_completions(model_name, data, run_name, prompt, batch_size):
    openai.api_key = os.getenv("OPENAI_API_KEY")
    job_df = {"q_id": [], "question": [], "answer": []}

    # print(data.head())
    for index in tqdm(range(0, len(data))):
        question = data[index]

        try:
            if index % 50 == 0 and index != 0:
                print("************", index)
                time.sleep(10)
            response = make_completion_request(model_name, prompt, question)

            if response.choices[0].text.strip() == "":
                job_df["answer"].append("<no response>")
            else:
                job_df["answer"].append(response.choices[0].text.strip())
        except Exception as e:
            print("Could not get response. Here is the exception:", str(e))
            job_df["answer"].append("<exception>")

        job_df["q_id"].append(index)
        job_df["question"].append(question)
        # logging.info("Got response: ", response)
        # logging.info("\n\n\n")
        
        final_df = pd.DataFrame(job_df)
        final_df.to_csv(f"{run_name}_predictions.csv", index=False, sep="\t")
    return final_df


def get_davinci_completions_mp(model_name, data, run_name, prompt, batch_size):
    openai.api_key = os.getenv("OPENAI_API_KEY")
    job_df = {"q_id": [], "question": [], "answer": []}

    # chunk df into batches of size 100
    # for each batch, create a new process
    

    # print(data.head())
    for index in tqdm(range(0, len(data))):
        question = data[index]

        
    return final_df

def handle_qa_pair(index, question, model_name, prompt, run_name, job_df):
    try:
        if index % 50 == 0 and index != 0:
            print("************", index)
            time.sleep(10)
        response = make_completion_request(model_name, prompt, question)

        if response.choices[0].text.strip() == "":
            job_df["answer"].append("<no response>")
        else:
            job_df["answer"].append(response.choices[0].text.strip())
    except Exception as e:
        print("Could not get response. Here is the exception:", str(e))
        job_df["answer"].append("<exception>")

    job_df["q_id"].append(index)
    job_df["question"].append(question)
    # logging.info("Got response: ", response)
    # logging.info("\n\n\n")
    
    final_df = pd.DataFrame(job_df)
    final_df.to_csv(f"{run_name}_predictions.csv", index=False, sep="\t")


In [2]:
import pandas as pd

def load_dataset(ds_path):
    try:
        data = pd.read_csv(ds_path)
        data["Question"]
    except:
        data = pd.read_csv(ds_path, delimiter="\t")

    return data

In [3]:
import random
import time

def make_completion_request(model_name, prompt, question, index):
    delay = random.randint(100, 1000)
    
    # Wait for the specified delay
    # time.sleep(delay / 1000)
    time.sleep(index)

    return "this is an answer"

In [5]:
import multiprocessing as mp
from tqdm import tqdm

# ele = {"qid": 0, "question": "q", "answer": "a"}

def handle_qa_pair(index, question, model_name, prompt, q):
    print("Handling qa pair: ", index)
    try:
        # if index % 50 == 0 and index != 0:
        #     print("************", index)
        #     time.sleep(10)
        response = make_completion_request(model_name, prompt, question, index)

        # if response.choices[0].text.strip() == "":
        #     answer = "<no response>"
        # else:
        #     answer = response.choices[0].text.strip()
        answer = response
    except Exception as e:
        print("Could not get response. Here is the exception:", str(e))
        answer = "<exception>"

    # Put response into queue
    q.put({"qid": index, "question": question, "answer": answer})
    print("Finished job: ", index)
    # print("\n")

# def worker_function(item, q):
#     """
#     do some work, put results in queue
#     """
#     res = f'item: {item} - result: {item ** 2}'
#     print(res)
#     q.put(res)


def listener(q):
    """
    continue to listen for messages on the queue and writes to file when receive one
    if it receives a '#done#' message it will exit
    """
    with open('output_new.txt', 'wb') as f:
        while True:
            m = q.get()
            print("Heres the message: ", m)
            if m == '#done#':
                print("Got break")
                break
            f.write(f"{m['qid']}\t{m['question']}\t{m['anwser']}\n")
            print("Wrote line to file")
            f.flush()


manager = mp.Manager()
q = manager.Queue()
file_pool = mp.Pool(1)
file_pool.apply_async(listener, (q, ))

pool = mp.Pool(5)
jobs = []

data = load_dataset("/home/wallat/temporal-llms/data/Event-focused Questions/Explicitly Time-Scoped Questions.csv")
# print(data.head())

for index, row in data[:30].iterrows():
    question = row["Question"]
    job = pool.apply_async(handle_qa_pair, (index, question, "<model_name>", "<prompt>", q))
    jobs.append(job)

for job in tqdm(jobs):
    job.get()

q.put('#done#')  # all workers are done, we close the output file
pool.close()
pool.join()

  0%|          | 0/30 [00:00<?, ?it/s]

Handling qa pair: Handling qa pair:  Handling qa pair: Handling qa pair: Handling qa pair: 1   
2 03
4


Finished job:  0
Heres the message:  {'qid': 0, 'question': 'Which two U.S. States had a border dispute that had to be settled by the U.S. Supreme Court in April of 2001?', 'answer': 'this is an answer'}Handling qa pair: 
 5
Finished job:  1


  7%|▋         | 2/30 [00:01<00:14,  1.96it/s]

Handling qa pair:  6
Finished job:  2


 10%|█         | 3/30 [00:02<00:19,  1.40it/s]

Handling qa pair:  7
Finished job:  3


 13%|█▎        | 4/30 [00:03<00:21,  1.22it/s]

Handling qa pair:  8
Finished job:  4


 17%|█▋        | 5/30 [00:04<00:22,  1.13it/s]

Handling qa pair:  9
Finished job:  5


 20%|██        | 6/30 [00:05<00:22,  1.07it/s]

Handling qa pair:  10
Finished job:  6


 23%|██▎       | 7/30 [00:07<00:29,  1.28s/it]

Handling qa pair:  11
Finished job:  7


 27%|██▋       | 8/30 [00:09<00:33,  1.51s/it]

Handling qa pair:  12
Finished job:  8


 30%|███       | 9/30 [00:11<00:34,  1.66s/it]

Handling qa pair:  13
Finished job:  9


 33%|███▎      | 10/30 [00:13<00:35,  1.77s/it]

Handling qa pair:  14
Finished job:  10


 37%|███▋      | 11/30 [00:15<00:34,  1.84s/it]

Handling qa pair:  15
Finished job:  11


 40%|████      | 12/30 [00:18<00:39,  2.20s/it]

Handling qa pair:  16
Finished job:  12


 43%|████▎     | 13/30 [00:21<00:41,  2.45s/it]

Handling qa pair:  17
Finished job:  13


 47%|████▋     | 14/30 [00:24<00:41,  2.62s/it]

Handling qa pair:  18
Finished job:  14


 50%|█████     | 15/30 [00:27<00:41,  2.74s/it]

Handling qa pair:  19
Finished job:  15


 53%|█████▎    | 16/30 [00:30<00:39,  2.82s/it]

Handling qa pair:  20
Finished job:  16


 57%|█████▋    | 17/30 [00:34<00:41,  3.19s/it]

Handling qa pair:  21
Finished job:  17


 60%|██████    | 18/30 [00:38<00:41,  3.44s/it]

Handling qa pair:  22
Finished job:  18


 63%|██████▎   | 19/30 [00:42<00:39,  3.61s/it]

Handling qa pair:  23
Finished job:  19


 67%|██████▋   | 20/30 [00:46<00:37,  3.73s/it]

Handling qa pair:  24
Finished job:  20


 70%|███████   | 21/30 [00:50<00:34,  3.82s/it]

Handling qa pair:  25
Finished job:  21


 73%|███████▎  | 22/30 [00:55<00:33,  4.19s/it]

Handling qa pair:  26
Finished job:  22


 77%|███████▋  | 23/30 [01:00<00:31,  4.44s/it]

Handling qa pair:  27
Finished job:  23


 80%|████████  | 24/30 [01:05<00:27,  4.61s/it]

Handling qa pair:  28
Finished job:  24


 83%|████████▎ | 25/30 [01:10<00:23,  4.73s/it]

Handling qa pair:  29
Finished job:  25


 87%|████████▋ | 26/30 [01:15<00:19,  4.80s/it]

Finished job:  26


 90%|█████████ | 27/30 [01:21<00:15,  5.19s/it]

Finished job:  27


 93%|█████████▎| 28/30 [01:27<00:10,  5.44s/it]

Finished job:  28


 97%|█████████▋| 29/30 [01:33<00:05,  5.61s/it]

Finished job:  29


100%|██████████| 30/30 [01:39<00:00,  3.32s/it]
