In [40]:
import time
from openai import OpenAI
import os
import json
import pandas as pd
from os import getenv
from concurrent.futures import ThreadPoolExecutor

In [41]:
# OpenAI-compatible client pointed at the OpenRouter endpoint; the API key is
# read from the OPENROUTER_API_KEY environment variable rather than hardcoded.
client = OpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
)

# Captured once when this cell runs; stored in the 'run' column of every result row.
timestamp = time.strftime("%Y%m%d-%H%M%S")

In [42]:
def get_questions(file_name, folder_path):
    """Load the question set that belongs to ``file_name``.

    Everything from the first "." onwards is dropped from ``file_name`` and the
    file ``<stem>_questions.json`` is read from ``folder_path``.

    Args:
        file_name: Name (with or without extension) identifying the question set.
        folder_path: Directory that contains the ``*_questions.json`` files.

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist or
        cannot be read/parsed (a message is printed in that case).
    """
    try:
        # Keep only the part before the first dot, e.g. "report.txt" -> "report".
        stem = file_name.split(".")[0]
        path = os.path.join(folder_path, stem + "_questions.json")
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding (which is not UTF-8 on every system).
        with open(path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File {path} not found.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    # Explicit so callers can see that failure is signalled with None.
    return None

In [43]:
def prompt_model(model, messages, question):
    """Ask ``question`` within an ongoing conversation and record the reply.

    The question is appended to ``messages`` as a user turn and the whole
    conversation is sent via the module-level ``client``. On success the
    assistant's reply is appended to ``messages`` as well. The request is
    attempted at most twice.

    Args:
        model: Model identifier passed to ``client.chat.completions.create``.
        messages: Conversation history (list of role/content dicts); mutated in place.
        question: Text of the user question to ask.

    Returns:
        The completion response object on success, or ``None`` if both
        attempts failed. On failure the unanswered user turn is removed again,
        so ``messages`` only ever contains completed exchanges.
    """
    max_attempts = 2  # one initial try plus a single retry

    # Append the question exactly once, outside the retry loop; appending it
    # per attempt would duplicate the user turn whenever a retry happens.
    messages.append({"role": "user", "content": question})

    for attempt in range(max_attempts):
        # Reset per attempt so the error handler never reads an unbound name
        # (create() may raise before anything is assigned).
        response = None
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages
            )
            messages.append({"role": "assistant", "content": response.choices[0].message.content})
            return response
        except Exception as e:
            print(f"An error occurred: {e}")
            if attempt < max_attempts - 1:
                # A response object exists only when the call returned but its
                # payload could not be used; show it to help diagnose the failure.
                if response is not None:
                    print(response)
                print("Retrying...")
            else:
                print("Failed to get response.")

    # Every attempt failed: the last entry is still the unanswered user turn.
    messages.pop()
    return None

In [44]:
def run(output_folder_path, questions_folder_path, models=None):
    """Ask every model every question about every output file and save the answers.

    Each sub-folder of ``output_folder_path`` is paired with a question set
    loaded by ``get_questions`` (looked up by folder name). For every file in
    the sub-folder, the file content is sent to the model as context and the
    questions are asked one after another in the same conversation via
    ``prompt_model``. All answers are written to
    ``results/responses_<timestamp>.xlsx``.

    Args:
        output_folder_path: Directory whose sub-folders contain the files to query.
        questions_folder_path: Directory containing the ``*_questions.json`` files.
        models: Optional mapping ``model id -> [prompt price, completion price]``
            (price per token). Defaults to the built-in table below.

    Returns:
        None. Results are written to disk; progress and errors are printed.
    """
    folders = [f for f in os.listdir(output_folder_path) if os.path.isdir(os.path.join(output_folder_path, f))]
    print(folders)

    if models is None:
        # [prompt price, completion price] per token (listed per million tokens).
        # NOTE(review): the original table had 0.5/1.5 on llama-3-8b and
        # 0.07/0.07 on gpt-3.5-turbo-0125, which looks swapped relative to the
        # published gpt-3.5-turbo-0125 price; prices are a snapshot, so confirm
        # against current provider pricing.
        models = {"meta-llama/llama-3-8b-instruct": [0.07 / 1_000_000, 0.07 / 1_000_000],
                  "openai/gpt-3.5-turbo-0125": [0.5 / 1_000_000, 1.5 / 1_000_000],
                  "meta-llama/llama-3-70b-instruct": [0.59 / 1_000_000, 0.79 / 1_000_000],
                  "mistralai/mixtral-8x22b-instruct": [0.65 / 1_000_000, 0.65 / 1_000_000],
                  "openai/gpt-4o": [5 / 1_000_000, 15 / 1_000_000],
                  "openai/gpt-4-turbo": [10 / 1_000_000, 30 / 1_000_000],
                  }

    # Placeholder stored in every response-derived column when a request failed.
    failure = "ERROR: Failed to get response."

    def process_file(folder, file, llm, cost):
        """Ask all questions for ``folder`` about one file; return the result rows."""
        results = []
        path = os.path.join(output_folder_path, folder, file)

        filedata = get_questions(folder, questions_folder_path)
        # get_questions returns None on failure; without questions there is nothing to ask.
        if not isinstance(filedata, dict) or "questions" not in filedata:
            print(f"No questions available for folder {folder}; skipping file {file}")
            return results

        try:
            with open(path, "r", encoding="utf8", errors="ignore") as f:
                content = f.read()
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            print("When reading the file: " + path)
            return results

        # print the current model, folder and file
        print(f"Model: {llm}, Folder: {folder}, File: {file}")
        conversation = [{"role": "system", "content": "Only based on the information provided give short and concise answers to the following questions"},
                        {"role": "user", "content": content}]

        for data in filedata["questions"]:
            question = data["question"]
            response = prompt_model(llm, conversation, question)

            if response is None:
                # No usage data exists for a failed request, so every derived
                # column (including the cost) carries the failure marker.
                answer = prompt_tokens = completion_tokens = total_tokens = price = failure
            else:
                usage = response.usage
                answer = response.choices[0].message.content
                prompt_tokens = usage.prompt_tokens
                completion_tokens = usage.completion_tokens
                total_tokens = usage.total_tokens
                price = cost[0] * prompt_tokens + cost[1] * completion_tokens

            results.append({'Model': llm,
                            'Folder': folder,
                            'File': file.split(".")[0],
                            'Question': question,
                            'Answer': answer,
                            'prompt tokens': prompt_tokens,
                            'completion tokens': completion_tokens,
                            'total tokens': total_tokens,
                            'cost': price,
                            'not_mention': data.get("not_mention"),
                            'run': timestamp,
                            'Correct Answer': data["answer"]})
        return results

    # The set of (folder, file) pairs is the same for every model; list it once.
    tasks = [(folder, file)
             for folder in folders
             for file in os.listdir(os.path.join(output_folder_path, folder))]

    all_results = []
    # Files are processed concurrently per model; models run one after another.
    for llm, cost in models.items():
        with ThreadPoolExecutor() as executor:
            futures = [(executor.submit(process_file, folder, file, llm, cost), folder, file)
                       for folder, file in tasks]
            # Collect in submission order so the row order is deterministic.
            for future, folder, file in futures:
                try:
                    all_results.extend(future.result())
                except Exception as e:
                    # One failing file must not discard the other files' results.
                    print(f"An error occurred while processing the files: {str(e)}")
                    print(f"Skipped -> Model: {llm}, Folder: {folder}, File: {file}")

    # Build the frame once from all rows instead of growing it per model.
    df = pd.DataFrame(all_results)

    # Saving results
    os.makedirs('results', exist_ok=True)
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            file_timestamp = time.strftime("%Y%m%d-%H%M%S")
            df.to_excel(f'results/responses_{file_timestamp}.xlsx', index=False)
            break
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            if attempt == max_attempts:
                print("Giving up: the results could not be saved.")
                break
            # Actually retry after the user confirms, as the message promises.
            print("Please close the file and press Enter to try again.")
            input()


In [None]:
# Query the models about the files under "output_data", using the question
# sets stored in "input_data/questions".
run(output_folder_path="output_data",
    questions_folder_path="input_data/questions")