In [4]:
from generator import GeneratorFactory
from typing import List

def generate(model_name: str, temperature: float, messages: List) -> str:
    """Run one generation request against the named model.

    A generator is obtained from ``GeneratorFactory`` for the given model
    name and temperature, then asked to generate from ``messages``.

    Args:
        model_name: Identifier handed to ``GeneratorFactory.get_generator``.
        temperature: Sampling temperature handed to the factory.
        messages: Conversation passed through unchanged to the generator's
            ``generate`` method.

    Returns:
        Whatever the generator's ``generate`` call returns.
    """
    factory = GeneratorFactory()
    backend = factory.get_generator(temperature=temperature, model_name=model_name)
    return backend.generate(messages=messages)

In [6]:
from prompt_settings import PrompSettingsFactory
from util import load_config
from dotenv import load_dotenv
import os
import mlflow
import json

# Paths are relative to the notebook's working directory.
config_file = "../config.toml"
env_file = "../.env"
config = load_config(config_file=config_file)
load_dotenv(dotenv_path=env_file)

# Report only whether each credential is available. Printing the values
# themselves would store them in the notebook's saved cell output.
for secret_name in ("PINECONE_API_KEY", "GITHUB_TOKEN"):
    print(f"{secret_name}: {'set' if os.getenv(secret_name) else 'MISSING'}")

# Evaluation inputs/outputs are keyed by the index name.
index_name = "tech-docs"
data_file = f"../data/evaluation/all_dependencies_{index_name}.json"
output_file = f"../data/evaluation/all_dependencies_{index_name}_results.json"
model_names = ["gpt-3.5-turbo-0125", "gpt-4o-2024-05-13"] # "llama3:70b", "llama3:8b"

mlflow.set_experiment(experiment_name="rag_experiment")

# Evaluate every entry of the data file with each model and record the
# inputs and outputs of the run in MLflow.
with mlflow.start_run(run_name=f"{index_name}_{config['rerank']}_{config['embed_model']}-embedding"):

    mlflow.log_params(config)
    # The .env file is deliberately NOT logged as an artifact: it is the file
    # the API keys are loaded from, and uploading it would copy those secrets
    # into the run's artifact store.
    mlflow.log_artifact(local_path=data_file)

    with open(data_file, "r", encoding="utf-8") as src:
        data = json.load(src)

    prompt_settings = PrompSettingsFactory.get_prompt_settings(tool_name=config["tool_name"])

    for entry in data:
        # Fill the query template from the entry's context and task text.
        query_str = prompt_settings.query_prompt.format(
            context_str=entry["context_str"],
            task_str=entry["task_str"],
            format_str=prompt_settings.get_format_prompt()
        )

        messages = [
            {"role": "system", "content": entry["system_str"]},
            {"role": "user", "content": query_str},
        ]

        # One single-key dict per model, in the order of model_names; the
        # responses are attached to the entry so they are saved alongside it.
        entry["responses"] = [
            {
                model_name: generate(
                    model_name=model_name,
                    temperature=config["temperature"],
                    messages=messages
                )
            }
            for model_name in model_names
        ]

    with open(output_file, "w", encoding="utf-8") as dest:
        json.dump(data, dest, indent=2)

    mlflow.log_artifact(local_path=output_file)

Pinecone Key:  <REDACTED>
GitHub Key:  <REDACTED>
