In [None]:
import pandas as pd

# Draw a random sample of dependencies and store it as the test set.
SAMPLE_SIZE = 100
SAMPLE_SEED = 42  # fixed seed so that re-running the cell reproduces the same sample

df = pd.read_csv("../data/evaluation/all_dependencies.csv")

# Cap the sample size at the number of available rows: DataFrame.sample raises
# a ValueError when more rows are requested than exist (without replacement).
df_sample = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)

df_sample.to_csv("../data/evaluation/test_dependencies.csv", index=False)

In [None]:
import json

# Keep only the first three context items of every entry and store their
# contents, joined by blank lines, under "context_str".
data_file = "../data/evaluation/config5/all_dependencies_all.json"
output_file = "../data/evaluation/config5/all_dependencies_all_updated.json"

with open(data_file, "r", encoding="utf-8") as src:
    data = json.load(src)

for entry in data:
    context = entry["context"][:3]
    entry["context"] = context
    entry["context_str"] = "\n\n".join(item["content"] for item in context)

# Open in write mode: append mode would add a second JSON document to an
# existing file on every re-run, leaving a file that json.load cannot parse.
with open(output_file, "w", encoding="utf-8") as dest:
    json.dump(data, dest, indent=2)


**Get index of dependencies for which no context could be retrieved**

In [None]:
import json

data_file = "../data/evaluation/config5/all_dependencies_all.json"

with open(data_file, "r", encoding="utf-8") as src:
    data = json.load(src)

print(len(data))

# Show the keys of a context item. Take it from the first entry that actually
# has context, because indexing data[0]["context"][0] fails when the first
# entry is one of the entries without context.
first_context_item = next((x["context"][0] for x in data if x["context"]), None)
if first_context_item is not None:
    print(first_context_item.keys())

# Print the "index" of every entry whose context list is empty.
for x in data:
    if len(x["context"]) == 0:
        print(x["index"])

**Get indices of dependencies for which the response cannot be loaded as JSON**

In [None]:
import json

data_file = "../data/evaluation/without/all_dependencies_without_llama3:8b.json"

# Collects the "index" of every entry whose response needs to be regenerated.
target_indices = []

with open(data_file, "r", encoding="utf-8") as src:
    data = json.load(src)

print(len(data))

# Show the keys of a context item when the first entry has one; guarded so
# that a missing or empty context does not abort the cell.
first_context = data[0].get("context") if data else None
if first_context:
    print(first_context[0].keys())

for entry in data:
    response = entry["response"]

    try:
        response_dict = json.loads(response)
    except json.JSONDecodeError:
        target_indices.append(entry["index"])
        print("JSONDecodeError Index: ", entry["index"])
    else:
        # Only inspect the parsed response when parsing succeeded; inside the
        # except branch response_dict would be undefined or left over from a
        # previous iteration.
        if not isinstance(response_dict, dict) or "isDependency" not in response_dict:
            target_indices.append(entry["index"])
            print("IsDependency missing: ", entry["index"])

target_indices

**Run generation pipeline for specific indices**

In [None]:
from prompt_settings import PrompSettingsFactory
from typing import Dict, List
from generator import GeneratorFactory, GeneratorEngine
from dotenv import load_dotenv
from util import load_config
from tqdm import tqdm
import backoff
import json


@backoff.on_exception(backoff.expo, Exception, max_tries=10)
def generate(generator: GeneratorEngine, messages: List) -> str:
    """
    Request a response from the generator and return it if it is valid JSON.

    The backoff decorator retries the call on any exception, including the
    two raised here, with exponential waits and max_tries=10.

    :param generator: engine whose generate(messages=...) method is called
    :param messages: messages passed on to the generator
    :return: the response as returned by the generator
    :raises Exception: if the response is empty or cannot be parsed as JSON
    """
    answer = generator.generate(messages=messages)

    if answer:
        # Parse only to validate; the caller receives the unparsed response.
        try:
            json.loads(answer)
        except json.JSONDecodeError:
            raise Exception("Response cannot be loaded as JSON.")
        return answer

    raise Exception("Response is empty.")


def run_generation(config: Dict, target_index: List[int]) -> None:
    """
    Regenerate the responses of selected entries and write them back to the data file.

    :param config: generation settings; the keys "data_file", "with_rag",
        "tool_name", "model_name" and "temperature" are read
    :param target_index: values of the "index" field of the entries to
        process; all other entries are left unchanged
    :return: None. The file at config["data_file"] is overwritten in place
        with all entries once every selected entry has been processed.
    """
    print("Data file: ", config["data_file"])
    print("With RAG: ", config["with_rag"])

    with open(config["data_file"], "r", encoding="utf-8") as src:
        data = json.load(src)

    prompt_settings = PrompSettingsFactory.get_prompt_settings(tool_name=config["tool_name"])

    generator = GeneratorFactory().get_generator(
        model_name=config["model_name"],
        temperature=config["temperature"]
    )

    # A set makes the membership test constant-time instead of scanning the
    # list once for every entry in the data file.
    wanted_indices = set(target_index)

    for entry in tqdm(data, total=len(data), desc="Processing entries"):

        if entry["index"] not in wanted_indices:
            continue

        print("Process index: ", entry["index"])

        if config["with_rag"]:
            query_str = prompt_settings.query_prompt.format(
                context_str=entry["context_str"],
                task_str=entry["task_str"],
                format_str=prompt_settings.get_format_prompt()
            )
        else:
            # Without RAG the prompt consists of the task and the format instructions only.
            query_str = f"{entry['task_str']}\n\n{prompt_settings.get_format_prompt()}"

        messages = [
            {
                "role": "system",
                "content": entry["system_str"]
            },
            {
                "role": "user",
                "content": query_str
            }
        ]

        response = generate(
            generator=generator,
            messages=messages
        )

        entry["response"] = response

    output_file = config["data_file"]
    with open(output_file, "w", encoding="utf-8") as dest:
        json.dump(data, dest, indent=2)
            

env_file = "../.env"
config_file = "../generation_config.toml"

# Load the .env file before the configuration is read and the pipeline runs.
load_dotenv(dotenv_path=env_file)

# "index" values of the entries whose responses are regenerated.
rerun_indices = [125, 133, 181, 192, 238, 264, 268, 282, 285, 486]

config = load_config(config_file=config_file)
run_generation(config=config, target_index=rerun_indices)

In [None]:
import json

model_names = ["gpt-4o-2024-05-13", "gpt-3.5-turbo-0125", "llama3:8b", "llama3:70b"]

updated_file = "../data/results/config4/all_dependencies_all_updated.json"
data_file = "../data/results/config4/all_dependencies_all_llama3:70b.json"

with open(updated_file, "r", encoding="utf-8") as src:
    updated_data = json.load(src)

with open(data_file, "r", encoding="utf-8") as src:
    data = json.load(src)

# Locate entries by the value of their "index" field rather than assuming that
# this value equals the entry's position in the list; otherwise a mismatch
# would silently overwrite the wrong entry.
position_by_index = {entry["index"]: position for position, entry in enumerate(data)}

for entry in updated_data:
    index = entry["index"]
    if index not in position_by_index:
        raise KeyError(f"No entry with index {index} found in {data_file}")
    print(index)
    data[position_by_index[index]] = entry

with open(data_file, "w", encoding="utf-8") as dest:
    json.dump(data, dest, indent=2)

In [1]:
from prompt_settings import AdvancedCfgNetPromptSettings
from data import Dependency
from util import get_projet_description

# Example pair across two technologies: the Spring "server.port" option and
# the Dockerfile "EXPOSE" option, both with the value 8000.
dependency = Dependency(
    project="piggymetrics",
    option_name="server.port", option_value="8000", option_type="PORT",
    option_file="application.properties", option_technology="spring",
    dependent_option_name="EXPOSE", dependent_option_value="8000", dependent_option_type="PORT",
    dependent_option_file="Dockerfile", dependent_option_technology="Docker",
)

# Example pair within one technology: "project.modelVersion" in two Maven
# pom.xml files with the same value.
dependency1 = Dependency(
    project="piggymetrics",
    option_name="project.modelVersion", option_value="modelVersion:4.0.0", option_type="VERSION_NUMBER",
    option_file="pom.xml", option_technology="maven",
    dependent_option_name="project.modelVersion", dependent_option_value="modelVersion:4.0.0",
    dependent_option_type="VERSION_NUMBER",
    dependent_option_file="config/pom.xml", dependent_option_technology="maven",
)

project_str = get_projet_description(dependency.project)

system_str = AdvancedCfgNetPromptSettings.get_system_str(
    dependency=dependency,
    project_str=project_str
)

# Placeholder texts for the context and shot sections of the prompt. They must
# be plain strings: a trailing comma after the literal would create a
# one-element tuple, which str.format renders as "('Context',)".
context_str = "Context"
shot_str = "Shots"
task_str = AdvancedCfgNetPromptSettings.get_task_str(dependency=dependency)
format_str = AdvancedCfgNetPromptSettings.get_format_prompt()

user_prompt = AdvancedCfgNetPromptSettings.advanced_query_prompt.format(
    context_str=context_str,
    shot_str=shot_str,
    task_str=task_str,
    format_str=format_str
)

# Display the format instructions as the cell output.
format_str



'Respond in a JSON format as shown below:\n{\n\t“plan”: string, // Write down a step-by-step plan on how to solve the task given the information above.\n\t“rationale”: string, // Provide a concise explanation of whether and why the configuration options depend on each other due to value-equality.\n\t“isDependency”: boolean // True if a dependency exists, or False otherwise.\n}'

In [2]:
from util import load_shots, get_most_similar_shot, get_most_similar_shots

# Load the shots and display the one that get_most_similar_shot selects for
# `dependency`.
shots = load_shots()
relevant_shot = get_most_similar_shot(dependency=dependency, shots=shots)

relevant_shot

In [8]:
# Display the shots that get_most_similar_shots selects for `dependency1`.
relevant_shots = get_most_similar_shots(dependency=dependency1, shots=shots)
relevant_shots

('Carefully evaluate whether configuration option project.modelVersion of type VERSION_NUMBER with value modelVersion:4.0.0 in litemall-all/pom.xml of technology maven depends on configuration option project.modelVersion of type VERSION_NUMBER with value modelVersion:4.0.0 in litemall-admin-api/pom.xml of technology maven or vice versa.\n\n{\n    "rationale": "The \'project.modelVersion\' in a Maven project typically specifies the version of the POM model being used. Maven currently only supports the model version 4.0.0 and this is required all modules to ensure compatibility. Therefore, the project.modelVersion must be consistent across all modules, creatiing a dependency based on value-equality.",\n    "isDependency": true\n}',
 'Carefully evaluate whether configuration option project.modelVersion of type VERSION_NUMBER with value modelVersion:4.0.0 in mall-security/pom.xml of technology maven depends on configuration option project.modelVersion of type VERSION_NUMBER with value mode