In [22]:
from typing import List
from cfgnet.network.network import NetworkConfiguration, Network
import random

repos = [
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/mall",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/apollo",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/litemall",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/piggymetrics",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/spring-boot-admin",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/mall-swarm",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/Spring-Cloud-Platform",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/jetlinks-community",
    "/home/simisimon/GitHub/cval_evaluation/spring-projects/music-website"
]


In [23]:
def get_dependencies(repo_path: str) -> List:
    cfg = NetworkConfiguration(
        project_root_abs=repo_path,
        enable_internal_links=True,
        enable_static_blacklist=False,
        enable_all_conflicts=True
    )

    network = Network.init_network(cfg=cfg)

    links = list(network.links)

    return links


In [24]:
test_set = []

for repo_path in repos:
    repo_name = repo_path.split("/")[-1]
    links = get_dependencies(repo_path=repo_path)
    print(f"Num links for {repo_name}: {len(links)}")
    random_sampled_links = random.choices(links, k=5)
    test_set += random_sampled_links

Num links for mall: 1435




Num links for apollo: 3584
Num links for litemall: 906


  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/spring-boot-admin/spring-boot-admin-samples/spring-boot-admin-sample-servlet/src/main/resources/application.yml", line 2, column 1
but found another document
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/spring-boot-admin/spring-boot-admin-samples/spring-boot-admin-sample-servlet/src/main/resources/application.yml", line 60, column 1"
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/spring-boot-admin/spring-boot-admin-samples/spring-boot-admin-sample-zookeeper/src/main/resources/application.yml", line 1, column 1
but found another document
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/spring-boot-admin/spring-boot-admin-samples/spring-boot-admin-sample-zookeeper/src/main/resources/application.yml", line 31, column 1"


Num links for piggymetrics: 2913




Num links for spring-boot-admin: 7612
Num links for mall-swarm: 1836


found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig/pig-auth/src/main/resources/application.yml", line 6, column 11"
found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig/pig-boot/src/main/resources/application.yml", line 8, column 11"
found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig/pig-gateway/src/main/resources/application.yml", line 6, column 11"
found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig/pig-upms/pig-upms-biz/src/main/resources/application.yml", line 6, column 11"
found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cval_evaluation/spring-projects/pig/pig-visual/pig-codegen/src/main/resources/application.yml", line 6, column 11"
found character '@' that cannot start any token
  in "/home/simisimon/GitHub/cva

Num links for Spring-Cloud-Platform: 2248




Num links for pig: 6116
Num links for jetlinks-community: 9328
Num links for music-website: 277


In [25]:
link_data = []

for link in test_set:
    link_data.append({
        "link_str": str(link),
        "project": str(link).split("::::")[0],
        "option_name": link.node_a.get_options(),
        "option_value": link.node_a.name,
        "option_file": link.artifact_a.name,
        "option_type": link.node_a.config_type,
        "option_technology": link.artifact_a.concept_name,
        "dependent_option_name": link.node_b.get_options(),
        "dependent_option_value": link.node_b.name,
        "dependent_option_file": link.artifact_b.name,
        "dependent_option_type": link.node_b.config_type,
        "dependent_option_technology": link.artifact_b.concept_name,
    })


In [26]:
import pandas as pd

df = pd.DataFrame(link_data)
df.to_csv("dependencies_with_rules.csv", index=False)

In [1]:
from llama_index.core import PromptTemplate

TASK_PROMPT = PromptTemplate(
    "Carefully evaluate whether configuration option {nameA} of type {typeA} with value {valueA} in {fileA} of technology {technologyA} \
    depends on configuration option {nameB} of type {typeB} with value {valueB} in {fileB} of technology {technologyB} or vice versa." 
)

SYSTEM_PROMPT = PromptTemplate(
    "You are full-stack expert in validating intra-technology and cross-technology configuration dependencies.\n" 
    "You will be presented with configuration options found in the software project '{project}'.\n" 
    "Your task is to determine whether the given configuration options actually depend on each other based on value-equality.\n\n"
    "A value-equality dependency is present if two configurations options must have identical values in order to function correctly."
    "Inconsistencies in these configuration values can lead to configuration errors."
    "Importantly, configuration options may have identical values by accident, meaning that there is no actual dependency, but just happens that they have equal values."
)


FORMAT_STR = """Respond in a JSON format as shown below:
{{
  “rationale”: string, // Provide a concise explanation of whether and why the configuration options depend on each other due to value-equality.
  “uncertainty”: integer, // Rate your certainty of this dependency on a scale from 0 (completely uncertain) to 10 (absolutely certain).
  “isDependency”: boolean // True if a dependency exists, or False otherwise.
}}"""



USER_PROMPT = PromptTemplate(
    "{task_str}\n\n"
    "{format_str}"
)

In [11]:
import pandas as pd
import ollama
import json


df = pd.read_csv("dependencies_with_rules.csv")

pre_rating = []

for x in df.to_dict("records"):
    response = ollama.chat(
        model='llama3:8b', 
        format="json",
        messages=[
            {
                'role': 'system',
                'content': SYSTEM_PROMPT.format(
                    project=x["project"]
                )
            },
            {
                "role": "user",
                "content": USER_PROMPT.format(
                    task_str=TASK_PROMPT.format(
                        nameA=x["option_name"],
                        typeA=x["option_type"],
                        valueA=x["option_value"],
                        fileA=x["option_file"],
                        technologyA=x["option_technology"],
                        nameB=x["dependent_option_name"],
                        typeB=x["dependent_option_type"],
                        valueB=x["dependent_option_value"],
                        fileB=x["dependent_option_file"],
                        technologyB=x["dependent_option_technology"]
                        
                    ),
                    format_str=FORMAT_STR
                )
            }
        ]
    )
    
    json_obj = json.loads(response['message']['content'])
    try:
        pre_rating.append(json_obj["isDependency"])
    except json.JSONDecodeError:
        pre_rating.append(None)
        
    print("Done")

df["pre_rating"] = pre_rating


{
  "rationale": "The configuration options have the same value 'logstash-logback-encoder' in both project.dependencies.dependency_logstash-logback-encoder.artifactId and project.dependencyManagement.dependencies.dependency_logstash-logback-encoder.artifactId, suggesting a possible value-equality dependency.",
  "uncertainty": 8,
  "isDependency": true
}
Done
{
    "rationale": "The two configuration options are for logging levels in separate Spring applications (mall-admin and mall-portal), which suggests that they are independent settings with no direct value- equality dependency.",
    "uncertainty": 9,
    "isDependency": false
}

 

 
 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 


Done
{
  "rationale": "Both configuration options define the root password for their respective systems (MySQL and Spring), which suggests that they may be intended to have identical values. However, without additional context or documentation, it is unclear wheth

JSONDecodeError: Expecting ':' delimiter: line 14 column 4 (char 690)

In [10]:
df.to_csv("dependencies_with_rules.csv", index=False)