 Configuration options often do not exist in isolation. On the contrary, they are interconnected, so changing one configuration option can require changes to other related options to maintain consistency and avoid dependency violations. **Co-evolutionary changes** show which configuration options are changed together, which may indicate dependencies between them.

*Which configuration options are changed together?*

In [5]:
import json
import pandas as pd
from itertools import combinations
from collections import Counter
import glob

def summarize_commits(data, window_size):
    """
    Group commits into consecutive windows and collect the options each window modifies.

    Within one window, a later change to an option replaces an earlier one,
    so every option appears at most once per window.

    :param data: list of commit data
    :param window_size: number of consecutive commits per window
    :return: one ``{"modified_options": {option: pair}}`` dict per window
    """
    summaries = []
    for start in range(0, len(data), window_size):
        latest_by_option = {}

        for commit in data[start:start + window_size]:
            network_data = commit.get('network_data')
            # Commits without (or with empty) network data contribute nothing
            if network_data:
                for config_file in network_data.get('config_files_data', []):
                    # Key by option name; the last change seen in the window wins
                    latest_by_option.update(
                        (changed['option'], changed)
                        for changed in config_file.get('modified_pairs', [])
                    )

        # A window is recorded even when none of its commits modified an option
        summaries.append({"modified_options": latest_by_option})

    return summaries


def extract_option_cochanges(project_name: str, commit_window=1):
    """
    Count which configuration options of a project are modified together.

    Loads ``../data/microservice_projects/<project_name>.json``, groups its
    ``commit_data`` into windows of ``commit_window`` commits using
    ``summarize_commits`` and counts every pair of options that is modified
    within the same window.

    Pairs are counted on their full change details (every key/value of both
    modified-option records), not on the option names alone, so the same two
    options changed with different values yield separate rows.

    :param project_name: project name used to build the path of the JSON file
    :param commit_window: number of consecutive commits treated as one unit
    :return: DataFrame with one row per distinct co-changed pair; the
        "Percentage Internally" column holds ``count / number of windows``
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale encoding
    with open(f"../data/microservice_projects/{project_name}.json", encoding="utf-8") as file:
        data = json.load(file)

    summarized_commits = summarize_commits(data.get('commit_data', []), commit_window)

    # Counter to store co-changes
    option_pair_counts = Counter()

    # Process each summarized commit window
    for summarized_commit in summarized_commits:
        modified_options = summarized_commit.get("modified_options", {})

        # Generate unique pairs of modified options; sorting the names makes
        # (a, b) and (b, a) the same pair regardless of modification order
        option_names = list(modified_options.keys())
        option_pairs = [tuple(sorted(pair)) for pair in combinations(option_names, 2)]

        # Count occurrences of each pair and retain full details.
        # Dicts are not hashable, so each record is frozen into a sorted
        # tuple of its items to serve as part of the Counter key.
        for option1, option2 in option_pairs:
            pair_details = (
                tuple(sorted(modified_options[option1].items())),
                tuple(sorted(modified_options[option2].items()))
            )
            option_pair_counts[pair_details] += 1

    # Prepare DataFrame data
    rows = []
    for (option1_details, option2_details), count in option_pair_counts.items():
        # Turn the frozen item tuples back into dicts for keyed access
        opt1 = dict(option1_details)
        opt2 = dict(option2_details)
        rows.append({
            "Co-Changed Options": (opt1.get('option'), opt2.get('option')),
            "Commit Window": commit_window,
            "Option1": opt1.get("option"),
            "Values1": (opt1.get("prev_value"), opt1.get("curr_value")),
            "Artifact1": opt1.get("artifact"),
            "Option2": opt2.get("option"),
            "Values2": (opt2.get("prev_value"), opt2.get("curr_value")),
            "Artifact2": opt2.get("artifact"),
            "Changed Internally": count,
            # rows only exist when at least one window exists, so no zero division
            "Percentage Internally": count / len(summarized_commits)
        })

    return pd.DataFrame(rows)


# Project whose co-changes are extracted; also names the input JSON and the output CSV
project_name = "piggymetrics"
dfs = []

# One co-change table per window size (number of commits grouped together)
for commit_window in [1, 2, 5, 10]:
    df = extract_option_cochanges(project_name=project_name, commit_window=commit_window)
    dfs.append(df)

# Stack the per-window tables; the "Commit Window" column tells them apart
merged_df = pd.concat(dfs, ignore_index=True)
merged_df.to_csv(f"../data/cochanges/{project_name}_cochanges.csv", index=False)
# Last expression of the cell: the first ten rows of the merged table
merged_df.head(10)

Unnamed: 0,Co-Changed Options,Commit Window,Option1,Values1,Artifact1,Option2,Values2,Artifact2,Changed Internally,Percentage Internally
0,"(project.name, project.packaging)",1,project.name,"(PiggyMetrics, piggymetrics)",pom.xml,project.packaging,"(war, pom)",pom.xml,1,0.003448
1,"(ExecutableName, project.packaging)",1,ExecutableName,"(target/piggymetrics-1.0-SNAPSHOT.war, target/...",pom.xml,project.packaging,"(war, pom)",pom.xml,1,0.003448
2,"(ExecutableNameNoVersion, project.packaging)",1,ExecutableNameNoVersion,"(target/piggymetrics.war, target/piggymetrics....",pom.xml,project.packaging,"(war, pom)",pom.xml,1,0.003448
3,"(ExecutableName, project.name)",1,ExecutableName,"(target/piggymetrics-1.0-SNAPSHOT.war, target/...",pom.xml,project.name,"(PiggyMetrics, piggymetrics)",pom.xml,1,0.003448
4,"(ExecutableNameNoVersion, project.name)",1,ExecutableNameNoVersion,"(target/piggymetrics.war, target/piggymetrics....",pom.xml,project.name,"(PiggyMetrics, piggymetrics)",pom.xml,1,0.003448
5,"(ExecutableName, ExecutableNameNoVersion)",1,ExecutableName,"(target/piggymetrics-1.0-SNAPSHOT.war, target/...",pom.xml,ExecutableNameNoVersion,"(target/piggymetrics.war, target/piggymetrics....",pom.xml,1,0.003448
6,"(security.oauth2.resource.user-info-uri, sprin...",1,security.oauth2.resource.user-info-uri,"(http://localhost:8888/uaa/user, http://localh...",statistics-service/src/main/resources/applicat...,spring.data.mongodb.host,"(localhost, account-mongodb)",account-service/src/main/resources/application...,1,0.003448
7,"(EXPOSE, security.oauth2.client.accessTokenUri)",1,EXPOSE,"(8080, 4000)",gateway/Dockerfile,security.oauth2.client.accessTokenUri,"(http://localhost:8888/uaa/oauth/token, http:/...",statistics-service/src/main/resources/applicat...,1,0.003448
8,"(EXPOSE, security.oauth2.resource.user-info-uri)",1,EXPOSE,"(8080, 4000)",gateway/Dockerfile,security.oauth2.resource.user-info-uri,"(http://localhost:8888/uaa/users/current, http...",statistics-service/src/main/resources/applicat...,1,0.003448
9,"(EXPOSE, spring.data.mongodb.username)",1,EXPOSE,"(8080, 4000)",gateway/Dockerfile,spring.data.mongodb.username,"(service, user)",account-service/src/main/resources/application...,1,0.003448


In [None]:
# Export one co-change CSV per analyzed project.
# extract_option_cochanges() takes (project_name, commit_window) and builds the
# JSON path itself; it does not accept a file path, so only the name derived
# from the file is passed, by keyword.
# NOTE(review): the function reads from ../data/microservice_projects/ while
# this loop lists ../data/analyzed_projects/ - confirm that both directories
# hold the same project files (or align the two paths).
for file_path in glob.glob("../data/analyzed_projects/**"):
    # File name without directory and extension, e.g. ".../foo.json" -> "foo"
    repo_name = file_path.split("/")[-1].split(".")[0]
    dfs = []

    # One co-change table per window size (number of commits grouped together)
    for window_size in [1, 2, 5, 10]:
        df = extract_option_cochanges(project_name=repo_name, commit_window=window_size)
        dfs.append(df)

    # Stack the per-window tables and write them to a single CSV per project
    merged_df = pd.concat(dfs, ignore_index=True)
    merged_df.to_csv(f"../data/option_cochanges/{repo_name}_option_cochanges.csv", index=False)