**Evolutionary Changes**

Across the commit history configuration options sometimes change together in the same commit or in subsequent commits, called *co-evolutionary changes*. Such changes may indicate relationships between configuration options that imply that these options need to be changed together to ensure their consistency. An analysis of co-evolutionary changes therefore may help to identify related options.

*Which options frequently change together?*

In [2]:
import glob
import json
import os
import pandas as pd
from collections import Counter
from itertools import combinations
from typing import Dict

def summarize_commits(data, window_size):
    """
    Summarize commits in windows of a given size for configuration options.

    :param data: list of commit data
    :param window_size: size of the commit window
    :return: summarized list of commits with extracted options
    """
    summarized_data = []
    for i in range(0, len(data), window_size):
        window = data[i:i + window_size]
        modified_options = {}

        for commit in window:
            # Skip if no network data
            if not commit.get('network_data'):
                continue

            # Collect options from all files in this commit
            for file_data in commit['network_data'].get('config_file_data', []):
                for pair in file_data.get('modified_pairs', []):
                    # Store the modified option with its details
                    modified_options[pair['option']] = pair

        # Add unique options from the window to the summary
        summarized_data.append({"modified_options": modified_options})

    return summarized_data


def extract_option_cochanges(data: Dict, commit_window=1):
    """
    Extract co-changes of options in the commit history of a software project.

    :param data: list of configuration data from commit history of a software project
    :param commit_window: size of the commit window
    :return: dataframe containing co-changes of options
    """
    # Summarize commits in windows of the specified size
    summarized_commits = summarize_commits(data.get('commit_data', []), commit_window)

    # Counter to store co-changes
    option_pair_counts = Counter()

    # Process each summarized commit
    for summarized_commit  in summarized_commits:
        modified_options = summarized_commit.get("modified_options", {})
        
        # Generate unique pairs of modified options (by option name)
        option_names = list(modified_options.keys())
        option_pairs = [tuple(sorted(pair)) for pair in combinations(option_names, 2)]

        # Count occurrences of each pair and retain full details
        for option1, option2 in option_pairs:
            pair_details = (
                tuple(sorted(modified_options[option1].items())),
                tuple(sorted(modified_options[option2].items()))
            )
            option_pair_counts[pair_details] += 1

    # Prepare DataFrame data
    rows = []
    for (option1_details, option2_details), count in option_pair_counts.items():
        # Extract details for Option1 and Option2
        opt1 = dict(option1_details)
        opt2 = dict(option2_details)
        rows.append({
            "Co-Changed Options": (opt1.get('option'), opt2.get('option')),
            "Commit Window": commit_window,
            "Option1": opt1.get("option"),
            "Values1": (opt1.get("prev_value"), opt1.get("curr_value")),
            "Artifact1": opt1.get("artifact"),
            "Option2": opt2.get("option"),
            "Values2": (opt2.get("prev_value"), opt2.get("curr_value")),
            "Artifact2": opt2.get("artifact"),
            "Changed Internally": count,
            "Percentage Internally": count / len(summarized_commits)
        })

    return pd.DataFrame(rows).sort_values(by="Changed Internally", ascending=False)


project_files = glob.glob("../data/test_projects/*.json")
commit_windows = [1, 2, 5, 10]

for project_file in project_files:
    dfs = []
    print(f"Processing {project_file}")

    project_dir = project_file.split("/")[-2]
    file_name = project_file.split("/")[-1].split(".")[0]
    if not os.path.exists(f"../data/cochanges/{project_dir}"):
        os.makedirs(f"../data/cochanges/{project_dir}")

    with open(project_file, "r", encoding="utf-8") as src:
        data = json.load(src)

    for commit_window in commit_windows:
        print(f"Commit window: {commit_window}")
        df = extract_option_cochanges(data=data, commit_window=commit_window)
        dfs.append(df)
    
    merged_df = pd.concat(dfs)
    merged_df.to_csv(f"../data/cochanges/{project_dir}/{file_name}_cochanges.csv", index=False)

Processing ../data/test_projects/piggymetrics.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/catalyst.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/imaginary.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/TweetMap.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/scifgif.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/docker-antivirus.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/test-config-repo.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/elixir-ms.json
Commit window: 1
Commit window: 2
Commit window: 5
Commit window: 10
Processing ../data/test_projects/mira.json
Commit w

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv("../data/cochanges/piggymetrics_cochanges.csv")

# Filter data for a specific commit window
commit_window = 1
filtered_df = df[df["Commit Window"] == commit_window]

# Create a graph
G = nx.Graph()

# Add edges with weights based on 'Changed Internally'
for _, row in filtered_df.iterrows():
    option1 = row["Option1"]
    option2 = row["Option2"]
    weight = row["Changed Internally"]
    if G.has_edge(option1, option2):
        G[option1][option2]["weight"] += weight
    else:
        G.add_edge(option1, option2, weight=weight)

# Define edge weights
edges = G.edges(data=True)
weights = [d['weight'] for (_, _, d) in edges]

# Draw the graph using a circular layout
plt.figure(figsize=(14, 10))
pos = nx.circular_layout(G)

nx.draw_networkx_nodes(G, pos, node_size=500, node_color='lightblue')
nx.draw_networkx_edges(G, pos, width=weights, alpha=0.6)
nx.draw_networkx_labels(G, pos, font_size=8)

plt.title(f"Configuration Option Co-Changes (Circular Layout, Commit Window = {commit_window})")
plt.axis('off')
plt.tight_layout()
plt.show()



FileNotFoundError: [Errno 2] No such file or directory: '../data/cochanges/piggymetrics_cochanges.csv'