In [None]:
from typing import Dict

def get_commit_stats(data: Dict):
    """Count the config-related and non-config-related commits of a project.

    Args:
        data: Project analysis result. It must provide ``"config_commit_data"``
            (an iterable of commit dicts, each with an ``"is_config_related"``
            entry that is evaluated for truthiness) and ``"len_commits"`` (the
            expected total number of commits).

    Returns:
        A dict with the keys ``total_commits``, ``config_commits``,
        ``config_commits_percent``, ``non_config_commits`` and
        ``non_config_commits_percent``. The ``*_percent`` values are fractions
        of the total rounded to two decimals (they are not multiplied by 100)
        and are ``0.0`` when the project has no commits at all.

    Raises:
        AssertionError: If the number of commit entries does not match
            ``data["len_commits"]``.
    """
    config_commits = 0
    non_config_commits = 0

    # Single pass over the commits instead of filtering the list twice.
    for commit in data["config_commit_data"]:
        if commit["is_config_related"]:
            config_commits += 1
        else:
            non_config_commits += 1

    total_commits = config_commits + non_config_commits

    # Sanity check: every commit of the project must have been classified.
    assert total_commits == data["len_commits"], (
        f"expected {data['len_commits']} commits, found {total_commits}"
    )

    def _fraction(count: int) -> float:
        # An empty project would otherwise raise ZeroDivisionError.
        return round(count / total_commits, 2) if total_commits else 0.0

    return {
        "total_commits": total_commits,
        "config_commits": config_commits,
        "config_commits_percent": _fraction(config_commits),
        "non_config_commits": non_config_commits,
        "non_config_commits_percent": _fraction(non_config_commits),
    }


def get_technology_stats(data: Dict):
    """Collect the distinct concepts referenced by config-related commits.

    Args:
        data: Project analysis result providing ``"config_commit_data"``, an
            iterable of commit dicts. Each commit needs ``"is_config_related"``
            and ``"network_data"``; a non-empty ``"network_data"`` must contain
            an iterable under ``"concepts"``.

    Returns:
        A dict with ``total_concepts`` (number of distinct concepts) and
        ``concepts`` (a one-element list holding the set of distinct concepts).
    """
    concepts = set()

    for commit in data["config_commit_data"]:
        if not commit["is_config_related"]:
            continue

        network_data = commit["network_data"]
        if not network_data:
            # A commit may carry no network data (None or empty); indexing it
            # would raise, so there is simply nothing to collect from it.
            continue

        concepts.update(network_data["concepts"])

    return {
        "total_concepts": len(concepts),
        # NOTE(review): this is a list wrapping a single set, kept as-is for
        # compatibility; a flat list of concepts may have been intended - confirm.
        "concepts": [concepts],
    }


def get_config_file_stats(data: Dict):
    """Compute per-commit averages of changed, added and removed config files.

    Only commits whose ``"is_config_related"`` entry is truthy are considered.
    For each such commit the entries of ``network_data["config_files_data"]``
    are inspected:

    * a file counts as *changed* when its ``"insertions"`` or ``"deletions"``
      value is greater than zero (missing values default to 0);
    * a file counts as *added* when its ``"file_path"`` was not among the files
      of the previously processed commit;
    * files of the previously processed commit that are absent from the
      current one count as *removed*.

    Commits without network data (``None`` or empty) contribute zero to every
    counter and do not alter the file set used for the next comparison, but
    they still count towards the number of commits the averages are taken over.

    Args:
        data: Project analysis result providing ``"config_commit_data"``.

    Returns:
        A dict with ``avg_files_changed``, ``avg_files_added`` and
        ``avg_files_removed``, each rounded to two decimals. All three are
        ``0.0`` when there is no config-related commit.
    """
    commit_data = [commit for commit in data["config_commit_data"] if commit["is_config_related"]]

    total_changed = 0
    total_added = 0
    total_removed = 0

    # File paths of the previously processed commit (not a cumulative history).
    seen_files = set()

    for commit in commit_data:
        network_data = commit["network_data"]
        if not network_data:
            # Nothing to inspect; indexing a missing network_data would raise.
            continue

        current_commit_files = set()

        for config_file in network_data["config_files_data"]:
            file_path = config_file["file_path"]

            insertions = config_file.get("insertions", 0)
            deletions = config_file.get("deletions", 0)
            if insertions > 0 or deletions > 0:
                total_changed += 1

            if file_path not in seen_files:
                total_added += 1

            current_commit_files.add(file_path)

        total_removed += len(seen_files - current_commit_files)

        # The next commit is compared against this commit's files only.
        seen_files = current_commit_files

    commit_count = len(commit_data)

    def _average(total: int) -> float:
        # Without config-related commits the division would raise ZeroDivisionError.
        return round(total / commit_count, 2) if commit_count else 0.0

    return {
        "avg_files_changed": _average(total_changed),
        "avg_files_added": _average(total_added),
        "avg_files_removed": _average(total_removed),
    }


def get_option_stats(data: Dict):
    """Compute per-commit averages of changed, added and removed options.

    Only commits whose ``"is_config_related"`` entry is truthy are considered.
    For every file entry in ``network_data["config_files_data"]`` the lengths
    of ``"modified_pairs"``, ``"added_pairs"`` and ``"removed_pairs"`` are
    summed (a missing key counts as an empty list). Commits without network
    data (``None`` or empty) contribute zero but still count towards the
    number of commits the averages are taken over.

    Args:
        data: Project analysis result providing ``"config_commit_data"``.

    Returns:
        A dict with ``avg_options_changed``, ``avg_options_added`` and
        ``avg_options_removed``, each rounded to two decimals. All three are
        ``0.0`` when there is no config-related commit.
    """
    commit_data = [commit for commit in data["config_commit_data"] if commit["is_config_related"]]

    total_changed = 0
    total_added = 0
    total_removed = 0

    for commit in commit_data:
        network_data = commit["network_data"]
        if not network_data:
            # Nothing to count; indexing a missing network_data would raise.
            continue

        for file_data in network_data["config_files_data"]:
            total_added += len(file_data.get("added_pairs", []))
            total_removed += len(file_data.get("removed_pairs", []))
            total_changed += len(file_data.get("modified_pairs", []))

    commit_count = len(commit_data)

    def _average(total: int) -> float:
        # Without config-related commits the division would raise ZeroDivisionError.
        return round(total / commit_count, 2) if commit_count else 0.0

    return {
        "avg_options_changed": _average(total_changed),
        "avg_options_added": _average(total_added),
        "avg_options_removed": _average(total_removed),
    }




def get_links():
    """Placeholder for link statistics; not implemented yet, returns ``None``.

    Planned metrics (TODO):
        * number of links
        * average links added per commit
        * average links removed per commit
    """
    return None



In [None]:
import glob
import json
from pprint import pprint


# Container for per-project summaries; nothing is appended to it in this cell.
stats = []

# Batch processing of all project files is currently disabled; only the single
# project file opened below is analysed.
# for project_file in glob.glob("../data/results/microservices/*.json"):
#     project_name = project_file.split("/")[-1].split(".")[0]
#     print(f"Processing {project_name} ...")

with open("../data/analyzed_projects/piggymetrics.json", "r", encoding="utf-8") as src:
    project_data = json.load(src)

# Compute each group of statistics from the loaded project data.
commits_stats = get_commit_stats(data=project_data)
technology_stats = get_technology_stats(data=project_data)
config_file_stats = get_config_file_stats(data=project_data)
option_stats = get_option_stats(data=project_data)

# Flatten the individual result dicts into one summary record for the project.
project_stats = dict(
    name=project_data["project_name"],
    total_concepts=technology_stats["total_concepts"],
    concepts=technology_stats["concepts"],
    num_commits=commits_stats["total_commits"],
    num_config_commits=commits_stats["config_commits"],
    per_config_commits=commits_stats["config_commits_percent"],
    num_non_config_commits=commits_stats["non_config_commits"],
    per_non_config_commits=commits_stats["non_config_commits_percent"],
    avg_files_changed=config_file_stats["avg_files_changed"],
    avg_files_added=config_file_stats["avg_files_added"],
    avg_files_removed=config_file_stats["avg_files_removed"],
    avg_options_changed=option_stats["avg_options_changed"],
    avg_options_added=option_stats["avg_options_added"],
    avg_options_removed=option_stats["avg_options_removed"],
)

pprint(project_stats)

In [10]:
import json

data_file = "../data/analyzed_projects/mall.json"

with open(data_file, "r", encoding="utf-8") as src:
    data = json.load(src)

# Only commits flagged as config-related are inspected.
commits = [entry for entry in data["config_commit_data"] if entry["is_config_related"]]

# Distinct concepts found across those commits.
technologies = set()

for commit in commits:
    network_data = commit["network_data"]

    # Commits with missing or empty network data have nothing to contribute.
    if network_data:
        technologies.update(network_data["concepts"])

print(technologies)

{'docker-compose', 'configparser', 'yaml', 'maven', 'docker', 'json', 'spring'}
