In [None]:
import json
from collections import defaultdict
from pathlib import Path
from typing import Dict

import pandas as pd



def get_controbutor_stats(data: Dict, output_dir: str = "../data/results/github"):
    """Aggregate per-contributor commit statistics and write them to two CSV files.

    Commits are grouped by lower-cased author name. For every contributor the
    function counts config-related and non-config commits, and how often each
    config file appears in that contributor's commits.

    Two files are written to ``<output_dir>/<project_name>/``:

    * ``<project_name>_contributors.csv`` - one row per contributor.
    * ``<project_name>_contributor_files.csv`` - one row per (contributor, file).

    The target directory is created if it does not exist, and both files always
    carry a header row, even when there are no commits.

    The misspelling in the function name is kept so existing callers keep working.

    Args:
        data: Project record. ``"project_name"`` defaults to ``"Unknown"``;
            ``"config_commit_data"`` is a list of commit dicts (missing or
            ``None`` is treated as empty). Each commit must provide ``"author"``
            and ``"is_config_related"``; ``"network_data"`` and its
            ``"config_files"`` list are optional and may be ``None``.
        output_dir: Base directory for the results. The default is the location
            that used to be hard-coded.

    Returns:
        A ``(commit_stats_df, changed_files_df)`` tuple with the same content
        that was written to disk.

    Raises:
        KeyError: If a commit lacks ``"author"`` or ``"is_config_related"``.
    """
    commit_stats_columns = ["Contributor", "Config Commits", "Non-Config Commits"]
    changed_files_columns = ["Contributor", "Changed File", "File Change Count"]

    project_name = data.get("project_name", "Unknown")
    # `or []` also covers an explicit null in the JSON input.
    commit_data = data.get("config_commit_data") or []

    # Per-contributor counters, created lazily on first access.
    contributors_stats = defaultdict(lambda: {
        "config_commits": 0,
        "non_config_commits": 0,
        "files_changed": defaultdict(int),
    })

    for commit in commit_data:
        # Lower-casing merges authors that differ only in capitalisation.
        stats = contributors_stats[commit["author"].lower()]

        if commit["is_config_related"]:
            stats["config_commits"] += 1
        else:
            stats["non_config_commits"] += 1

        # "network_data" / "config_files" may be absent or null; both mean "no files".
        network_data = commit.get("network_data") or {}
        for file in network_data.get("config_files") or []:
            stats["files_changed"][file] += 1

    commit_stats_rows = [
        {
            "Contributor": contributor,
            "Config Commits": stats["config_commits"],
            "Non-Config Commits": stats["non_config_commits"],
        }
        for contributor, stats in contributors_stats.items()
    ]
    changed_files_rows = [
        {
            "Contributor": contributor,
            "Changed File": file,
            "File Change Count": count,
        }
        for contributor, stats in contributors_stats.items()
        for file, count in stats["files_changed"].items()
    ]

    # Explicit columns keep the header row even when there are no rows at all.
    commit_stats_df = pd.DataFrame(commit_stats_rows, columns=commit_stats_columns)
    changed_files_df = pd.DataFrame(changed_files_rows, columns=changed_files_columns)

    # to_csv does not create missing parent directories, so do it up front.
    project_dir = Path(output_dir) / str(project_name)
    project_dir.mkdir(parents=True, exist_ok=True)

    commit_stats_df.to_csv(project_dir / f"{project_name}_contributors.csv", index=False)
    changed_files_df.to_csv(project_dir / f"{project_name}_contributor_files.csv", index=False)

    return commit_stats_df, changed_files_df


In [None]:
import os

# Visit every directory below the results folder and look for its project JSON file.
for root, dirs, files in os.walk("../data/results/github"):
    # The directory's own name is used as the project name.
    project_dir_name = os.path.basename(root)

    # Directories without any files have nothing to process.
    if not files:
        continue

    print(f"Process {project_dir_name}...")

    # First file whose name ends with "<directory name>.json", or None when there is none.
    file_name = next(
        (name for name in files if name.endswith(f"{project_dir_name}.json")),
        None,
    )

    # NOTE(review): file_name is located but nothing in this cell consumes it yet;
    # presumably the JSON should be loaded and passed on for processing - confirm.
    if file_name is None:
        continue


Process test-config-repo...
test-config-repo.json
