# In [None]:
import json
from collections import defaultdict
from typing import Dict, Tuple

import pandas as pd


def get_contributors_and_files(data: Dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Summarise commits and modified config files per contributor.

    Args:
        data: Mapping with a ``"commit_data"`` list. Each commit is read for
            the keys ``"author"``, ``"is_config_related"`` and
            ``"network_data"``; a truthy ``"network_data"`` may carry a
            ``"config_file_data"`` list whose entries are read for
            ``"is_modified"`` and ``"file_path"``.

    Returns:
        A ``(commit_stats_df, changed_files_df)`` tuple.

        ``commit_stats_df`` has one row per lower-cased author with the
        columns ``Contributor``, ``Config Commits`` and
        ``Non-Config Commits``, sorted by ``Config Commits`` descending.

        ``changed_files_df`` has one row per (author, file path) pair with
        the columns ``Contributor``, ``Changed File`` and
        ``File Change Count``.

        Both frames keep their columns even when there is nothing to report.

    Raises:
        KeyError: If a commit or file entry lacks one of the keys above.
    """
    commit_stats_columns = ["Contributor", "Config Commits", "Non-Config Commits"]
    changed_files_columns = ["Contributor", "Changed File", "File Change Count"]

    # ``or []`` also covers an explicit null/None value in the loaded JSON.
    commit_data = data.get("commit_data") or []

    # Per-author counters, keyed by the lower-cased author name.
    contributors_stats = defaultdict(lambda: {
        "config_commits": 0,
        "non_config_commits": 0,
        "files_changed": defaultdict(int),
    })

    for commit in commit_data:
        # Lower-case so differently-cased spellings of an author are merged.
        author = commit["author"].lower()
        is_config_related = commit["is_config_related"]
        network_data = commit["network_data"]
        changed_files = (
            network_data.get("config_file_data") or [] if network_data else []
        )

        stats = contributors_stats[author]
        if is_config_related:
            stats["config_commits"] += 1
        else:
            stats["non_config_commits"] += 1

        # Only files flagged as modified count towards the per-file tally.
        for file in changed_files:
            if file["is_modified"]:
                stats["files_changed"][file["file_path"]] += 1

    commit_stats_rows = []
    changed_files_rows = []
    for contributor, stats in contributors_stats.items():
        commit_stats_rows.append({
            "Contributor": contributor,
            "Config Commits": stats["config_commits"],
            "Non-Config Commits": stats["non_config_commits"],
        })
        changed_files_rows.extend(
            {
                "Contributor": contributor,
                "Changed File": file_path,
                "File Change Count": count,
            }
            for file_path, count in stats["files_changed"].items()
        )

    # Explicit columns keep the schema stable for empty input; without them
    # an empty frame has no "Config Commits" column and sorting raises KeyError.
    commit_stats_df = pd.DataFrame(commit_stats_rows, columns=commit_stats_columns)
    changed_files_df = pd.DataFrame(changed_files_rows, columns=changed_files_columns)
    commit_stats_df = commit_stats_df.sort_values(by="Config Commits", ascending=False)

    return commit_stats_df, changed_files_df


# Load the piggymetrics project data and build the per-contributor tables.
data_file = "../data/test_projects/piggymetrics.json"
# JSON text is UTF-8; decode explicitly instead of relying on the
# platform's default encoding.
with open(data_file, "r", encoding="utf-8") as file:
    data = json.load(file)

commit_stats_df, changed_files_df = get_contributors_and_files(data)