In [23]:
import os
import git
import subprocess
import json
from typing import Dict
from cfgnet.network.network_configuration import NetworkConfiguration
from cfgnet.network.nodes import ArtifactNode
from cfgnet.network.network import Network
from pprint import pprint



def analyze_config_network(repo_path: str):
    
    network_config = NetworkConfiguration(
        project_root_abs=repo_path,
        enable_static_blacklist=False,
        enable_internal_links=True,
        enable_all_conflicts=True,
        system_level=False
    )

    network = Network.init_network(cfg=network_config)

    artifacts = network.get_nodes(node_type=ArtifactNode)

    config_files_data = []
    for artifact in artifacts:
        config_files_data.append({
            "file_path": artifact.rel_file_path,
            "options": len(artifact.get_pairs())
        })


    config_files = set(artifact.rel_file_path for artifact in artifacts)
    concepts = set(artifact.concept_name for artifact in artifacts)
  	
    network_data = {
        "links": len(network.links),
        "concepts": concepts,
        "config_files": config_files,
        "config_files_data": config_files_data
    }

    return network_data


def analyze_repository(repo_path: str, branch_name: str = "main") -> Dict:
    """Analyze Commit history of repositories and collect stats about the configuration space."""   
    
    repo = git.Repo(repo_path)

    # Save the current branch to return to it later
    current_branch = repo.active_branch.name if not repo.head.is_detached else None
    latest_commit = repo.head.commit.hexsha

    # Get all commits in the repository from oldest to newest
    commits = list(repo.iter_commits(branch_name))[::-1] 

    print(f"Number of commits: {len(commits)}")
    print("\n")

    commit_data = []

    for commit in commits:

        # get commit stats
        stats = commit.stats.total

        # Checkout the commit
        repo.git.checkout(commit.hexsha)

        # Run the external analysis (replace with your actual command)
        try: 
            network_data = analyze_config_network(repo_path=repo_path)
        except Exception as error:
            print(f"Error occurred: {error}")


        # Get general stats per file
        for file_path, file_stats in commit.stats.files.items():

            if file_path in network_data["config_files"]:

                file_data = next(filter(lambda x: x["file_path"] == file_path, network_data["config_files_data"]))
                file_data["insertions"] = file_stats['insertions']
                file_data["deletions"] = file_stats['deletions']
                file_data["total_changes"] = file_stats['insertions'] + file_stats['deletions']

            # Only proceed if the commit has a parent (to avoid errors on the first commit)
            #if commit.parents:
            #    parent_commit = f"{commit.hexsha}^"

                #try:
                    # Run git diff to capture line-by-line changes
                    #diff_output = subprocess.check_output(
                    #    ['git', 'diff', parent_commit, commit.hexsha, '--', file_path],
                    #    cwd=repo_path,
                    #    text=True
                    #)
                    #print("  Diff Output:\n", diff_output)

                    # Capture file size before and after the commit
                    #file_content_after = repo.git.show(f"{commit.hexsha}:{file_path}")
                    #size_after = len(file_content_after.encode('utf-8'))

                    #file_content_before = repo.git.show(f"{parent_commit}:{file_path}")
                    #size_before = len(file_content_before.encode('utf-8'))

                    #print(f"  Size Before: {size_before} bytes")
                    #print(f"  Size After: {size_after} bytes")
                    #print(f"  Net Size Change: {size_after - size_before} bytes")
                
                #except subprocess.CalledProcessError as e:
                    #print(f"  Error running git diff: {e}")
                #except git.exc.GitCommandError:
                    #print("  File did not exist in previous commit (added in this commit)")

            #else:
                #print("  No parent commit available (this is the first commit)")

        print("\n")

        commit_data.append(
            {
                "commit_hash": str(commit.hexsha),
                "author": f"{commit.author.name} <{commit.author.email}>",
                "commit_mgs": str(commit.message),
                "files_changed": stats['files'],
                "insertions": stats['insertions'],
                "deletions": stats['deletions'],
                "network_data": network_data
            }
        )

    # Finally, return to the latest commit
    if current_branch:
        # If we were on a branch, return to it
        repo.git.checkout(current_branch)
        print(f"Returned to original branch: {current_branch}")
    else:
        # If we were in a detached HEAD state, checkout the latest commit directly
        repo.git.checkout(latest_commit)
        print(f"Returned to the latest commit: {latest_commit}")

    for x in commit_data:
        pprint(x)

    with open("../data/test_project_data.json", "w", encoding="utf-8") as dest:
        json.dump(commit_data, dest, indent=2)

test_repo_path = "/home/ssimon/github/config_project"

stats = analyze_repository(repo_path=test_repo_path, branch_name="master")

Number of commits: 4










Returned to the latest commit: 643b6134b92bfec74c822d0436fc7c57ded16935
{'author': 'Sebastian Simon <Bastisimon95@googlemail.com>',
 'commit_hash': '643b6134b92bfec74c822d0436fc7c57ded16935',
 'commit_mgs': 'Initial commit\n',
 'deletions': 0,
 'files_changed': 2,
 'insertions': 86,
 'network_data': {'concepts': {'docker', 'maven'},
                  'config_files': {'src/Dockerfile', 'src/pom.xml'},
                  'config_files_data': [{'deletions': 0,
                                         'file_path': 'src/Dockerfile',
                                         'insertions': 24,
                                         'options': 18,
                                         'total_changes': 24},
                                        {'deletions': 0,
                                         'file_path': 'src/pom.xml',
                                         'insertions': 62,
                                         'options': 27,
                 

TypeError: Object of type set is not JSON serializable