In [27]:
import os
import git
import subprocess
from typing import Dict
from cfgnet.network.network_configuration import NetworkConfiguration
from cfgnet.network.nodes import ArtifactNode
from cfgnet.network.network import Network



def analyze_config_network(repo_path: str):
    """Build the cfgnet network for a checkout and print a short summary.

    The network is initialised from ``repo_path`` as it currently exists on
    disk, with the static blacklist and system-level analysis disabled and
    internal links and all conflicts enabled.

    Args:
        repo_path: Absolute path of the project root to analyze.

    Prints the network object, the number of artifact nodes, the set of
    their concept names, and the number of links. Nothing is returned.
    """
    cfg = NetworkConfiguration(
        project_root_abs=repo_path,
        enable_static_blacklist=False,
        enable_internal_links=True,
        enable_all_conflicts=True,
        system_level=False,
    )

    print("Create Network")
    network = Network.init_network(cfg=cfg)
    print("Network: ", network)

    # Artifact nodes carry the concept name of the configuration artifact.
    artifact_nodes = network.get_nodes(node_type=ArtifactNode)
    concept_names = {node.concept_name for node in artifact_nodes}

    print(f"Num Artifacts: {len(artifact_nodes)}")
    print(f"Concepts: {concept_names}")
    print(f"Num Links: {len(network.links)}")


def analyze_repository(repo_path: str, branch_name: str = "main") -> Dict:
    """Walk a branch's history and report stats for every commit.

    Commits reachable from ``branch_name`` are visited from oldest to newest.
    For each commit the metadata and diff stats are printed, the commit is
    checked out into the working tree, and ``analyze_config_network`` is run
    against that checkout. Failures of the network analysis are reported and
    do not stop the walk.

    The checkout that was active before the call is always restored, even if
    the walk is aborted by an exception.

    Args:
        repo_path: Path to the git repository to analyze.
        branch_name: Branch (or any revision) whose history is walked.

    Returns:
        A dict keyed by commit hexsha. Each value is a dict with the keys
        ``"total"`` (the commit-wide stats: files, insertions, deletions, ...)
        and ``"files"`` (per-file stats keyed by file path).
    """
    repo = git.Repo(repo_path)

    # Remember where HEAD was so it can be restored afterwards. A detached
    # HEAD has no branch name, so the commit hash is recorded as a fallback.
    current_branch = repo.active_branch.name if not repo.head.is_detached else None
    latest_commit = repo.head.commit.hexsha

    # iter_commits yields newest first; reverse to walk history forwards.
    commits = list(repo.iter_commits(branch_name))[::-1]

    print(f"Number of commits: {len(commits)}")
    print("\n")

    collected: Dict[str, Dict] = {}

    try:
        for commit in commits:
            # Display basic commit info
            print(f"Commit: {commit.hexsha}")
            print(f"Author: {commit.author.name} <{commit.author.email}>")
            print(f"Date: {commit.committed_datetime}")
            print(f"Message: {commit.message}")

            # Read the stats once and reuse them for totals and per-file
            # figures instead of evaluating commit.stats twice.
            commit_stats = commit.stats
            totals = commit_stats.total
            print(f"Files Changed: {totals['files']}")
            print(f"Insertions: {totals['insertions']}")
            print(f"Deletions: {totals['deletions']}")
            print(f"Net Changes: {totals['insertions'] - totals['deletions']}")

            # Put the working tree into the state of this commit so the
            # network analysis sees the files as they were at that point.
            repo.git.checkout(commit.hexsha)
            print(f"Checked out commit: {commit.hexsha}")

            # Best effort: a commit the analysis cannot handle must not
            # abort the walk over the remaining history.
            try:
                analyze_config_network(repo_path=repo_path)
            except Exception as error:
                print(f"Error occurred: {error}")

            # Get general stats per file
            for file_path, file_stats in commit_stats.files.items():
                print(f"File: {file_path}")
                print(f"  Lines Added: {file_stats['insertions']}")
                print(f"  Lines Deleted: {file_stats['deletions']}")
                print(f"  Total Changes: {file_stats['insertions'] + file_stats['deletions']}")

            # NOTE: per-file diff output and before/after size tracking are
            # not implemented here.

            collected[commit.hexsha] = {
                "total": dict(totals),
                "files": {path: dict(per_file) for path, per_file in commit_stats.files.items()},
            }

            print("\n")
    finally:
        # Always leave the repository where it was found, including when the
        # walk above is interrupted by an exception.
        if current_branch:
            # If we were on a branch, return to it
            repo.git.checkout(current_branch)
            print(f"Returned to original branch: {current_branch}")
        else:
            # If we were in a detached HEAD state, checkout the latest commit directly
            repo.git.checkout(latest_commit)
            print(f"Returned to the latest commit: {latest_commit}")

    return collected


# Local checkout of the repository whose history is analyzed (machine-specific path).
test_repo_path = "/home/simisimon/GitHub/projects/test_project_history"

# Walk the "master" branch rather than the function's default "main".
stats = analyze_repository(repo_path=test_repo_path, branch_name="master")

Number of commits: 4


Commit: 157a941cd30350e745bb28db886fd483c43451ce
Author: Sebastian Simon <bastisimon95@googlemail.com>
Date: 2024-11-08 10:19:26+01:00
Message: Initial commit

Files Changed: 2
Insertions: 86
Deletions: 0
Net Changes: 86
Checked out commit: 157a941cd30350e745bb28db886fd483c43451ce
Create Network
Network:  <cfgnet.network.network.Network object at 0x7f367429b0d0>
Num Artifacts: 0
Concepts: set()
Num Links: 0
File: src/Dockerfile
  Lines Added: 24
  Lines Deleted: 0
  Total Changes: 24
File: src/pom.xml
  Lines Added: 62
  Lines Deleted: 0
  Total Changes: 62


Commit: 52493393998af4d9593e3d5579a529ee787d6ce5
Author: Sebastian Simon <bastisimon95@googlemail.com>
Date: 2024-11-08 10:20:06+01:00
Message: Add docker compose file

Files Changed: 1
Insertions: 13
Deletions: 0
Net Changes: 13
Checked out commit: 52493393998af4d9593e3d5579a529ee787d6ce5
Create Network
Network:  <cfgnet.network.network.Network object at 0x7f36743c3190>
Num Artifacts: 0
Concepts: set()
Num 