In [1]:
import os
import git

def repo_clone(github_url, local_path):
    if os.path.exists(local_path):
        print(f"Local repo path already exists at {local_path}")
        repo = git.Repo(local_path)
        repo.remotes.origin.pull()
    else:
        print(f"Cloning repository from {github_url} to {local_path}...")
        git.Repo.clone_from(github_url, local_path)
    print("Repository is ready locally.")

def is_test_file(filename):
    # Checks if a file is a test file based on the common formats
    return (
        filename.endswith(('.test', '-test', 'test', 'Test', '.spec', '-spec', 'spec', 'Spec', '.yml', '.yaml'))
        or filename.startswith(('test.', 'test-', 'test ', 'Test', 'spec.', 'spec-', 'spec ', 'Spec'))
    )

def collect_test_files(repo_path):
    #Collects test files
    test_files = []
    for root, dirs, files in os.walk(repo_path):
        print(f"Checking directory: {root}")

        # collects if a folder contains 'test' or 'spec'
        if 'test' in root.split(os.sep) or 'spec' in root.split(os.sep):
            for f in files:
                print(f"Found test file in test/spec folder: {f}")
                test_files.append(os.path.join(root, f))
        else:
            for f in files:
                if is_test_file(f):
                    print(f"Found test file by naming pattern: {f}")
                    test_files.append(os.path.join(root, f))

    return test_files

GITHUB_URL = "https://github.com/ruby/ruby"
LOCAL_PATH = "clonedRepo"

# Clone the repo and find test files
repo_clone(GITHUB_URL, LOCAL_PATH)
test_files = collect_test_files(LOCAL_PATH)

# Print results
if test_files:
    print("\n Test files found:")
    for file in test_files:
        print(file)
else:
    print("\n No test files found.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
clonedRepo/spec/ruby/core/env/member_spec.rb
clonedRepo/spec/ruby/core/env/delete_if_spec.rb
clonedRepo/spec/ruby/core/env/assoc_spec.rb
clonedRepo/spec/ruby/core/env/include_spec.rb
clonedRepo/spec/ruby/core/env/shift_spec.rb
clonedRepo/spec/ruby/core/env/each_value_spec.rb
clonedRepo/spec/ruby/core/env/size_spec.rb
clonedRepo/spec/ruby/core/env/to_a_spec.rb
clonedRepo/spec/ruby/core/env/empty_spec.rb
clonedRepo/spec/ruby/core/env/reject_spec.rb
clonedRepo/spec/ruby/core/env/to_h_spec.rb
clonedRepo/spec/ruby/core/env/each_spec.rb
clonedRepo/spec/ruby/core/env/rehash_spec.rb
clonedRepo/spec/ruby/core/env/fetch_spec.rb
clonedRepo/spec/ruby/core/env/has_value_spec.rb
clonedRepo/spec/ruby/core/env/invert_spec.rb
clonedRepo/spec/ruby/core/env/slice_spec.rb
clonedRepo/spec/ruby/core/env/to_s_spec.rb
clonedRepo/spec/ruby/core/env/key_spec.rb
clonedRepo/spec/ruby/core/env/filter_spec.rb
clonedRepo/spec/ruby/core/env/values_spec.

In [2]:
import subprocess
import re

def totalCommits(repo_path):
    #Returns the total number of commits in the repository
    result = subprocess.run(["git", "rev-list", "--count", "HEAD"], cwd=repo_path, capture_output=True, text=True)
    return int(result.stdout.strip()) if result.returncode == 0 else None

def test_lines_changed(repo_path):
    #Returns the total number of lines changed in test files including the travis CI files
    test_file_patterns = re.compile(r'(\btest\b|\.test|_test|-test|Test|\.spec|_spec|-spec|Spec|\.ya?ml)$', re.IGNORECASE)
    total_lines_changed = 0

    result = subprocess.run(["git", "log", "--numstat", "--pretty=format:"], cwd=repo_path, capture_output=True, text=True)

    if result.returncode == 0:
        lines = result.stdout.split("\n")
        for line in lines:
            parts = line.split("\t")
            if len(parts) == 3:
                added, removed, filename = parts
                if test_file_patterns.search(filename):
                    try:
                        added_lines = int(added) if added.isdigit() else 0
                        removed_lines = int(removed) if removed.isdigit() else 0
                        total_lines_changed += added_lines + removed_lines
                    except ValueError:
                        continue

    return total_lines_changed

def calculate_tcpc(repo_path):
    # Calculating test changes per commit
    total_commits = totalCommits(repo_path)
    total_test_lines_changed = test_lines_changed(repo_path)

    if total_commits and total_test_lines_changed:
        return total_test_lines_changed / total_commits
    return None
repo_path = "clonedRepo"

test_changes_per_commit = calculate_tcpc(repo_path)
if test_changes_per_commit is not None:
    print(f"Test Changes per Commit: {test_changes_per_commit:.2f}")
else:
    print("Error calculating metric.")



Test Changes per Commit: 0.73


In [3]:
def count_test_commits(repo_path):
    # Counts commits that changed at least one test file
    repo = git.Repo(repo_path)
    test_commits = set()
    for commit in repo.iter_commits():
        for file in commit.stats.files.keys():
            if is_test_file(os.path.basename(file)):
                test_commits.add(commit.hexsha)
                break

    return len(test_commits)


test_commit_count = count_test_commits(LOCAL_PATH)
print(f"\nTotal commits that changed at least one test file: {test_commit_count}")



Total commits that changed at least one test file: 4205


In [4]:
import subprocess
import os
from datetime import datetime

def get_commit_dates(repo_path):
    # pulls the commit start and end date for the entire project
    #handling exceptions
    try:
        if not os.path.isdir(os.path.join(repo_path, ".git")):
            print("Error: Not a valid Git repository.")
            return None, None

        # Get the first commit date
        first_commit = subprocess.check_output(
            ["git", "log", "--reverse", "--format=%cd", "--date=short"],
            cwd=repo_path,
            text=True
        ).splitlines()[0]

        # Get the last commit date
        last_commit = subprocess.check_output(
            ["git", "log", "-1", "--format=%cd", "--date=short"],
            cwd=repo_path,
            text=True
        ).strip()

        return first_commit, last_commit
    except Exception as e:
        print("Error fetching commit dates:", e)
        return None, None

def calculate_total_active_days(repo_path):
    """Calculate the total number of days a Git repository has been active."""
    first_commit, last_commit = get_commit_dates(repo_path)

    if first_commit and last_commit:
        date_format = "%Y-%m-%d"
        start_date = datetime.strptime(first_commit, date_format)
        end_date = datetime.strptime(last_commit, date_format)
        active_days = (end_date - start_date).days
        print(f"Total active days: {active_days}")
    else:
        print("Error")

if __name__ == "__main__":
    repo_path = input("Enter the local path of the cloned repository: ").strip()
    calculate_total_active_days("/content/clonedRepo")


Enter the local path of the cloned repository: /content/clonedRepo
Total active days: 9922


In [5]:
import subprocess

def get_total_loc(repo_path):
    # to get the total LOC for the entire project for comparison
    # handling exceptions
    try:
        result = subprocess.run(["git", "ls-files"], cwd=repo_path, capture_output=True, text=True, check=True)
        files = result.stdout.strip().split("\n")

        total_loc = 0
        for file in files:
            line_count = subprocess.run(["wc", "-l", file], cwd=repo_path, capture_output=True, text=True)
            if line_count.returncode == 0:
                total_loc += int(line_count.stdout.strip().split()[0])

        return total_loc

    except Exception as e:
        print(f"Error: {e}")
        return None

repo_path = "clonedRepo"

total_loc = get_total_loc(repo_path)
if total_loc is not None:
    print(f"Total Lines of Code (LoC): {total_loc}")
else:
    print("Error calculating LoC.")


Total Lines of Code (LoC): 2629679


In [6]:
#Deleting the local directory to reuse it for every root repo
import shutil

def delete_local_repo(local_path):
    if os.path.exists(local_path):
        print(f"Deleting existing repository folder: {local_path}")
        shutil.rmtree(local_path)
        print("Folder deleted successfully.")
    else:
        print("No existing repository folder found.")


delete_local_repo('/content/clonedRepo')

Deleting existing repository folder: /content/clonedRepo
Folder deleted successfully.
