In [1]:
import os
import git

def repo_clone(github_url, local_path):
    if os.path.exists(local_path):
        print(f"Local repo path already exists at {local_path}")
        repo = git.Repo(local_path)
        repo.remotes.origin.pull()
    else:
        print(f"Cloning repository from {github_url} to {local_path}...")
        git.Repo.clone_from(github_url, local_path)
    print("Repository is ready locally.")

def is_test_file(filename):
    # Checks if a file is a test file based on the common formats
    return (
        filename.endswith(('.test', '-test', 'test', 'Test', '.spec', '-spec', 'spec', 'Spec', '.yml', '.yaml'))
        or filename.startswith(('test.', 'test-', 'test ', 'Test', 'spec.', 'spec-', 'spec ', 'Spec'))
    )

def collect_test_files(repo_path):
    #Collects test files
    test_files = []
    for root, dirs, files in os.walk(repo_path):
        print(f"Checking directory: {root}")

        # collects if a folder contains 'test' or 'spec'
        if 'test' in root.split(os.sep) or 'spec' in root.split(os.sep):
            for f in files:
                print(f"Found test file in test/spec folder: {f}")
                test_files.append(os.path.join(root, f))
        else:
            for f in files:
                if is_test_file(f):
                    print(f"Found test file by naming pattern: {f}")
                    test_files.append(os.path.join(root, f))

    return test_files

GITHUB_URL = "https://github.com/ruby/ruby"
LOCAL_PATH = "clonedRepo"

# Clone the repo and find test files
repo_clone(GITHUB_URL, LOCAL_PATH)
test_files = collect_test_files(LOCAL_PATH)

# Print results
if test_files:
    print("\n Test files found:")
    for file in test_files:
        print(file)
else:
    print("\n No test files found.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
clonedRepo/spec/ruby/core/env/member_spec.rb
clonedRepo/spec/ruby/core/env/delete_if_spec.rb
clonedRepo/spec/ruby/core/env/assoc_spec.rb
clonedRepo/spec/ruby/core/env/include_spec.rb
clonedRepo/spec/ruby/core/env/shift_spec.rb
clonedRepo/spec/ruby/core/env/each_value_spec.rb
clonedRepo/spec/ruby/core/env/size_spec.rb
clonedRepo/spec/ruby/core/env/to_a_spec.rb
clonedRepo/spec/ruby/core/env/empty_spec.rb
clonedRepo/spec/ruby/core/env/reject_spec.rb
clonedRepo/spec/ruby/core/env/to_h_spec.rb
clonedRepo/spec/ruby/core/env/each_spec.rb
clonedRepo/spec/ruby/core/env/rehash_spec.rb
clonedRepo/spec/ruby/core/env/fetch_spec.rb
clonedRepo/spec/ruby/core/env/has_value_spec.rb
clonedRepo/spec/ruby/core/env/invert_spec.rb
clonedRepo/spec/ruby/core/env/slice_spec.rb
clonedRepo/spec/ruby/core/env/to_s_spec.rb
clonedRepo/spec/ruby/core/env/key_spec.rb
clonedRepo/spec/ruby/core/env/filter_spec.rb
clonedRepo/spec/ruby/core/env/values_spec.

In [2]:
import os

def check_testFile(filename):
    return (
        filename.endswith(('.test', '-test', 'test', 'Test', '.spec', '-spec', 'spec', 'Spec', '.yml', '.yaml'))
        or filename.startswith(('test.', 'test-', 'test ', 'Test', 'spec.', 'spec-', 'spec ', 'Spec'))
    )

def collect_test_files(rootDirectory_path):
    test_files = []
    for root, dirs, files in os.walk(rootDirectory_path):
        if 'test' in root.split(os.sep) or 'spec' in root.split(os.sep):
            test_files.extend([os.path.join(root, f) for f in files])
        else:
            test_files.extend([os.path.join(root, f) for f in files if check_testFile(f)])
    return test_files

repo_path = '/content/clonedRepo'
test_files = collect_test_files(repo_path)

for file in test_files:
    print(file)


/content/clonedRepo/.travis.yml
/content/clonedRepo/tool/test-annocheck.sh
/content/clonedRepo/tool/test-bundled-gems.rb
/content/clonedRepo/tool/test-coverage.rb
/content/clonedRepo/tool/lib/test/unit.rb
/content/clonedRepo/tool/lib/test/unit/parallel.rb
/content/clonedRepo/tool/lib/test/unit/testcase.rb
/content/clonedRepo/tool/lib/test/unit/assertions.rb
/content/clonedRepo/tool/test/test_jisx0208.rb
/content/clonedRepo/tool/test/init.rb
/content/clonedRepo/tool/test/runner.rb
/content/clonedRepo/tool/test/test_sync_default_gems.rb
/content/clonedRepo/tool/test/testunit/test_redefinition.rb
/content/clonedRepo/tool/test/testunit/test4test_sorting.rb
/content/clonedRepo/tool/test/testunit/test_timeout.rb
/content/clonedRepo/tool/test/testunit/test4test_redefinition.rb
/content/clonedRepo/tool/test/testunit/metametameta.rb
/content/clonedRepo/tool/test/testunit/test4test_load_failure.rb
/content/clonedRepo/tool/test/testunit/test_load_failure.rb
/content/clonedRepo/tool/test/testunit/

In [3]:
ls /content/clonedRepo

aclocal.m4       [0m[01;34menc[0m/             main.c           README.md            thread_sync.rb
addr2line.c      encindex.h       [01;34mman[0m/             re.c                 thread_win32.c
addr2line.h      encoding.c       marshal.c        regcomp.c            thread_win32.h
array.c          enum.c           marshal.rb       regenc.c             time.c
array.rb         enumerator.c     math.c           regenc.h             timev.h
ast.c            error.c          memory_view.c    regerror.c           timev.rb
ast.rb           eval.c           method.h         regexec.c            [01;34mtool[0m/
[01;32mautogen.sh[0m*      eval_error.c     mini_builtin.c   regint.h             trace_point.rb
[01;34mbasictest[0m/       eval_intern.h    miniinit.c       regparse.c           transcode.c
[01;34mbenchmark[0m/       eval_jump.c      [01;34mmisc[0m/            regparse.h           transcode_data.h
bignum.c         [01;34mext[0m/             [01;34mmissing[0m/       

In [4]:
print(f"\nTotal test files found: {len(test_files)}")



Total test files found: 7765


In [5]:
def test_loc(file_list):
    #Counts the total lines of code
    total_lines = 0
    for file in file_list:
        try:
            with open(file, "r", encoding="utf-8", errors="ignore") as f:
                lines = f.readlines()
                total_lines += len(lines)
        except Exception as e:
            print(f"Error reading {file}: {e}")

    return total_lines

# Count lines of code in test files
total_test_lines = test_loc(test_files)

# Print results
print(f"\nTotal test files found: {len(test_files)}")
print(f"Total lines of code in test files: {total_test_lines}")



Total test files found: 7765
Total lines of code in test files: 650514


In [6]:
#Deleting the local directory to reuse it for every root repo
import shutil

def delete_local_repo(local_path):
    if os.path.exists(local_path):
        print(f"Deleting existing repository folder: {local_path}")
        shutil.rmtree(local_path)
        print("Folder deleted successfully.")
    else:
        print("No existing repository folder found.")


delete_local_repo('/content/clonedRepo')


Deleting existing repository folder: /content/clonedRepo
Folder deleted successfully.


In [7]:
import requests

def get_total_committers(github_repo, token=None):
    #Fetches the total number of unique committers via GitHub API
    committers = set()
    page = 1
    headers = {"Authorization": f"token {token}"} if token else {}

    while True:
        url = f"https://api.github.com/repos/{github_repo}/contributors?per_page=100&page={page}"
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            return None

        contributors = response.json()
        if not contributors:
            break

        for contributor in contributors:
            committers.add(contributor["login"])
        #handles pagination
        page += 1

    return len(committers)

github_repo = "Haivision/srt"
github_token = "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"  # Optional, needed for private repos or high request limits

total_committers = get_total_committers(github_repo, github_token)
if total_committers is not None:
    print(f"Total Committers: {total_committers}")


Total Committers: 116
