In [38]:
import os
import tempfile
import git
from pathlib import Path
from collections import defaultdict

# Directory names that classify_files prunes from its walk (matched by bare
# name, at any depth of the tree).
IGNORED_DIRS = {
    "node_modules",
    ".git",
    ".vscode",
    "i18n",
    "dist",
}

# File names that classify_files skips (matched by bare name, in any folder).
IGNORED_FILES = {
    ".gitignore",
    "package-lock.json",
    ".env",
}

def clone_repo(repo_url):
    """Clone a repository into a fresh temporary directory.

    Args:
        repo_url: Repository location, passed unchanged to
            ``git.Repo.clone_from``.

    Returns:
        str: Path of the temporary directory that now holds the clone. The
        directory is not removed automatically; the caller is responsible
        for deleting it when done.

    Raises:
        Exception: Whatever ``git.Repo.clone_from`` raises is propagated
            unchanged, after the temporary directory has been removed.
    """
    import shutil  # local import: only used on the failure path below

    temp_dir = tempfile.mkdtemp()
    try:
        git.Repo.clone_from(repo_url, temp_dir)
    except BaseException:
        # On failure the caller never receives temp_dir, so nobody else could
        # clean it up; remove it here before re-raising the original error.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return temp_dir

def classify_files(repo_path, ignored_dirs=None, ignored_files=None):
    """Group the files under ``repo_path`` by top-level folder and extension.

    Args:
        repo_path: Root directory to scan (``str`` or ``Path``).
        ignored_dirs: Directory names to prune from the walk, matched by bare
            name at any depth. Defaults to the module-level ``IGNORED_DIRS``.
        ignored_files: File names to skip, matched by bare name in any
            folder. Defaults to the module-level ``IGNORED_FILES``.

    Returns:
        defaultdict: ``{category: {suffix: [relative Path, ...]}}`` where
        ``category`` is the first path component of the file relative to
        ``repo_path`` (or ``"root"`` for files directly inside it) and
        ``suffix`` is the lower-cased ``Path.suffix`` (``""`` when the file
        has no extension). Both levels are ``defaultdict`` instances, so a
        lookup of a missing key inserts an empty entry; use ``in`` or
        ``.get`` for read-only membership checks.

    Note:
        A top-level folder literally named ``root`` shares the ``"root"``
        category with files that sit directly in ``repo_path``.
    """
    repo_path = Path(repo_path)
    if ignored_dirs is None:
        ignored_dirs = IGNORED_DIRS
    if ignored_files is None:
        ignored_files = IGNORED_FILES

    files_by_category = defaultdict(lambda: defaultdict(list))

    for root, dirs, files in os.walk(repo_path):
        # Slice assignment edits the list os.walk itself holds, which is what
        # stops it from descending into ignored directories. Sorting makes
        # the traversal (and thus the result order) independent of the
        # filesystem's listing order.
        dirs[:] = sorted(d for d in dirs if d not in ignored_dirs)

        for file in sorted(files):
            if file in ignored_files:
                continue

            path = Path(root) / file
            rel_path = path.relative_to(repo_path)

            # Group by top-level folder (CAP convention); a single-component
            # path means the file lives directly in the repository root.
            parts = rel_path.parts
            category = parts[0] if len(parts) > 1 else "root"
            files_by_category[category][path.suffix.lower()].append(rel_path)

    return files_by_category

In [40]:
# Fetch the repository into a temporary directory, then bucket its files by
# top-level folder and extension.
temp_dir = clone_repo(
    "https://github.tools.sap/Delivery-Scale-PT/opportunity-assistant-CAP"
)
files_by_type = classify_files(temp_dir)

# Bare last expression so the notebook renders the grouping as a plain dict.
dict(files_by_type)

{'root': defaultdict(list,
             {'': [PosixPath('core'), PosixPath('.eslintrc')],
              '.md': [PosixPath('README.md')],
              '.json': [PosixPath('xs-security.json'),
               PosixPath('package.json')],
              '.yaml': [PosixPath('mta.yaml')]}),
 'app': defaultdict(list,
             {'.cds': [PosixPath('app/services.cds'),
               PosixPath('app/opportunityassistantcockpit/annotations.cds')],
              '.yaml': [PosixPath('app/opportunityassistantcockpit/ui5-deploy.yaml'),
               PosixPath('app/opportunityassistantcockpit/ui5.yaml')],
              '.md': [PosixPath('app/opportunityassistantcockpit/README.md')],
              '.json': [PosixPath('app/opportunityassistantcockpit/package.json'),
               PosixPath('app/opportunityassistantcockpit/xs-app.json'),
               PosixPath('app/opportunityassistantcockpit/webapp/manifest.json'),
               PosixPath('app/opportunityassistantcockpit/webapp/model/sideContent.