In [7]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

from gen_ai_hub.proxy.native.openai import chat

In [30]:
import os
import tempfile
import git

IRRELEVANT_FILES = [
    "webapp/",
    "app/",
    "node_modules/",
    ".gitignore",
    "package-lock.json",
    "i18n/",
    ".vscode",
    ".env",
    "eslint",
    ".eslintrc",
    ".git/",
    "__pycache__/",
]
    
def collect_all_files(repo_url, irrelevant_files=IRRELEVANT_FILES):
    
    project_path = tempfile.mkdtemp()
    git.Repo.clone_from(repo_url, project_path)
    print(f"Cloned repository to {project_path}")
    
    relevant_files = []
    
    for root, dirs, files in os.walk(project_path):
        for file in files:
            
            if not any(exclude in file for exclude in irrelevant_files):
                
                abs_path = os.path.join(root, file)
                rel_path = os.path.relpath(abs_path, project_path)
                
            
                relevant_files.append({
                    "name": file,
                    "full_path": abs_path,
                    "rel_path": rel_path,
                    "format": file.split(".")[-1] if "." in file else None,
                    "is_relevant": not any(exclude in rel_path for exclude in irrelevant_files),    
                })

                print(f"Collected file: {file} from {rel_path} with relevance")

    return relevant_files



In [31]:
repo_files = collect_all_files("https://github.com/josegouvea24/CAP-Documentator.git")
repo_files

Cloned repository to /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz
Collected file: README.md from README.md with relevance
Collected file: package.json from package.json with relevance
Collected file: ui5-deploy.yaml from app/fiori-ui/ui5-deploy.yaml with relevance
Collected file: ui5.yaml from app/fiori-ui/ui5.yaml with relevance
Collected file: README.md from app/fiori-ui/README.md with relevance
Collected file: xs-security.json from app/fiori-ui/xs-security.json with relevance
Collected file: package.json from app/fiori-ui/package.json with relevance
Collected file: xs-app.json from app/fiori-ui/xs-app.json with relevance
Collected file: mta.yaml from app/fiori-ui/mta.yaml with relevance
Collected file: tsconfig.json from app/fiori-ui/tsconfig.json with relevance
Collected file: index.html from app/fiori-ui/webapp/index.html with relevance
Collected file: Component.js from app/fiori-ui/webapp/Component.js with relevance
Collected file: manifest.json from app/fiori-ui/

[{'name': 'README.md',
  'full_path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz/README.md',
  'rel_path': 'README.md',
  'format': 'md',
  'is_relevant': True},
 {'name': 'package.json',
  'full_path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz/package.json',
  'rel_path': 'package.json',
  'format': 'json',
  'is_relevant': True},
 {'name': 'ui5-deploy.yaml',
  'full_path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz/app/fiori-ui/ui5-deploy.yaml',
  'rel_path': 'app/fiori-ui/ui5-deploy.yaml',
  'format': 'yaml',
  'is_relevant': False},
 {'name': 'ui5.yaml',
  'full_path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz/app/fiori-ui/ui5.yaml',
  'rel_path': 'app/fiori-ui/ui5.yaml',
  'format': 'yaml',
  'is_relevant': False},
 {'name': 'README.md',
  'full_path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpzev8jmrz/app/fiori-ui/README.md',
  'rel_path': 'app/fiori-ui/README.md',
  'format': 'md',
  'is_rel

In [None]:
def generate_cds_documentation(relevant_file_content, llm_model = "gpt-4o", temperature = 0):
    
    system_prompt = """
        You are a technical documentation assistant specialized in SAP CAP (Cloud Application Programming Model). 
        You will be provided with the full content of a CAP project, including all relevant .cds files, service implementations (.js), and metadata files.

        Your task is to generate comprehensive, structured technical documentation in **Markdown** format. The documentation must include the following clearly separated sections:

        1. **Project File Structure**  
        - Show a complete hierarchical folder and file structure of the CAP project.

        2. **CAP Application Files Overview**  
        - Present a two-column table:  
            | File Name | Description |
        - Describe the role and relevance of each application file.

        3. **Data Model Representation**  
        - Render an entity data model Markdown diagram showing:  
            - All entities, their attributes (name, type)
            - Keys
            - Associations and compositions with their cardinality and navigation

        4. **Tables, Views and Types**  
        - Present a table with 4 columns:  
            - Name
            - Type (Table/View/Type) 
            - Fields (name, key, type, default, annotations, etc.) separated by a new line
            - Annotations
            - Description

        5. **CDS Definitions**  
        - For each CDS entity definition, include:
            - Name
            - DB entity of which it is a projection
            - CRUD operations supported (Create/Read/Update/Delete)
            - Fields with types and annotations
            - Description
            - Annotations (access control, semantics, etc.)

        6. **Function and Action Imports**  
        - List each function or action with:
            - Name and description
            - Supported operations
            - Associated entities (if any)

        7. **Event Handlers**  
        - Provide a table with:
            - Handler type (on/before/after)
            - Event type (create/update/delete/get/post)
            - Associated entity,function or action
            - Description of handler
            - Implementation description
            - Helper functions used
            
        8. **Server Helper Functions**
        - Table all helper functions found in "srv/" folder files with:
            - Name
            - Location (file name)
            - Description
            - Parameters
            - Return type
            - Implementation description
        
        
        **Formatting & Completeness Instructions**:
        - Format the full output using Markdown headers, bullet points, and tables.
        - Do not omit any requested section.
        - Be explicit. If a detail cannot be found or deduced, mark it as `[UNKNOWN]`. If a detail is not applicable, mark it as `[NA]`.
        - Include no raw code unless required as an example under a relevant section.
        - Aim for maximum completeness, clarity, and usefulness for developers, especially in implementation descriptions.

        The documentation is intended to be rendered in tools like Microsoft Word or Markdown viewers.

        """

    user_prompt = f"""
                    Here are the CAP project files and their contents:

                    {relevant_file_content}
                    """

    
    response = chat.completions.create(
                    model=llm_model,
                    temperature=temperature,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ]
                )
    
    return response.choices[0].message.content

In [46]:
import os

def load_file_content(file_list, only_relevant=True):
    content = ""

    for file in file_list: 
        header = (
                    f"\n\n===== FILE: {file['name']} =====\n"
                    f"Path   : {file['rel_path']}\n"
                    f"Format : {file['format']}\n"
                )
        
        if only_relevant and not file["is_relevant"]:
            file_content = "THIS FILE'S CONTENT IS IRRELEVANT FOR CAP PROJECT DOCUMENTATION"
        else:
            try:
                with open(file["full_path"], "r", encoding="utf-8") as f:
                    file_content = f.read()
            except Exception as e:
                file_content = f"⚠️ Skipping file {file.get('rel_path', 'unknown')} due to error: {e}"

        content += f"{header}\n{file_content}\n"
    
    return content

In [47]:
result = generate_cds_documentation(
            relevant_file_content = load_file_content(
                repo_files, only_relevant=False
            ),
            llm_model = "gpt-4o"
        )

print(result)

# CAP Documentator Technical Documentation

## 1. Project File Structure

```bash
CAP-Documentator/
├── app/                            # Frontend application
│   └── fiori-ui/                   # Freestyle SAPUI5 Fiori app
│       ├── webapp/                 # Main source folder for the UI app
│       │   ├── controller/         # Handles event logic for views
│       │   ├── css/                # Custom .css stylesheets
│       │   ├── i18n/               # Internationalization (i18n) resource bundles
│       │   ├── model/              # Data models (OData, JSON)
│       │   ├── test/               # UI unit/integration tests
│       │   └── view/               # XML views that define the UI layout
│       ├── Component.ts            # Main entry module for the UI5 app
│       ├── index.html              # App entry HTML file
│       ├── manifest.json           # App descriptor (routes, dataSources, models)
│       ├── xs-app.json             # CF HTML5 app router config
│       ├──

# CAP Documentator Technical Documentation

## 1. Project File Structure

```bash
CAP-Documentator/
├── app/                            # Frontend application
│   └── fiori-ui/                   # Freestyle SAPUI5 Fiori app
│       ├── webapp/                 # Main source folder for the UI app
│       │   ├── controller/         # Handles event logic for views
│       │   ├── css/                # Custom .css stylesheets
│       │   ├── i18n/               # Internationalization (i18n) resource bundles
│       │   ├── model/              # Data models (OData, JSON)
│       │   ├── test/               # UI unit/integration tests
│       │   └── view/               # XML views that define the UI layout
│       ├── Component.ts            # Main entry module for the UI5 app
│       ├── index.html              # App entry HTML file
│       ├── manifest.json           # App descriptor (routes, dataSources, models)
│       ├── xs-app.json             # CF HTML5 app router config
│       ├── xs-security.json        # Security descriptor (used with XSUAA)
│       ├── mta.yaml                # Multi-target application descriptor (for CF)
│       ├── ui5.yaml                # UI5 tooling configuration
│       ├── ui5-deploy.yaml         # UI5 deployer config for CF HTML5 repo
│       ├── ui5-local.yaml          # UI5 tooling config for local run
│       ├── package.json            # NPM UI package metadata
│       └── package-lock.json       # NPM UI package dependency lock file
├── backend/                        # Python backend (Flask, LangChain)
│   ├── src/
│   │   ├── api/                    # API endpoints
│   │   ├── ingestion/              # GitHub repo cloning and preparation
│   │   ├── processing/             # File structure + CAP metadata parsing
│   │   ├── rag/                    # LangChain logic and vector store setup
│   │   ├── generation/             # .docx file generation using python-docx
│   │   ├── models/                 # Embedding & LLM initialization
│   │   └── utils/                  # Logging, common utilities
│   ├── manifest.yml                # Backend deployment descriptor
│   ├── requirements.txt            # Python dependencies
│   └── src/api/main.py             # Flask app entrypoint
├── package.json                    # NPM project metadata
├── package-lock.json               # NPM project dependency lock file
├── db/                             # Database schema and data
│   ├── schema.cds                  # CDS schema definitions
│   └── data/                       # Data files
│       ├── sap.capdocumentator-File.csv
│       └── sap.capdocumentator-Repository.csv
├── srv/                            # Service implementations
│   ├── documentation-service.js    # Service logic for documentation
│   ├── server.js                   # Server entrypoint
│   ├── documentation-service.cds   # CDS service definitions
│   └── utils/                      # Utility functions
│       └── flask.js                # Helper functions for Flask integration
```

## 2. CAP Application Files Overview

| File Name                        | Description                                                                 |
|----------------------------------|-----------------------------------------------------------------------------|
| README.md                        | Project overview and setup instructions.                                    |
| package.json                     | NPM project metadata and dependencies.                                      |
| ui5-deploy.yaml                  | UI5 deployer configuration for Cloud Foundry.                               |
| ui5.yaml                         | UI5 tooling configuration.                                                  |
| xs-security.json                 | Security descriptor for XSUAA integration.                                  |
| xs-app.json                      | HTML5 app router configuration for Cloud Foundry.                           |
| mta.yaml                         | Multi-target application descriptor for Cloud Foundry deployment.           |
| tsconfig.json                    | TypeScript configuration for UI5 app.                                       |
| index.html                       | Entry HTML file for the UI5 application.                                    |
| Component.js                     | Main entry module for the UI5 application.                                  |
| manifest.json                    | Application descriptor including routes, models, and data sources.          |
| locate-reuse-libs.js             | Script for locating reuse libraries in UI5.                                 |
| testsuite.qunit.html             | QUnit test suite HTML file.                                                 |
| testsuite.qunit.js               | QUnit test suite JavaScript file.                                           |
| flpSandbox.html                  | Fiori Launchpad sandbox configuration.                                      |
| unitTests.qunit.html             | Unit test suite HTML file.                                                  |
| AllTests.js                      | Aggregator for unit tests.                                                  |
| unitTests.qunit.js               | Unit test suite JavaScript file.                                            |
| MainView.controller.js           | Controller logic for the main view.                                         |
| AllJourneys.js                   | Integration test journeys.                                                  |
| NavigationJourney.js             | Navigation journey integration tests.                                       |
| opaTests.qunit.js                | OPA test suite JavaScript file.                                             |
| opaTests.qunit.html              | OPA test suite HTML file.                                                   |
| Startup.js                       | Startup arrangements for integration tests.                                 |
| MainView.js                      | Page object for main view in integration tests.                             |
| App.js                           | Page object for app in integration tests.                                   |
| style.css                        | Custom stylesheets for the UI5 application.                                 |
| App.controller.js                | Controller logic for the app view.                                          |
| MainView.controller.js           | Controller logic for the main view.                                         |
| BaseController.js                | Base controller providing common functionality.                             |
| fioriSandboxConfig.json          | Fiori sandbox configuration.                                                |
| models.js                        | Model definitions for the UI5 application.                                  |
| App.view.xml                     | XML view definition for the app view.                                       |
| MainView.view.xml                | XML view definition for the main view.                                      |
| i18n.properties                  | Internationalization resource bundle.                                       |
| run.py                           | Python script to run the Flask application.                                 |
| requirements.txt                 | Python dependencies for the backend.                                        |
| README.md                        | Backend overview and setup instructions.                                    |
| manifest.yml                     | Cloud Foundry deployment descriptor for the backend.                        |
| llm.ipynb                        | Jupyter notebook for LLM experiments.                                       |
| __init__.py                      | Initialization file for Python packages.                                    |
| repo.py                          | Logic for cloning and preparing GitHub repositories.                        |
| file_loader.py                   | Logic for loading file content from repositories.                           |
| main.py                          | Flask application entrypoint.                                               |
| llm.py                           | Logic for generating CDS documentation using LLM.                           |
| schema.cds                       | CDS schema definitions for the database.                                    |
| sap.capdocumentator-File.csv     | CSV data file for File entity.                                              |
| sap.capdocumentator-Repository.csv| CSV data file for Repository entity.                                        |
| documentation-service.js         | Service logic for fetching README content.                                  |
| server.js                        | Server entrypoint for CAP services.                                         |
| documentation-service.cds        | CDS service definitions for documentation.                                  |
| flask.js                         | Helper functions for Flask integration.                                     |

## 3. Data Model Representation

```markdown
@startuml
entity Repository {
  * ID : UUID
  --
  name : String
  url : String
  files : Association to many File
}

entity File {
  * ID : UUID
  --
  name : String
  relativePath : String
  type : String
  repo : Association to one Repository
}

Repository "1" -- "many" File : files
File "1" -- "1" Repository : repo
@enduml
```

## 4. Tables, Views and Types

| Name       | Type   | Fields                                                                 | Annotations | Description                           |
|------------|--------|------------------------------------------------------------------------|-------------|---------------------------------------|
| Repository | Table  | ID: UUID, name: String, url: String, files: Association to many File   | [UNKNOWN]   | Represents a GitHub repository.       |
| File       | Table  | ID: UUID, name: String, relativePath: String, type: String, repo: Association to one Repository | [UNKNOWN]   | Represents a file within a repository.|

## 5. CDS Definitions

### Repository

- **Name**: Repository
- **DB Entity**: Projection on `db.Repository`
- **CRUD Operations Supported**: Read
- **Fields**:
  - ID: UUID
  - name: String
  - url: String
  - files: Association to many File
- **Description**: Represents a GitHub repository.
- **Annotations**: [UNKNOWN]

### File

- **Name**: File
- **DB Entity**: Projection on `db.File`
- **CRUD Operations Supported**: Read
- **Fields**:
  - ID: UUID
  - name: String
  - relativePath: String
  - type: String
  - repo: Association to one Repository
- **Description**: Represents a file within a repository.
- **Annotations**: [UNKNOWN]

## 6. Function and Action Imports

### fetchReadMeFromGitHub

- **Name**: fetchReadMeFromGitHub
- **Description**: Retrieves README content for a given GitHub URL.
- **Supported Operations**: Action
- **Associated Entities**: None

## 7. Event Handlers

| Handler Type | Event Type | Associated Entity/Function/Action | Description of Handler | Implementation Description | Helper Functions Used |
|--------------|------------|-----------------------------------|------------------------|----------------------------|-----------------------|
| on           | fetchReadMeFromGitHub | [UNKNOWN] | Handles the action to fetch README content from GitHub. | Validates URL and calls Flask backend to fetch README. | fetchReadme |

## 8. Server Helper Functions

### fetchReadme

- **Name**: fetchReadme
- **Location**: srv/utils/flask.js
- **Description**: Calls Flask backend to fetch README content for a given GitHub URL.
- **Parameters**: repoUrl (string)
- **Return Type**: Promise<string>
- **Implementation Description**: Sends a POST request to the Flask backend with the repository URL and returns the README content. Handles errors and logs messages.