In [1]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

from gen_ai_hub.proxy.native.openai import chat

In [2]:

def test_llm(source_text, llm_model = "gpt-4o-mini"):
    system_prompt = """You are a helpful assistant that can read and understand code.
    You will be provided with a line of code. You're expected to analyze the code and provide a detailed explanation of what it does."""

    user_prompt = f"""Here is the line of code:\n\n{source_text}"""

    response = chat.completions.create(
        model=llm_model,
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    return response.choices[0].message.content

In [3]:
test_llm(
    "def add(a, b): return a + b"
)

"The line of code you provided defines a simple function in Python. Let's break it down:\n\n1. **Function Definition**: The keyword `def` is used to define a function in Python. In this case, the function is named `add`.\n\n2. **Parameters**: The function `add` takes two parameters, `a` and `b`. These parameters are placeholders for the values that will be passed to the function when it is called.\n\n3. **Return Statement**: The function uses the `return` statement to send back a value. In this case, it returns the result of the expression `a + b`.\n\n4. **Function Body**: The body of the function consists of a single expression: `a + b`. This expression adds the two parameters together.\n\n5. **Return Value**: When the function is called with two arguments, it will compute the sum of those arguments and return the result.\n\n### Example Usage:\nHere’s how you might use the `add` function:\n\n```python\nresult = add(3, 5)\nprint(result)  # This will output: 8\n```\n\nIn this example, `

In [4]:
import os
import tempfile
import git

def collect_all_files(repo_url):
    base_path = tempfile.mkdtemp()
    git.Repo.clone_from(repo_url, base_path)
    print(f"Cloned repository to {base_path}")
    
    files_to_exclude = [ "webapp", "node_modules", ".git", ".gitignore", "package-lock.json", "app", "i18n", ".vscode" ]
    
    all_files = []
    
    # Collect all files from the repository
    for root, dirs, files in os.walk(base_path):
        for file in files:
            if not any(exclude in root or exclude in file for exclude in files_to_exclude):
                all_files.append({
                    "path": root,
                    "name": file,
                    "format": file.split(".")[-1]
                })
                print(f"Collected file: {file} from {root}")

    return all_files



In [5]:
collect_all_files("https://github.com/josegouvea24/CAP-Documentator.git")

Cloned repository to /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09
Collected file: README.md from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09
Collected file: package.json from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09
Collected file: .env from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09
Collected file: eslint.config.mjs from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09
Collected file: run.py from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend
Collected file: requirements.txt from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend
Collected file: README.md from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend
Collected file: manifest.yml from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend
Collected file: __init__.py from /var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend/src/ingestion
Collected file: repo.

[{'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09',
  'name': 'README.md',
  'format': 'md'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09',
  'name': 'package.json',
  'format': 'json'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09',
  'name': '.env',
  'format': 'env'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09',
  'name': 'eslint.config.mjs',
  'format': 'mjs'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend',
  'name': 'run.py',
  'format': 'py'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend',
  'name': 'requirements.txt',
  'format': 'txt'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend',
  'name': 'README.md',
  'format': 'md'},
 {'path': '/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpvmrvev09/backend',
  'name': 'manifest.yml',
  'format': 'yml'},
 {'path': '/var/folde

In [None]:
def generate_cds_model_documentation(relevant_file_content, llm_model = "gpt-4o"):
    
    system_prompt = system_prompt = """
        You are a technical documentation assistant specialized in SAP CAP (Cloud Application Programming Model). 
        You will be provided with the full content of a CAP project, including all relevant .cds files, service implementations (.js), and metadata files.

        Your task is to generate comprehensive, structured technical documentation in **Markdown** format. The documentation must include the following clearly separated sections:

        1. **Project File Structure**  
        - Show a complete hierarchical folder and file structure of the CAP project.

        2. **CAP Application Files Overview**  
        - Present a two-column table:  
            | File Name | Description |
        - Describe the role and relevance of each application file.

        3. **Data Model Representation**  
        - Render an entity data model Markdown diagram showing:  
            - All entities, their attributes (name, type)
            - Keys
            - Associations and compositions with their cardinality and navigation

        4. **Tables, Views and Types**  
        - Present a table with 4 columns:  
            | Name | Type (Table/View/Type) | Fields (name, type, default, annotations, etc.) | Annotations | Description |

        5. **CDS Definitions**  
        - For each CDS definition, include:
            - Name
            - CRUD operations supported (Create/Read/Update/Delete)
            - Fields with types and annotations
            - Description
            - Annotations (access control, semantics, etc.)

        6. **Function and Action Imports**  
        - List each function or action with:
            - Name and description
            - Supported operations
            - Associated entities (if any)

        7. **Event Handlers**  
        - Provide a table or list with:
            - Handler type (on/before/after)
            - Associated entity/function/action
            - Description of handler
            - Event type (create/update/delete/etc.)
            - Key logic or implementation notes

        **Formatting & Completeness Instructions**:
        - Format the full output using Markdown headers, bullet points, and tables.
        - Include all sections, even if empty — in that case, label them with `[UNKNOWN]`.
        - Do not omit any requested section.
        - Be explicit. If a detail cannot be found or deduced, mark it as `[UNKNOWN]`.
        - Include no raw code unless required as an example under a relevant section.
        - Aim for maximum completeness, clarity, and usefulness for developers, especially in implementation descriptions.

        The documentation is intended to be rendered in tools like Microsoft Word or Markdown viewers.

        """

    user_prompt = f"""
                    Here are the CAP project files and their contents:

                    {relevant_file_content}
                    """

    
    response = chat.completions.create(
                    model=llm_model,
                    temperature=0,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ]
                )
    
    return response.choices[0].message.content

In [None]:
def generate_cds_model_documentation_from_repo(repo_url, llm_model = "gpt-4o"):
    relevant_files = collect_all_files(repo_url)
    
    content = ""

    for file in relevant_files:
        try:
            full_path = os.path.join(file["path"], file["name"])

            with open(full_path, "r", encoding="utf-8") as f:
                file_content = f.read()

                # Build structured metadata header
                header = (
                    f"\n\n===== FILE: {file['name']} =====\n"
                    f"Format : {file['format']}\n"
                    f"Path   : {file['path']}\n"
                )

                # Combine with content
                content += f"{header}\n{file_content}\n"

        except Exception as e:
            print(f"⚠️ Skipping file {file.get('name', 'unknown')} due to error: {e}")
    
    # Generate documentation using the LLM
    documentation = generate_cds_model_documentation(content, llm_model)
    
    return documentation

In [None]:
docs = generate_cds_model_documentation_from_repo("https://github.tools.sap/Delivery-Scale-PT/opportunity-assistant-CAP.git")
print(docs)

# SAP CAP Project Documentation

## 1. Project File Structure

```
/var/folders/9d/1rdr_39n49n113vgdr6vpndc0000gn/T/tmpmgsi60ha
├── .eslintrc
├── README.md
├── xs-security.json
├── package.json
├── mta.yaml
├── db
│   ├── schema.cds
│   ├── undeploy.json
│   └── src
│       └── .hdiconfig
├── srv
│   ├── api.cds
│   ├── server.js
│   ├── AICoreUtilChatbot.js
│   ├── CFUtil.js
│   ├── bucketUtil.js
│   ├── AICoreUtil.js
│   ├── api.js
│   └── util
│       ├── AxiosUtil.js
│       ├── DataServiceUtil.js
│       └── sharepoint.js
```

## 2. CAP Application Files Overview

| File Name                | Description                                                                 |
|--------------------------|-----------------------------------------------------------------------------|
| .eslintrc                | Configuration file for ESLint, specifying rules and environments.           |
| README.md                | Instructions for running, building, deploying, and debugging the application.|
| xs-security.json         | Security configuration for the application, defining scopes and roles.      |
| package.json             | Node.js project metadata and dependencies.                                  |
| mta.yaml                 | Multi-target application descriptor for deployment configuration.           |
| schema.cds               | CDS file defining the data model entities and types.                        |
| undeploy.json            | Specifies files to be undeployed.                                           |
| .hdiconfig               | Configuration for HANA deployment artifacts.                                |
| api.cds                  | CDS file defining the service layer and API endpoints.                      |
| server.js                | Main server file for handling HTTP and WebSocket connections.               |
| AICoreUtilChatbot.js     | Utility functions for interacting with AI Core Chatbot.                     |
| CFUtil.js                | Utility functions for Cloud Foundry operations.                             |
| bucketUtil.js            | Utility functions for AWS S3 bucket operations.                             |
| AICoreUtil.js            | Utility functions for AI Core operations.                                   |
| api.js                   | Service implementation for handling API requests.                           |
| AxiosUtil.js             | Utility class for Axios HTTP client with token management.                  |
| DataServiceUtil.js       | Utility class for data service operations.                                  |
| sharepoint.js            | Utility functions for SharePoint integration.                               |

## 3. Data Model Representation

```markdown
### Entities

#### EntityObject
- **Attributes:**
  - `external_id`: String(100)
  - `description`: String
  - `metadata`: String(5000)
  - `status`: STATUS (default: 'INITIAL')
- **Keys:**
  - `ID`: cuid
- **Associations:**
  - `files`: Association to many `EntityObjectFile` on `files.ID = $self.ID`

#### EntityObjectFile
- **Attributes:**
  - `filename`: String
  - `location`: String
  - `size`: String
  - `type`: String
  - `comments`: String
- **Keys:**
  - `ID`: cuid
- **Associations:**
  - `entityObject`: Association to one `EntityObject`

### Types

#### STATUS
- **Type:** String enum
- **Values:**
  - `INITIAL`
  - `FILES_UPLOADED`
  - `FILES_STORED_IN_VECTOR_DB`
```

## 4. Tables, Views and Types

| Name              | Type   | Fields                                                                 | Annotations | Description                          |
|-------------------|--------|------------------------------------------------------------------------|-------------|--------------------------------------|
| EntityObject      | Table  | external_id: String(100), description: String, metadata: String(5000), status: STATUS | [UNKNOWN]   | Represents an object with associated files. |
| EntityObjectFile  | Table  | filename: String, location: String, size: String, type: String, comments: String | [UNKNOWN]   | Represents a file associated with an object. |
| STATUS            | Type   | INITIAL, FILES_UPLOADED, FILES_STORED_IN_VECTOR_DB                      | [UNKNOWN]   | Enum type for status values.         |

## 5. CDS Definitions

### EntityObject
- **CRUD Operations Supported:** Create, Read, Update, Delete
- **Fields:**
  - `external_id`: String(100)
  - `description`: String
  - `metadata`: String(5000)
  - `status`: STATUS (default: 'INITIAL')
- **Description:** Represents an object with associated files.
- **Annotations:** [UNKNOWN]

### EntityObjectFile
- **CRUD Operations Supported:** Create, Read, Update, Delete
- **Fields:**
  - `filename`: String
  - `location`: String
  - `size`: String
  - `type`: String
  - `comments`: String
- **Description:** Represents a file associated with an object.
- **Annotations:** [UNKNOWN]

## 6. Function and Action Imports

| Name                                | Description                                                                 | Supported Operations | Associated Entities |
|-------------------------------------|-----------------------------------------------------------------------------|----------------------|---------------------|
| getAdminOptions                     | Retrieves admin options.                                                    | Function             | [UNKNOWN]           |
| setAdminOptions                     | Sets admin options with a default approver.                                 | Action               | [UNKNOWN]           |
| chatMemory                          | Performs chat with memory functionality.                                    | Action               | [UNKNOWN]           |
| chatHistoryByUserId                 | Retrieves chat history by user ID.                                          | Action               | [UNKNOWN]           |
| isChatbotEnabled                    | Checks if the chatbot is enabled.                                           | Function             | [UNKNOWN]           |
| getSharepointConnectUrl             | Retrieves SharePoint connection URL.                                        | Function             | [UNKNOWN]           |
| getAzureToken                       | Retrieves Azure token.                                                      | Function             | [UNKNOWN]           |
| getBidCases                         | Retrieves bid cases with pagination.                                        | Function             | [UNKNOWN]           |
| getBidCasesWithDeliverables         | Retrieves bid cases with deliverables.                                      | Function             | [UNKNOWN]           |
| getStats                            | Retrieves statistics.                                                       | Function             | [UNKNOWN]           |
| isAzureTokenValid                   | Checks if the Azure token is valid.                                         | Function             | [UNKNOWN]           |
| getFilesByCaseId                    | Retrieves files by case ID.                                                 | Function             | [UNKNOWN]           |
| getAllFiles                         | Retrieves all files.                                                        | Function             | [UNKNOWN]           |
| searchFilesByExternalIdOrFileName   | Searches files by external ID or file name.                                 | Function             | [UNKNOWN]           |
| deleteFilesByCaseId                 | Deletes files by case ID.                                                   | Action               | [UNKNOWN]           |
| refreshChat                         | Refreshes chat with a specified model.                                      | Action               | [UNKNOWN]           |
| loadBidCaseDeliverablesToVectorDB   | Loads bid case deliverables to vector database.                             | Function             | [UNKNOWN]           |

## 7. Event Handlers

| Handler Type | Associated Entity/Function/Action | Description                                      | Event Type | Key Logic or Implementation Notes |
|--------------|-----------------------------------|--------------------------------------------------|------------|-----------------------------------|
| on           | getBidCases                       | Fetches bid cases with pagination.               | Read       | Handles errors and returns bid cases. |
| on           | getBidCasesWithDeliverables       | Fetches bid cases with deliverables.             | Read       | Fetches files for each bid case. |
| on           | loadBidCaseDeliverablesToVectorDB | Loads deliverables to vector DB.                 | Create     | Adds and drops locks for case ingestion. |
| on           | getFilesByCaseId                  | Fetches files by case ID.                        | Read       | Handles errors and returns files. |
| on           | deleteFilesByCaseId               | Deletes files by case ID.                        | Delete     | Handles errors during deletion. |
| on           | getAllFiles                       | Fetches all files.                               | Read       | Handles errors and returns files. |
| on           | refreshChat                       | Refreshes chat with a specified model.           | Update     | Handles errors during refresh. |
| on           | getStats                          | Fetches statistics.                              | Read       | Calculates total bid cases and files. |
| on           | getSharepointConnectUrl           | Retrieves SharePoint connection URL.             | Read       | Constructs URL based on context. |
| on           | getAzureToken                     | Retrieves Azure token.                           | Read       | Stores token in global store. |
| on           | isAzureTokenValid                 | Checks if Azure token is valid.                  | Read       | Validates token expiration. |
| on           | updatePreference                  | Updates user preference.                         | Update     | Updates preference based on user email. |
| on           | userInfo                          | Retrieves user information.                      | Read       | Returns user details and admin status. |
| on           | searchFilesByExternalIdOrFileName | Searches files by external ID or file name.      | Read       | Returns search results. |
| on           | getAdminOptions                   | Retrieves admin options.                         | Read       | Fetches admin preferences. |
| on           | setAdminOptions                   | Sets admin options.                              | Update     | Updates admin preferences. |
| on           | chatMemory                        | Performs chat with memory functionality.         | Create     | Returns chat response. |
| on           | chatHistoryByUserId               | Retrieves chat history by user ID.               | Read       | Returns chat history. |
| on           | isChatbotEnabled                  | Checks if chatbot is enabled.                    | Read       | Returns chatbot status. |

---
