# Generate Documentation Worksheet

## Step 1: Generate global description and titles

Start by loading all dependencies and reading the connection settings from the environment.

In [None]:

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from openai import AzureOpenAI
from dotenv import load_dotenv
import json
import os

# Reload the settings from the .env file. `override=True` lets the values from
# .env replace variables that are already set in the process, so re-running
# this cell picks up .env edits. Unlike `os.environ.clear()`, this keeps the
# rest of the process environment (PATH, proxy and certificate settings, ...)
# that the HTTP clients used below may rely on.
load_dotenv(override=True)

### 1 - Load data from the search index of the given app

In [None]:
# Build the Azure AI Search client from the connection settings in the environment
search_endpoint = os.getenv("AZURE_SEARCH_ENDPOINT")
search_index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")
search_credential = AzureKeyCredential(os.getenv("AZURE_SEARCH_KEY"))
search_client = SearchClient(search_endpoint, search_index_name, search_credential)

# Match every document ("*") but fetch only the title, code_summary and
# code_description fields
results = search_client.search(
    search_text="*",
    query_type='simple',
    select='title,code_summary, code_description',
    include_total_count=True,
)

# `results` is an iterator object (azure.core.paging.ItemPaged): walk it once
# and split each document into a description entry and a summary entry
context = ""
code_summaries = []
code_descriptions = []

for result in results:
    code_file = result['title']
    code_descriptions.append(
        {"code_file": code_file, "code_description": result['code_description']}
    )
    code_summaries.append(
        {"code_file": code_file, "code_summary": result['code_summary']}
    )

# The serialized summaries are the context injected into the system prompt
context = json.dumps(code_summaries)

# Report how many entries were retrieved
print(f"Got {len(code_summaries)} code entries")

### 2 - Define the prompt to generate the general description of the application

In [None]:
# Name of the documented application, read from the environment
app_name = os.getenv("APP_NAME")

# System prompt: role, tasks and output rules for the LLM, followed by the
# JSON array of code summaries it must use as its only reference
system_prompt = f"""
    YOUR ROLE :
    - You are a Delphi Software expert with 20 years of experience in banking application development.
    - You MUST describe what the application does at a high level. 
    - Focus on business logic and not the technical details.
    - Explain the business rules in an easy to understand manner for a non technical persona.

    YOUR TASKS :
    - Your job is to generate the functional and technical documentation of all the application based on given source code files.
    - You list *ALL the functionalities of the application* and give reference to *ALL the source code files that are related to each functionality*.
                    
    YOU MUST RESPECT THE FOLLOWING RULES:
    - NO unicode characters in the response.
    - NEVER escape ` characters in the response.
    - The response MUST be written in French.
    - Any reference to a person or a date MUST be removed.
    - You MUST GIVE the code_file names as reference IN each functionnal description section of the documentation.
    - Your output MUST be in a Markdown format ready for a Wiki page.
    - Use the provided Json Array as your only reference for generating the documentation. 
    - NO MORE than 4000 tokens in the response.
    - Add a disclaimer after the main title to say that the document has been generated by an AI
    
    VERBOSITY LEVEL : MEDIUM
    
    CODE SUMMARY JSON ARRAY :
    {context}
"""

print(system_prompt)

# User prompt: triggers the generation and imposes the main document title
user_prompt = f"""
    Please describe what the application named '{app_name}' is doing at a high level.
    The Main document title must be '{app_name} - Documentation Fonctionnelle'
    """

print(user_prompt)

# Few-shot learning: one sample exchange teaches the LLM what we want as output

# Sample input: two code summaries, shaped like the entries of the real context
egg_pas = """{'code_file': 'egg.pas', 'code_summary': "Le code de 'egg.pas' est associé à un écran de gestion et de création des entité "oeufs" dans le jeu. Il permet l'accès à la recherche et la création selon le profil de l'animal. Le formulaire inclut des champs pour saisir des informations sur l'animal, les caractéristiques de l'oeuf ainsi que des boutons pour exécuter la recherche ou la création et des vérifications de validité sur les saisies. Il gère également les interactions avec un système externe pour vérifier l'existence des entités animal et oeuf pour créer de nouveaux liens entre eux. Des messages d'erreur sont affichés en cas de problèmes ou de saisies invalides."}"""
animal_pas = """{'code_file': 'animal.pas', 'code_summary': "Le fichier animal.pas définit un formulaire (TAnimalForm) pour la recherche d'une entité animal dans le jeu. Il comprend un champ de saisie pour entrer le nom de l'animal et son type ainsi qu'un bouton de recherche. Lorsque l'utilisateur appuie sur ce bouton, le formulaire valide la saisie et, si elle est correcte, sauvegarde le contexte et inclut un nouveau formulaire (AnimalType) pour afficher les informations détaillées de l'animal. La possibilité de chercher est contrôlée par les habilitations de l'utilisateur."}"""

# Wrap both summaries in a bracketed list, one per element
user_input_sample = f"""
[{egg_pas}, {animal_pas}]
"""
print(user_input_sample)

# Sample answer: the documentation section expected for the input above
assistant_answer = """
### Recherche et Sélection
- **Recherche et sélection des animaux et oeufs associés** : Fournit des outils pour rechercher et sélectionner des entités animales et oeufs dans le jeu.
- *Fichiers associés* : `egg.pas`, `animal.pas`
"""

## Initialize the documentation

Generate the global description of the application & the titles of the different sections.

In [None]:
# Create the target directory if it does not exist
os.makedirs('doc', exist_ok=True)

# Azure OpenAI client, configured from the environment
gpt4turbo_client = AzureOpenAI(
    api_version=os.getenv("OPENAI_API_TURBO_API_VERSION"),
    azure_endpoint=os.getenv("OPENAI_API_TURBO_ENDPOINT"),
    api_key=os.getenv("OPENAI_API_TURBO_KEY"),
)

# Call the LLM to generate the global documentation. The messages are: the
# system prompt, one few-shot exchange (sample input and expected answer),
# then the real request.
response = gpt4turbo_client.chat.completions.create(
    model=os.getenv("OPENAI_API_TURBO_DEPLOYMENT_NAME"),
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input_sample},
        {"role": "assistant", "content": assistant_answer},
        {"role": "user", "content": user_prompt},
    ],
    temperature=0.5,
    max_tokens=4000,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
)
completion = response.choices[0].message.content

# Fail before touching the output file: opening it in 'w' mode truncates any
# previously generated document, so a response without text must not get there.
if completion is None:
    raise ValueError("The LLM returned no content for the global documentation")

# Print the first 100 characters of the completion as a quick sanity check
print(completion[0:100])

# NOTE(review): the file is written with the platform default encoding, like
# the other open() calls on this file; keep them aligned if one is changed.
filepath = 'doc/{app_name}.md'.format(app_name=app_name)
with open(filepath, 'w') as file:
    file.write(completion)

# Keep the global description available for the next steps
app_context = completion


## Step 2: Generate detailed descriptions of the features

### 1 - Extract a JSON list of features from the previous outcome 

First, we take the Markdown result and extract a JSON list from it, to prepare the loop that describes each business aspect.

In [None]:
# System prompt for the extraction step: turn the global description generated
# previously into a JSON list of functionalities with their code filenames
system_prompt = f"""
    YOUR ROLE :
    - You are a Delphi Software expert with 20 years of experience in banking application development.
    - You are writing a documentation on a banking app named {app_name}

    YOUR TASKS :
    - Your job is to take the given description and extract a JSON array of the functional aspect of the application with the associated code filenames

    YOU MUST RESPECT THE FOLLOWING RULES:
    - You focus only on the functionalities part of the input
    - You MUST extract an Array of object containing informations on functionalities
    - Each object in the array returned must have a 'title' and a 'files' property
"""

print(system_prompt)

# The sample input and output form a few-shot example that helps shape more
# predictable completions.

# Sample input: an excerpt shaped like the global description generated in step 1
sample_input = """
## Fonctionnalités Principales
    ### Fonctionnalités Générales et Utilitaires
- **Bibliothèque de fonctions communes** : Contient des fonctions réutilisables pour la conversion de données, la validation, la gestion de l'interface utilisateur et l'affichage de messages.
- *Fichier associé* : `shared.pas`

### Recherche et Sélection
- **Recherche et sélection des animaux et oeufs associés** : Fournit des outils pour rechercher et sélectionner des entités animales et oeufs dans le jeu.
- *Fichiers associés* : `egg.pas`, `animal.pas`
"""

# Sample output: must be strictly valid JSON (double-quoted keys and strings),
# because the answer to the real request is parsed with json.loads() and the
# example is what the model imitates.
sample_output = """
{
    "functionalities":
    [
        {
            "title": "Fonctionnalités Générales et Utilitaires",
            "files": ["shared.pas"]
        },
        {
            "title": "Recherche et Sélection",
            "files": ["egg.pas", "animal.pas"]
        }
    ]
}
"""

# Conversation sent to the LLM: system prompt, one few-shot exchange, then the
# global description generated in step 1 as the real input
extraction_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": sample_input},
    {"role": "assistant", "content": sample_output},
    {"role": "user", "content": completion},
]

# Call the LLM API, asking for a JSON object as response format
response = gpt4turbo_client.chat.completions.create(
    model=os.getenv("OPENAI_API_TURBO_DEPLOYMENT_NAME"),
    response_format={"type": "json_object"},
    messages=extraction_messages,
    temperature=0.5,
    max_tokens=4000,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
)

# Raw JSON text listing the functionalities and their files
json_chapters = response.choices[0].message.content

print(json_chapters)

### 2 - Prepare the prompt for chapter generation

In [1]:
# System prompt to generate the documentation of a given functionality.
# It replaces the `system_prompt` used by the previous steps and is sent
# unchanged with every chapter request made by generate_documentation().
system_prompt = """
    YOUR ROLE :
    - You are a Delphi Software expert with 20 years of experience in banking application development.
    - You MUST describe what the application does at a high level. 
    - Focus on business logic and not the technical details.
    - Detail the business rules in an easy to understand manner for a non technical persona.

    YOUR TASKS :
    - Your job is to generate the functional and technical documentation of the given functionaliy of the application, based on given source code files.
                    
    YOU MUST RESPECT THE FOLLOWING RULES:
    - Generate a structured documentation in MARKDOWN format.
    - The top level title you generate must be a level 2 title (##).
    - NO unicode characters in the response.
    - NEVER escape ` characters in the response.
    - The response MUST be written in French.
    - Any reference to a person or a date MUST be removed.
    - Your output MUST be in a Markdown format ready for a Wiki page.
    - Use the Code files descriptions given by the user as your only references for generating the documentation. 
    - NO MORE than 4000 tokens in the response.
    
    VERBOSITY LEVEL : HIGH

"""

print(system_prompt)

# User prompt template to trigger the generation of one chapter.
# It is deliberately NOT formatted here: the {functionality} and
# {code_descriptions} placeholders are filled per chapter with
# user_prompt.format(...) in generate_documentation().
user_prompt = """
    Please describe what the code is doing regarding the {functionality} feature.
    ========================================
    CODE FILES DESCRIPTIONS : 
    {code_descriptions}
    """
    
print(user_prompt)


    YOUR ROLE :
    - You are a Delphi Software expert with 20 years of experience in banking application development.
    - You MUST describe what the application does at a high level. 
    - Focus on business logic and not the technical details.
    - Detail the business rules in an easy to understand manner for a non technical persona.

    YOUR TASKS :
    - Your job is to generate the functional and technical documentation of the given functionaliy of the application, based on given source code files.
                    
    YOU MUST RESPECT THE FOLLOWING RULES:
    - Generate a structured documentation in MARKDOWN format.
    - The top level title you generate must be a level 2 title (##).
    - NO unicode characters in the response.
    - NEVER escape ` characters in the response.
    - The response MUST be written in French.
    - Any reference to a person or a date MUST be removed.
    - Your output MUST be in a Markdown format ready for a Wiki page.
    - Use the Code files

### 3 - Utility function to structure the document

Numbering the chapters and the sections

In [None]:
import re

# An ATX Markdown title: one or more '#', whitespace, then the title text
_TITLE_PATTERN = re.compile(r'^(#+)[ \t]+(\S.*?)[ \t]*$')
# Markers that open or close a fenced code block
_FENCE_MARKERS = ('```', '~~~')


def _number_markdown_titles(content):
    """Return `content` with cascading numbers inserted into its titles."""
    counters = []          # current number at each title level, outermost first
    inside_fence = False   # True while the lines belong to a fenced code block
    numbered_lines = []

    for line in content.split('\n'):
        if line.lstrip().startswith(_FENCE_MARKERS):
            # Simple toggle: '#' lines inside code fences are comments, not titles
            inside_fence = not inside_fence
        elif not inside_fence:
            match = _TITLE_PATTERN.match(line)
            if match:
                marks, text = match.groups()
                level = len(marks)
                if level > len(counters):
                    # Going deeper: pad every skipped level too, so that
                    # sibling titles keep the same number of components
                    counters.extend([1] * (level - len(counters)))
                else:
                    # Same level or back up: drop deeper counters, then count
                    del counters[level:]
                    counters[-1] += 1
                line = "{marks} {number}. {text}".format(
                    marks=marks,
                    number='.'.join(map(str, counters)),
                    text=text,
                )
        numbered_lines.append(line)

    return '\n'.join(numbered_lines)


# function that loops through all titles in a MD file and adds cascading numbering to them
def add_numbering_to_titles(filepath, encoding=None):
    """Rewrite the Markdown file at `filepath` with numbered titles.

    Each title line ('#', '##', ...) gets a cascading number derived from its
    level, e.g. '# 1. Title', '## 1.1. Section', '## 1.2. Section'. Lines
    inside fenced code blocks are left untouched.

    Args:
        filepath: path of the Markdown file, modified in place.
        encoding: text encoding used to read and write the file; None keeps
            the platform default.
    """
    with open(filepath, 'r', encoding=encoding) as file:
        content = file.read()

    numbered_content = _number_markdown_titles(content)

    # Write the numbered content back to the file
    with open(filepath, 'w', encoding=encoding) as file:
        file.write(numbered_content)

### 4 - Loop on chapters & complete document

In [None]:
import concurrent.futures

# Parse the JSON text returned by the extraction step and keep its chapter list
result = json.loads(json_chapters)
chapters = result['functionalities']

# Index the code descriptions by file name for direct lookup per chapter
codefiles = {
    entry["code_file"]: entry["code_description"]
    for entry in code_descriptions
}

# Define a function to generate the documentation for a chapter
def generate_documentation(chapter):
    """Generate the documentation text of one chapter with the LLM.

    Args:
        chapter: mapping with a "title" and the list of "files" it covers.

    Returns:
        A (title, content) tuple, where content is the text of the LLM answer.
    """
    chapter_title = chapter["title"]
    print("start generation for chapter: " + chapter_title)

    # One description section per file; files without a known (non-empty)
    # description are left out of the prompt
    description_sections = []
    for filename in chapter["files"]:
        file_description = codefiles.get(filename)
        if not file_description:
            continue
        description_sections.append("""
                FILENAME: {file}
                DESCRIPTION: 
                {description}
                ========================================
            """.format(file=filename, description=file_description))

    # Fill the user prompt template with the chapter title and its descriptions
    chapter_request = user_prompt.format(
        functionality=chapter_title,
        code_descriptions="".join(description_sections),
    )

    # LLM call to generate the documentation for the chapter
    llm_response = gpt4turbo_client.chat.completions.create(
        model=os.getenv("OPENAI_API_TURBO_DEPLOYMENT_NAME"),
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": chapter_request},
        ],
        temperature=0.5,
        max_tokens=4000,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # Hand back the title together with the generated text
    return chapter['title'], llm_response.choices[0].message.content

# Generate the chapters in parallel, with at most `batch_size` LLM calls in
# flight at any time, to avoid triggering the TPM limit of the API
batch_size = 4

# The pool size is what actually bounds the concurrency. Executor.map returns
# the results in the order of `chapters`, whatever order the calls finish in.
with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
    results = list(executor.map(generate_documentation, chapters))

# Append the chapters to the app_name.md file in the original order
with open(filepath, 'a') as file:
    for title, content in results:
        # Each chapter starts with a level 1 title; the generated content uses
        # level 2 titles, as required by the chapter system prompt
        file.write("\n# {title}\n".format(title=title))
        file.write(content)
        print(title + " documentation generated & added to the document")

# Add numbering to the titles in the file
add_numbering_to_titles(filepath)
