# Code enrichment

This notebook is used to add context to the code.

## 1. Setup OpenAI API

First, we set up the OpenAI API.


In [None]:
import openai
import os

# Credentials and endpoint are taken from the environment; both are None when unset.
API_KEY = os.environ.get('AZURE_OPENAI_API_KEY')
RESOURCE_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT')

# Configure the openai module for an Azure-hosted deployment.
openai.api_type = 'azure'
openai.api_version = '2023-03-15-preview'
# Fall back to placeholder values when the environment variables are missing or empty.
openai.api_key = API_KEY or ''
openai.api_base = RESOURCE_ENDPOINT or 'https://<your-api-name>.openai.azure.com'

# Name passed as `engine` to the chat completion calls further down.
gpt_model_name = '<Add your gpt model name here>'

## 2. Setup the system commands to generate context

There are several system commands that we use to generate context:

* `system_command_description`: This command is used to generate a description of the implementation that is added at the beginning of the file as a comment.
* `system_command_documented_code`: This command is used to return the code fully documented (e.g. with javadoc for Java).

In [None]:
def _system_message(content):
    """Wrap the given prompt text in a chat message carrying the 'system' role."""
    return {'role': 'system', 'content': content}


# Prompt used to obtain a prose description of the code.
system_command_description = _system_message(
    'Assistant is an AI chatbot that helps users turn a Java class implementation into a complete description and explanation of the java code, how it can be improve and what are the issues with the code.'
)

# Prompt used to obtain the same code with documentation added.
system_command_documented_code = _system_message(
    'Assistant is an AI chatbot that helps users turn a Java code into a documented java code. Only the code is needed. Document the class just before its definition. Documents attributes and methods too. Create a detailed documentation.'
)

## 3. Enrich the code

### 3.1. Setup the parameters

First, all parameters are defined. The values below are for Java code processing. If you want to process another language, you need to change the values of the parameters and the system commands above.

In [None]:
# Root directory of the code to process (walked recursively)
code_root_directory = '<add your path here>'

# Extensions of the files treated as code files
code_file_extensions = ['.java']

# Opening and closing markers of a block comment in the processed language
code_comment_block_start, code_comment_block_end = '/*', '*/'

# Folder that receives the enriched code.
# The output path is built by joining this folder with the walked directory path.
output_folder = '/outputs'

# Keywords identifying folders to skip (matched as substrings of the path)
ignore_paths = ['.git', '.vscode', 'target', 'node_modules', 'build']

# Whether a file already present in the output folder is replaced (True) or skipped (False)
replace_code_file = False

### 3.2. Common functions

The functions below check whether a path must be ignored, check whether a file has one of the expected extensions, and compute the output directory path. The last function copies the content of a source file to a destination file.

In [None]:
def is_path_ignored(path):
    """Tell whether the path contains one of the ignore keywords.

    The test is a plain substring match of each entry of ``ignore_paths``
    against the whole path.

    Parameters
    ----------
    path : str
        The path to be checked.

    Returns
    -------
    bool
        True if at least one ignore keyword occurs in the path, False otherwise.
    """
    return any(keyword in path for keyword in ignore_paths)

def is_file_valid_code_file(filename):
    """Tell whether the file name ends with one of the code file extensions.

    The accepted extensions are the entries of ``code_file_extensions``.

    Parameters
    ----------
    filename : str
        The name of the file to be checked.

    Returns
    -------
    bool
        True if the file name ends with an accepted extension, False otherwise.
    """
    # str.endswith accepts a tuple of suffixes and tests them all in one call.
    accepted_suffixes = tuple(code_file_extensions)
    return filename.endswith(accepted_suffixes)

def get_output_dirpath(dirpath):
    """Gets the output directory path for the given directory path.

    The directory path is re-rooted under ``output_folder``. A leading ``./``
    is dropped, and an absolute path is made relative by removing its drive
    and leading separators, so the result always lies below ``output_folder``
    (``os.path.join`` would otherwise discard ``output_folder`` and return the
    source path itself).

    Parameters
    ----------
    dirpath : str
        The directory path.

    Returns
    -------
    str
        The output directory path.
    """
    # normpath removes only a genuine leading './' (and redundant separators),
    # unlike str.replace which would also mangle names such as 'foo./bar'.
    relative_dirpath = os.path.normpath(dirpath)
    # Drop the drive (Windows) and the root separators of absolute paths.
    relative_dirpath = os.path.splitdrive(relative_dirpath)[1].lstrip(os.sep)
    if relative_dirpath in ('', os.curdir):
        # The current/root directory maps onto the output folder itself.
        return output_folder
    return os.path.join(output_folder, relative_dirpath)

def copy_file_to_destination(source_file, destination_file, buffer_size=1024*1024):
    """Stream the content of one open file object into another.

    Data is transferred chunk by chunk until a read returns an empty result.

    Parameters
    ----------
    source_file
        The file object to read from (must provide ``read(size)``).
    destination_file
        The file object to write to (must provide ``write(data)``).
    buffer_size : int, optional
        The size passed to each ``read`` call, by default 1024*1024.
    """
    chunk = source_file.read(buffer_size)
    while chunk:
        destination_file.write(chunk)
        chunk = source_file.read(buffer_size)

### 3.3. Enrich the code

To enrich the code, we use the following steps:

1. Check if the path is not ignored
2. Create the corresponding output folder if it does not exist
3. Loop on all the files in the input folder

For each file the following steps are performed:

1. Check if the file has the right extension
2. Read the file
3. Generate the description of the implementation
4. Generate the documented code
5. Add the description to the documented code
6. Write the enriched code to the output file

If the file does not have the right extension, it is copied to the output folder.

In [None]:
def _ask_model(system_command, code, temperature, max_tokens):
    """Send the code to the chat model and return the text of its first answer.

    Parameters
    ----------
    system_command : dict
        The system message that tells the model what to produce.
    code : str
        The source code, sent as the user message.
    temperature : float
        The sampling temperature for this request.
    max_tokens : int
        The maximum number of tokens the model may generate.

    Returns
    -------
    str
        The content of the first choice returned by the model.
    """
    response = openai.ChatCompletion.create(
        engine=gpt_model_name,
        messages=[
            system_command,
            {'role': 'user', 'content': code}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None
    )
    return response['choices'][0]['message']['content']


# Walk the code tree, writing an enriched copy of every code file and a plain
# copy of every other file into the output folder.
for dirpath, dirnames, filenames in os.walk(code_root_directory):
    if is_path_ignored(dirpath):
        # Everything below an ignored path is ignored too: do not descend.
        dirnames[:] = []
        continue
    # Prune ignored sub-directories in place so os.walk never visits them.
    dirnames[:] = [
        dirname for dirname in dirnames
        if not is_path_ignored(os.path.join(dirpath, dirname))
    ]

    # Create output directory if it does not exist
    output_dirpath = get_output_dirpath(dirpath)
    os.makedirs(output_dirpath, exist_ok=True)

    for filename in filenames:
        source_filepath = os.path.join(dirpath, filename)
        output_filepath = os.path.join(output_dirpath, filename)

        # If the file already exists in the output directory:
        #  - replace_code_file is False: keep it and move on to the next file
        #  - replace_code_file is True: delete it so that it is recreated below
        if os.path.exists(output_filepath):
            if not replace_code_file:
                continue
            os.remove(output_filepath)

        if not is_file_valid_code_file(filename):
            # Not a code file: copy it unchanged to the output directory
            with open(source_filepath, 'rb') as source_file, open(output_filepath, 'wb') as destination_file:
                copy_file_to_destination(source_file, destination_file)
            print('File {} copied successfully.'.format(output_filepath))
            continue

        # Read the code file and close it before the (slow) model calls.
        # NOTE(review): source files are assumed to be UTF-8 encoded.
        with open(source_filepath, 'r', encoding='utf-8') as code_file:
            code = code_file.read()

        description = _ask_model(
            system_command_description, code, temperature=0.5, max_tokens=800)
        documented_code = _ask_model(
            system_command_documented_code, code, temperature=0.2, max_tokens=2000)

        # The description goes first, wrapped in a block comment that is
        # closed before the documented code so the code itself stays active.
        enriched_code = (
            code_comment_block_start + '\n'
            + description + '\n'
            + code_comment_block_end + '\n\n'
            + documented_code
        )

        # Save the enriched code (UTF-8, since the model may return non-ASCII text)
        with open(output_filepath, 'w', encoding='utf-8') as output_file:
            output_file.write(enriched_code)
        print('File {} enriched successfully.'.format(output_filepath))