# Generating suggestions for writing source code in C# language based on NLP.


## GPT-2 approach

#### This notebook was adapted for the task of generating code suggestions. It uses ideas and code from the notebook by the author "Kayal, Arshabhi", available at: 
https://towardsdatascience.com/train-gpt-2-in-your-own-language-fc6ad4d60171

### Import libraries.

In [None]:
!pip install tokenizers

Collecting tokenizers
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 9.7 MB/s 
[?25hInstalling collected packages: tokenizers
Successfully installed tokenizers-0.10.3


In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.12.3-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 9.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 9.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 77.6 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 57.9 MB/s 
Installing collected packages: pyyaml, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed huggingface-hub-0.1.2 pyyaml-6.0 sacremoses-0.0.46 transformers-4.12.3


In [None]:
import os
import ntpath   
from chardet import detect
import nltk
import re
import h5py
import numpy as np
from toolz import unique
import pickle
import pandas as pd
from tokenizers.models import BPE
from tokenizers import Tokenizer
from tokenizers.decoders import ByteLevel as ByteLevelDecoder
from tokenizers.normalizers import NFKC, Sequence
from tokenizers.pre_tokenizers import ByteLevel
from tokenizers.trainers import BpeTrainer
from pathlib import Path
import codecs
import tensorflow as tf
from transformers import GPT2Config, TFGPT2LMHeadModel, GPT2Tokenizer, WEIGHTS_NAME, CONFIG_NAME
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential, load_model
import csv

#### Classes

In [None]:
class BPE_token(object):
    """Thin wrapper around a `tokenizers` BPE tokenizer.

    The wrapped tokenizer is configured with NFKC normalization, a byte-level
    pre-tokenizer and a byte-level decoder.
    """

    def __init__(self):
        self.tokenizer = Tokenizer(BPE())
        self.tokenizer.pre_tokenizer = ByteLevel()
        self.tokenizer.normalizer = Sequence([
            NFKC()
        ])
        self.tokenizer.decoder = ByteLevelDecoder()

    def bpe_train(self, paths):
        """
        Description: Train the BPE model on the given text files.
        :param paths: List of file paths used as training corpus.
        :return: void.
        """
        trainer = BpeTrainer(show_progress=True,
                             # The keyword was previously misspelled as
                             # "inital_alphabet", so the byte-level alphabet
                             # was never handed to the trainer.
                             initial_alphabet=ByteLevel.alphabet(),
                             special_tokens=["<s>",
                                             "<pad>",
                                             "</s>",
                                             "<unk>",
                                             "<mask>"
                                            ])
        self.tokenizer.train(paths, trainer)

    def save_tokenizer(self, location, prefix=None):
        """
        Description: Save the trained BPE model files into a directory.
        :param location: Target directory (created when missing),
        :param prefix: Optional prefix for the generated file names.
        :return: void.
        """
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(location, exist_ok=True)
        self.tokenizer.model.save(location, prefix)

### Generic functions.

In [None]:
def export_list_to_data_file(data, file_name):
    """
    Description: Serialize data with pickle and write it to a binary file.
    :param data: Object to serialize,
    :param file_name: Path of the file to create or overwrite.
    :return: void.
    """

    with open(file_name, mode='wb') as output_file:
        pickle.Pickler(output_file).dump(data)

In [None]:
def load_from_data_file(file_name):
    """
    Description: Read back an object previously stored with pickle.
    :param file_name: Path of the pickle file to read.
    :return - Type(list): The unpickled data.
    """

    # NOTE: pickle executes code embedded in the file; only load trusted files.
    with open(file_name, mode='rb') as input_file:
        return pickle.load(input_file)

In [None]:
def print_info(title, message = None, new_line = False):
    """
    Description: Print a title framed by two separator lines, optionally
    followed by a message.
    :param title: Text shown between the separator lines,
    :param message: Optional text printed after the frame (skipped when falsy),
    :param new_line: When True, blank lines are printed before the frame.

    :return: void.
    """

    separator = "####################################"

    if new_line:
        print('\n')

    for line in (separator, title, separator):
        print(line)

    if not message:
        return

    print("%s\n" % (message))

In [None]:
def get_sequence_of_numbers_from_string(str):
    """
    Description: Collect every run of consecutive decimal digits in a string.
    :param str: Text to scan (the name shadows the builtin; kept for callers).

    :return - Type(Array): Digit runs as strings, in order of appearance.
    """

    digit_runs = re.compile(r'[0-9]+')

    return digit_runs.findall(str)

In [None]:
def replace_sequence_of_numbers_for_mask(str_to_replace, 
                                         array_sequence_numbers_to_search, 
                                         mask_to_replace):
    """
    Description: Replace number sequences in a string with a mask, one
    occurrence per listed sequence.
    :param str_to_replace: Text in which the sequences are replaced,
    :param array_sequence_numbers_to_search: Sequences to look for; each one
        is converted with str() and used as a regular-expression pattern,
    :param mask_to_replace: Replacement text for each sequence.

    :return - Type(String): Text with the sequences replaced by the mask.
    """

    masked_text = str_to_replace

    # Only the first remaining match is replaced for each entry, so repeated
    # entries consume repeated occurrences from left to right.
    for digits in array_sequence_numbers_to_search:
        masked_text = re.sub(str(digits), mask_to_replace, masked_text, count=1)

    return masked_text

In [None]:
def get_encoding_type(file):
    """
    Description: Detect the encoding of a file from its raw bytes.
    :param file: Path of the file to inspect.

    :return - Type(String): Value of the 'encoding' entry reported by the
        detector for the file content.
    """

    with open(file, mode='rb') as binary_file:
        detection = detect(binary_file.read())

    return detection['encoding']

In [None]:
def change_enconding(source_file, enconding):
    """
    Description: Re-encode a text file in place.
    The content is decoded with the detected encoding, written to a temporary
    sibling file ("123" + original name) in the requested encoding, and the
    temporary file then replaces the original.
    :param source_file: File to re-encode,
    :param enconding: Encoding the file should have afterwards.

    :return: void. Decode/encode failures are reported with print() and leave
        the original file untouched.
    """

    from_codec = get_encoding_type(source_file)

    # Prefix only the final path component. Calling str.replace() on the whole
    # path would also rewrite any directory that has the same name as the file
    # (e.g. "repo/foo/foo"), producing a path in a non-existent directory.
    base_name = ntpath.basename(source_file)
    directory_part = source_file[:len(source_file) - len(base_name)]
    target_file = directory_part + "123%s" % (base_name)

    try:
        with open(source_file,
                  'r',
                  encoding=from_codec) as f, open(target_file,
                                                  'w',
                                                  encoding=enconding) as e:
            e.write(f.read())
    except (UnicodeDecodeError, UnicodeEncodeError) as error:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(target_file):
            os.remove(target_file)

        if isinstance(error, UnicodeDecodeError):
            print("Decode error for file: '%s'" % (source_file))
        else:
            print("Encode error for file: '%s'" % (source_file))
    else:
        # Single-step swap; also overwrites an existing destination on Windows.
        os.replace(target_file, source_file)

In [None]:
def check_utf8_encode(file_name):
    """
    Description: Check whether a file can be decoded as strict UTF-8.
    :param file_name: Path of the file to check.

    :return - Type(bool): True when the whole file decodes as UTF-8,
        False when a UnicodeDecodeError is raised while reading it.
    """
    try:
        # The context manager closes the handle; the previous version left
        # one open file per checked path.
        with codecs.open(file_name, encoding="utf-8", errors="strict") as handle:
            handle.read()
    except UnicodeDecodeError:
        return False

    return True

In [None]:
def flatten_list(list_to_flatten):
    """
    Description: Merge the items of every child iterable into one list
    (one nesting level only).
    :param list_to_flatten: Iterable of iterables.

    :returns - Type(List): Items of all children, in their original order.
    """

    flat_items = []

    for child_list in list_to_flatten:
        flat_items.extend(child_list)

    return flat_items

In [None]:
def remove_duplicate_items_from_list(list_to_remove_duplicates):
    """
    Description: Drop repeated child lists from a list of lists.
    :param list_to_remove_duplicates: List whose items are themselves lists.

    :returns - Type(List): The distinct child lists, each returned as a list.
    """

    # Lists are not hashable, so each child is compared as a tuple.
    rows_as_tuples = map(tuple, list_to_remove_duplicates)

    return [list(row) for row in unique(rows_as_tuples)]

In [None]:
def find_all_string_inside_another(complete_string, string_to_find):
    """
    Description: Generate the start index of every non-overlapping occurrence
    of a substring, scanning left to right.
    :param complete_string: Text to search in,
    :param string_to_find: Substring to look for.

    :returns - Type(Generator): Start positions in ascending order. Nothing
        is generated when the substring is empty or does not occur.
    """

    # An empty needle matches at every position without advancing the cursor,
    # which previously produced an endless stream of zeros.
    if not string_to_find:
        return

    step = len(string_to_find)
    start = complete_string.find(string_to_find)

    while start != -1:
        yield start
        start = complete_string.find(string_to_find, start + step)

### Read C# repository functions.

#### Filter C# class files from the root repository downloaded from: https://github.com/dotnet

In [None]:
def get_all_c_sharp_complete_file_names_for_each_class(root_directory):
    """
    Description: Walk a directory tree and collect the full path of every
    file whose name ends with ".cs".
    :param root_directory: Directory where the walk starts.

    :return - Type(List): Paths of all matching files found below the root.
    """

    c_sharp_extension = ".cs"

    return [
        os.path.join(current_directory, entry)
        for current_directory, _subdirectories, entries in os.walk(root_directory)
        for entry in entries
        if entry.endswith(c_sharp_extension)
    ]

### Pre-processing functions.

In [None]:
def tokenize_all_files(complete_file_names, path_to_save_tokens):
    """
    Description: Train a BPE tokenizer on the UTF-8 readable files and save
    the resulting model files into a folder.
    :param complete_file_names: Paths of all C# files,
    :param path_to_save_tokens: Folder that receives the tokenizer files.

    :returns - Void.
    """

    bpe = BPE_token()

    # Files that fail the strict UTF-8 check are left out of the corpus.
    training_files = [path for path in complete_file_names if check_utf8_encode(path)]

    bpe.bpe_train(training_files)
    bpe.save_tokenizer(path_to_save_tokens)

### GPT-2 Functions

In [None]:
def create_gpt2_model(tokens_path):
    """
    Description: Load the trained tokenizer and build a GPT-2 language model
    from a fresh configuration (no pretrained weights are loaded).
    :param tokens_path: Folder containing the saved tokenizer files.

    :return - Type(Tuple): (model, tokenizer).
    """

    tokenizer = GPT2Tokenizer.from_pretrained(tokens_path)
    tokenizer.add_special_tokens({
      "eos_token": "</s>",
      "bos_token": "<s>",
      "unk_token": "<unk>",
      "pad_token": "<pad>",
      "mask_token": "<mask>"
    })

    config = GPT2Config(
      # len(tokenizer) also counts tokens added by add_special_tokens();
      # tokenizer.vocab_size covers the base vocabulary only, which would
      # leave added token ids outside the embedding table.
      vocab_size=len(tokenizer),
      bos_token_id=tokenizer.bos_token_id,
      eos_token_id=tokenizer.eos_token_id
    )

    model = TFGPT2LMHeadModel(config)

    return (model, tokenizer)

In [None]:
def create_string_list_tokens(complete_file_names, tokenizer):
    """
    Description: Concatenate the content of all UTF-8 readable files, each
    followed by the tokenizer's end-of-sequence token, and encode the result.
    :param complete_file_names: Paths of the files to read,
    :param tokenizer: Object providing `eos_token` and `encode(text)`.

    :return: Whatever `tokenizer.encode` returns for the concatenated text.
    """

    documents = []

    for filename in complete_file_names:
        # The body previously opened the global `file_name` instead of the
        # loop variable, so every iteration read the same file.
        try:
            with open(filename, "r", encoding='utf-8') as f:
                documents.append(f.read())
        except UnicodeDecodeError:
            # Same files check_utf8_encode() rejects: skip them, and avoid
            # reading every file twice.
            continue

    eos_token = tokenizer.eos_token

    # join() avoids rebuilding an ever-growing string on each file.
    single_string = ''.join(document + eos_token for document in documents)

    return tokenizer.encode(single_string)

In [None]:
def create_tf_dataset_for_gpt2_training(tokens_list):
    """
    Description: Build a shuffled, batched tf.data dataset of next-token
    training pairs from a flat token list.
    :param tokens_list: Flat sequence of token ids.

    :return: Dataset of (inputs, labels) batches.
    """

    block_size = 100
    BATCH_SIZE = 12
    BUFFER_SIZE = 1000

    # Only complete blocks are used; a trailing partial block is dropped.
    stop = len(tokens_list) - block_size + 1
    blocks = [tokens_list[start:start + block_size]
              for start in range(0, stop, block_size)]

    # Labels are the inputs shifted one position to the left.
    inputs = [block[:-1] for block in blocks]
    labels = [block[1:] for block in blocks]

    shuffled = tf.data.Dataset.from_tensor_slices((inputs, labels)).shuffle(BUFFER_SIZE)

    return shuffled.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
def config_model_to_gpt2(model):
    """
    Description: Compile the model with an Adam optimizer, a sparse
    categorical cross-entropy loss on logits and an accuracy metric.
    :param model: Model exposing `compile` and `config.n_layer`.

    :return: The same model instance, compiled.
    """

    adam = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
    cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    accuracy = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

    # One loss for the first output, then one None per layer.
    losses = [cross_entropy]
    losses.extend([None] * model.config.n_layer)

    model.compile(optimizer=adam, loss=losses, metrics=[accuracy])

    return model

In [None]:
def create_checkpoint_gpt2(filepath_to_save_checkpoint):
    """
    Description: Build the callback list used for training: a single
    ModelCheckpoint monitoring 'logits_accuracy' with save_best_only enabled.
    :param filepath_to_save_checkpoint: Path where checkpoints are written.

    :return - Type(List): List containing the checkpoint callback.
    """
    return [
        ModelCheckpoint(
            filepath_to_save_checkpoint,
            monitor='logits_accuracy',
            verbose=1,
            save_best_only=True,
        )
    ]

In [None]:
def train_model_to_gpt2(model, dataset, num_epoch, batch_size, callbacks_list):
    """
    Description: Run `model.fit` on the dataset and hand back its result.
    :param model: Object exposing `fit`,
    :param dataset: Training data passed as first argument to `fit`,
    :param num_epoch: Forwarded as `epochs`,
    :param batch_size: Forwarded as `batch_size`,
    :param callbacks_list: Forwarded as `callbacks`.

    :return: The value returned by `model.fit`.
    """

    fit_options = {
        'epochs': num_epoch,
        'batch_size': batch_size,
        'callbacks': callbacks_list,
    }

    return model.fit(dataset, **fit_options)

In [None]:
def get_suggestions(previous_tokens, model, num_return_sequences, tokenizer):
    """
    Description: Generate text continuations for a prompt and decode them.
    :param previous_tokens: Prompt text,
    :param model: Object exposing `generate`,
    :param num_return_sequences: Forwarded to `generate`,
    :param tokenizer: Object exposing `encode` and `decode`.

    :return - Type(List): One decoded string per generated sequence.
    """

    encoded_prompt = tokenizer.encode(previous_tokens, return_tensors='tf')

    generated_sequences = model.generate(
        encoded_prompt,
        max_length=50,
        num_beams=1,
        temperature=0.3,
        no_repeat_ngram_size=0,
        num_return_sequences=num_return_sequences,
    )

    return [
        tokenizer.decode(sequence, skip_special_tokens = True)
        for sequence in generated_sequences
    ]

In [None]:
def save_model_to_external_file(folder_to_save_model, model_to_save, tokenizer_to_save=None):
    """
    Description: Save a model, its configuration and a tokenizer into
    './<folder_to_save_model>/'.
    :param folder_to_save_model: Folder name, relative to the working directory,
    :param model_to_save: Model to persist (its `.module` attribute is used
        instead when present),
    :param tokenizer_to_save: Tokenizer to persist. When omitted, the
        module-level `tokenizer` is used if one exists, which keeps existing
        two-argument calls working.

    :return: void.
    """

    output_dir = './' + folder_to_save_model + '/'

    # The previous version ignored this parameter and always saved the
    # global `model`.
    model_to_save = model_to_save.module if hasattr(model_to_save, 'module') else model_to_save

    output_config_file = os.path.join(output_dir, CONFIG_NAME)

    model_to_save.save_pretrained(output_dir)

    model_to_save.config.to_json_file(output_config_file)

    if tokenizer_to_save is None:
        tokenizer_to_save = globals().get('tokenizer')

    if tokenizer_to_save is not None:
        tokenizer_to_save.save_pretrained(output_dir)

### Format result for possible code that compiles

In [None]:
def select_type_code(suggestion):
    """
    Description: Classify a suggestion by which marker appears first in it.
    :param suggestion: Suggestion text to inspect.

    :return - Type(int): 1 when "(" comes first (method), 2 when " Class"
        comes first (class), 3 when "{" comes first (property), 0 when none
        of the markers is present.
    """

    # Sentinel that sorts after every real index in the text.
    not_found = len(suggestion) + 1

    # NOTE(review): the class marker is capitalized (" Class") while the C#
    # keyword is lowercase; confirm this is intended.
    markers = ((1, "("), (2, " Class"), (3, "{"))

    first_positions = {}
    for type_code, marker in markers:
        index = suggestion.find(marker)
        first_positions[type_code] = not_found if index == -1 else index

    for type_code, position in first_positions.items():
        if position == not_found:
            continue

        competitors = [other for code, other in first_positions.items() if code != type_code]

        if all(position < other for other in competitors):
            return type_code

    return 0


In [None]:
def remove_comments_from_suggestion(suggestion):
    """
    Description: Drop comment lines from a suggestion.
    A line is dropped when it starts with "//" or "/*", or ends with "*/".
    The test is done on the raw line, so indented comments are kept.
    :param suggestion: Suggestion text, lines separated by a newline.

    :return - Type(String): Remaining lines, each terminated by a newline.
    """

    kept_lines = []

    for line in suggestion.split('\n'):
        if line.startswith(("//", "/*")):
            continue
        if line.endswith("*/"):
            continue
        kept_lines.append(line + "\n")

    return "".join(kept_lines)

In [None]:
def format_suggestion(previous_tokens, suggestion):
    """
    Description: Reduce a raw suggestion to its first two non-comment lines.
    :param previous_tokens: Prompt that produced the suggestion (not used by
        the formatting; kept for the existing call sites),
    :param suggestion: Raw suggestion text.

    :return - Type(String): First two remaining lines joined by a newline,
        or "" when the suggestion is not a string.
    """
    try:
        without_comments = remove_comments_from_suggestion(suggestion)
    except (AttributeError, TypeError):
        # Invalid suggestion (e.g. None). A bare `except:` would also have
        # swallowed KeyboardInterrupt and SystemExit.
        return ""

    return '\n'.join(without_comments.split('\n')[:2])

### Generate common C# data to test suggestions

In [None]:
def load_csv_to_test_previous_tokens_list(csv_file_path):
    """
    Description: Read a header-less, ';'-separated CSV file and turn every
    row into one prompt string.
    :param csv_file_path: Path of the CSV file.

    :return - Type(List): One string per row: the row's string cells joined
        by single spaces (non-string cells such as missing values are skipped).
    """
    frame = pd.read_csv(csv_file_path, delimiter=";", header=None)

    # Exact type test (not isinstance) to mirror the cell filter used so far.
    return [
        " ".join(cell for cell in row if type(cell) is str)
        for _, row in frame.iterrows()
    ]

In [None]:
def save_suggestions_to_csv_file(file_path_csv, all_suggestions):
    """
    Description: Write one CSV row per suggestion.
    :param file_path_csv: Path of the CSV file to create or overwrite,
    :param all_suggestions: Sequence of items where item[0] is the prompt and
        item[1] is the list of raw suggestions for that prompt.

    :return: void.
    """
    data_to_save_in_csv = [
        [suggestions[0], format_suggestion(suggestions[0], suggestion)]
        for suggestions in all_suggestions
        for suggestion in suggestions[1]
    ]

    # newline='' is what the csv module requires: without it, Windows gets an
    # extra blank line per row and newlines embedded in suggestions are
    # altered. UTF-8 keeps non-ASCII source text writable on any platform.
    with open(file_path_csv, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(data_to_save_in_csv)

### Main flow

#### Mounting Google Drive (For Colab)

In [None]:
# Mount Google Drive at /content/drive; the Colab path constants point there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Create constants for directories (Local Jupyter Notebook or Colab)

In [None]:
# Switch between Google Colab (paths on the mounted Drive) and a local
# Jupyter run (paths relative to the working directory).
executing_in_colab = True

# Tokenizer, checkpoint and dataset locations.
PATH_TO_TOKENS_LIST = (
    '/content/drive/MyDrive/TCC/GPT2/tokens_list.data' if executing_in_colab
    else 'tokens_list.data'
)
DIRECTORY_GENERATED_TOKENS = (
    '/content/drive/MyDrive/TCC/GPT2/' if executing_in_colab
    else 'GPT2_Generated_Tokens'
)
PATH_CHECKPOINT_TRAIN = (
    '/content/drive/MyDrive/TCC/GPT2/CheckPoint' if executing_in_colab
    else 'CheckPoint'
)
DIRECTORY_TF_DATASET = (
    '/content/drive/MyDrive/TCC/GPT2/TFDataset' if executing_in_colab
    else 'TFDataset'
)

# Prompt files used for testing (1 to 4 previous words).
PATH_TO_TEST_FILE_1_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/previous_tokens_for_test_1_word.csv' if executing_in_colab
    else 'previous_tokens_for_test_1_word.csv'
)
PATH_TO_TEST_FILE_2_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/previous_tokens_for_test_2_word.csv' if executing_in_colab
    else 'previous_tokens_for_test_2_word.csv'
)
PATH_TO_TEST_FILE_3_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/previous_tokens_for_test_3_word.csv' if executing_in_colab
    else 'previous_tokens_for_test_3_word.csv'
)
PATH_TO_TEST_FILE_4_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/previous_tokens_for_test_4_word.csv' if executing_in_colab
    else 'previous_tokens_for_test_4_word.csv'
)

# CSV outputs with the formatted suggestions.
PATH_TO_SAVE_SUGGESTIONS_1_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_1_word.csv' if executing_in_colab
    else 'suggestions_1_word.csv'
)
PATH_TO_SAVE_SUGGESTIONS_2_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_2_word.csv' if executing_in_colab
    else 'suggestions_2_word.csv'
)
PATH_TO_SAVE_SUGGESTIONS_3_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_3_word.csv' if executing_in_colab
    else 'suggestions_3_word.csv'
)
PATH_TO_SAVE_SUGGESTIONS_4_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_4_word.csv' if executing_in_colab
    else 'suggestions_4_word.csv'
)

# Pickle backups of the raw suggestions.
PATH_TO_SAVE_SUGGESTIONS_DATA_1_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_1_word.data' if executing_in_colab
    else 'suggestions_1_word.data'
)
PATH_TO_SAVE_SUGGESTIONS_DATA_2_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_2_word.data' if executing_in_colab
    else 'suggestions_2_word.data'
)
PATH_TO_SAVE_SUGGESTIONS_DATA_3_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_3_word.data' if executing_in_colab
    else 'suggestions_3_word.data'
)
PATH_TO_SAVE_SUGGESTIONS_DATA_4_WORD = (
    '/content/drive/MyDrive/TCC/TestFiles/suggestions_4_word.data' if executing_in_colab
    else 'suggestions_4_word.data'
)


#### Read files

In [None]:
# Define constants.
# Define constants.
# Raw string: "\D" is not a valid escape sequence, which newer Python
# versions warn about. The resulting path value is unchanged.
ROOT_DIRECTORY = r"D:\DsTCC"

# Get all file names.
complete_file_names = get_all_c_sharp_complete_file_names_for_each_class(ROOT_DIRECTORY)

# Print first 10 files.
print_info("First 10 files:")

for file_name in complete_file_names[:10]:
    print(ntpath.basename(file_name)) 

# Print total number of files.
print_info("Number of files for GPT-2:", new_line=True)
print("%s files." % (len(complete_file_names)))

#### Tokenize all files.

In [None]:
# tokenize_all_files() has no return statement, so `tokens` ends up as None;
# the useful result is the set of files written to DIRECTORY_GENERATED_TOKENS.
tokens = tokenize_all_files(complete_file_names, DIRECTORY_GENERATED_TOKENS)

#### Create GPT-2 model and vocabulary for tokens.

In [None]:
# Build the GPT-2 model (from a fresh config) and load the trained tokenizer.
(model, tokenizer) = create_gpt2_model(DIRECTORY_GENERATED_TOKENS)

file /content/drive/MyDrive/TCC/GPT2/config.json not found


#### Create tokens list

In [None]:
# Encode the content of the source files into a single token sequence.
tokens_list = create_string_list_tokens(complete_file_names, tokenizer)

#### Export tokens_list to backup file.

In [None]:
# Write to the same path the load cell reads from. The hard-coded
# 'tokens_list.data' landed in the working directory, which on Colab is not
# the Drive location stored in PATH_TO_TOKENS_LIST.
export_list_to_data_file(tokens_list, PATH_TO_TOKENS_LIST)

#### Load tokens_list from backup file.

In [None]:
# Restore the token list from its pickle backup instead of re-encoding.
tokens_list = load_from_data_file(PATH_TO_TOKENS_LIST)

#### Show words positions of created list

In [None]:
# Sanity check: show the first five entries of the encoded token list.
print_info("First 5 tokens positions from list:")

for token in tokens_list[:5]:
    print("Position in vocabulary: %s" % (token))

#### Create TensorFlow dataset for training

In [None]:
# Cut the token list into fixed-size examples, then shuffle and batch them.
dataset = create_tf_dataset_for_gpt2_training(tokens_list)

#### Export tensorflow dataset to backup file

In [None]:
# Persist the prepared dataset so a later session can skip rebuilding it.
tf.data.experimental.save(dataset, DIRECTORY_TF_DATASET)

#### Load TensorFlow dataset from backup file

In [None]:
# Reload the dataset from DIRECTORY_TF_DATASET.
dataset = tf.data.experimental.load(DIRECTORY_TF_DATASET)

#### Configure model and compile

In [None]:
# Compile the model with its optimizer, loss and accuracy metric.
model = config_model_to_gpt2(model)

#### Train model

In [None]:
# Training hyper-parameters.
NUM_EPOCH = 5
# NOTE(review): the dataset is already batched with its own batch size inside
# create_tf_dataset_for_gpt2_training(); confirm this value has any effect.
BATCH_SIZE = 10

# Checkpoints are written to PATH_CHECKPOINT_TRAIN during training.
callbacks_list = create_checkpoint_gpt2(PATH_CHECKPOINT_TRAIN)

history = train_model_to_gpt2(model, dataset, NUM_EPOCH, BATCH_SIZE, callbacks_list)

#### Export model to backup files

In [None]:
# Folder (relative to the working directory) that receives the model backup.
FOLDER_TO_SAVE_MODEL = 'model_backup'
save_model_to_external_file(FOLDER_TO_SAVE_MODEL, model)

#### Load model from backup

In [None]:
# Restore weights from the checkpoint path that was used during training.
model.load_weights(PATH_CHECKPOINT_TRAIN)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f31ada0a650>

#### Get suggestions for the most common C# word

In [None]:
# Quick demo: generate and print suggestions for one hand-written prompt.
previous_tokens = 'private static void'
num_return_sequences = 1
suggestions = get_suggestions(previous_tokens, model, num_return_sequences, tokenizer)

for i, suggestion in enumerate(suggestions):
    print_info("Suggestion %s:" % (i + 1), format_suggestion(previous_tokens, suggestion))

Setting `pad_token_id` to 2 (first `eos_token_id`) to generate sequence


####################################
Suggestion 1:
####################################
private static void ResolveExecutablePath(ref string executable, ref string args)
        {



### Get suggestions for 1 word:

In [None]:
# Reload previously generated 1-word suggestions from their pickle backup.
all_suggestions_1_word = load_from_data_file(PATH_TO_SAVE_SUGGESTIONS_DATA_1_WORD)

In [None]:
num_return_sequences = 1
previous_tokens_to_test = load_csv_to_test_previous_tokens_list(PATH_TO_TEST_FILE_1_WORD)

# One (prompt, suggestions) pair per 1-word test prompt.
all_suggestions_1_word = [
    (previous_tokens, get_suggestions(previous_tokens, model, num_return_sequences, tokenizer))
    for previous_tokens in previous_tokens_to_test
]

In [None]:
# Write the formatted 1-word suggestions to their CSV file.
save_suggestions_to_csv_file(PATH_TO_SAVE_SUGGESTIONS_1_WORD, all_suggestions_1_word)

In [None]:
# Back up the raw 1-word suggestions with pickle.
export_list_to_data_file(all_suggestions_1_word, PATH_TO_SAVE_SUGGESTIONS_DATA_1_WORD)

In [None]:
# Print every 1-word prompt with its formatted suggestions, framed by banners.
banner = "###############################################################################"

for suggestions in all_suggestions_1_word:
    print(banner)
    print("Previous tokens: " + suggestions[0])

    for suggestion in suggestions[1]:
        print("Suggestion: " + format_suggestion(suggestions[0], suggestion))
        print(" ")

    print(banner)

###############################################################################
Previous tokens: abstract
Suggestion: abstract.

 
###############################################################################
###############################################################################
Previous tokens: as
Suggestion: as.

 
###############################################################################
###############################################################################
Previous tokens: base
Suggestion: base.

 
###############################################################################
###############################################################################
Previous tokens: bool
Suggestion: bool.

 
###############################################################################
###############################################################################
Previous tokens: break
Suggestion: break.

 
###########################################################

### Get suggestions for 2 word:

In [None]:
# Reload previously generated 2-word suggestions from their pickle backup.
all_suggestions_2_word = load_from_data_file(PATH_TO_SAVE_SUGGESTIONS_DATA_2_WORD)

In [None]:
num_return_sequences = 1
previous_tokens_to_test = load_csv_to_test_previous_tokens_list(PATH_TO_TEST_FILE_2_WORD)

# One (prompt, suggestions) pair per 2-word test prompt.
all_suggestions_2_word = [
    (previous_tokens, get_suggestions(previous_tokens, model, num_return_sequences, tokenizer))
    for previous_tokens in previous_tokens_to_test
]


In [None]:
# Back up the raw 2-word suggestions with pickle.
export_list_to_data_file(all_suggestions_2_word, PATH_TO_SAVE_SUGGESTIONS_DATA_2_WORD)

In [None]:
# Write the formatted 2-word suggestions to their CSV file.
save_suggestions_to_csv_file(PATH_TO_SAVE_SUGGESTIONS_2_WORD, all_suggestions_2_word)

In [None]:
# Print every 2-word prompt with its formatted suggestions, framed by banners.
banner = "###############################################################################"

for suggestions in all_suggestions_2_word:
    print(banner)
    print("Previous tokens: " + suggestions[0])

    for suggestion in suggestions[1]:
        print("Suggestion: " + format_suggestion(suggestions[0], suggestion))
        print(" ")

    print(banner)

###############################################################################
Previous tokens: public abstract
Suggestion: public abstract.

 
###############################################################################
###############################################################################
Previous tokens: base .
Suggestion: base.NET Foundation licenses this file to you under the MIT license.

 
###############################################################################
###############################################################################
Previous tokens: private bool
Suggestion: private bool ShouldUseCmd(string executable)
        {
 
###############################################################################
###############################################################################
Previous tokens: public byte
Suggestion: public byte.

 
###############################################################################
###############################

### Get suggestions for 3 word:

In [None]:
# Reload previously generated 3-word suggestions from their pickle backup.
all_suggestions_3_word = load_from_data_file(PATH_TO_SAVE_SUGGESTIONS_DATA_3_WORD)

In [None]:
num_return_sequences = 1
previous_tokens_to_test = load_csv_to_test_previous_tokens_list(PATH_TO_TEST_FILE_3_WORD)

# One (prompt, suggestions) pair per 3-word test prompt.
all_suggestions_3_word = [
    (previous_tokens, get_suggestions(previous_tokens, model, num_return_sequences, tokenizer))
    for previous_tokens in previous_tokens_to_test
]


In [None]:
# Write the formatted 3-word suggestions to their CSV file.
save_suggestions_to_csv_file(PATH_TO_SAVE_SUGGESTIONS_3_WORD, all_suggestions_3_word)

In [None]:
# Back up the raw 3-word suggestions with pickle.
export_list_to_data_file(all_suggestions_3_word, PATH_TO_SAVE_SUGGESTIONS_DATA_3_WORD)

In [None]:
# Print every 3-word prompt with its formatted suggestions, framed by banners.
banner = "###############################################################################"

for suggestions in all_suggestions_3_word:
    print(banner)
    print("Previous tokens: " + suggestions[0])

    for suggestion in suggestions[1]:
        print("Suggestion: " + format_suggestion(suggestions[0], suggestion))
        print(" ")

    print(banner)

###############################################################################
Previous tokens: public abstract class
Suggestion: public abstract class CommandFactory : ICommandFactory
    {
 
###############################################################################
###############################################################################
Previous tokens: base . M
Suggestion: base. M.

 
###############################################################################
###############################################################################
Previous tokens: private bool P
Suggestion: private bool P.

 
###############################################################################
###############################################################################
Previous tokens: public byte [
Suggestion: public byte [.

 
###############################################################################
########################################################################

### Get suggestions for 4 word:

In [None]:
# Reload previously generated 4-word suggestions from their pickle backup.
all_suggestions_4_word = load_from_data_file(PATH_TO_SAVE_SUGGESTIONS_DATA_4_WORD)

In [None]:
num_return_sequences = 1
previous_tokens_to_test = load_csv_to_test_previous_tokens_list(PATH_TO_TEST_FILE_4_WORD)

# One (prompt, suggestions) pair per 4-word test prompt.
all_suggestions_4_word = [
    (previous_tokens, get_suggestions(previous_tokens, model, num_return_sequences, tokenizer))
    for previous_tokens in previous_tokens_to_test
]


In [None]:
# Write the formatted 4-word suggestions to their CSV file.
save_suggestions_to_csv_file(PATH_TO_SAVE_SUGGESTIONS_4_WORD, all_suggestions_4_word)

In [None]:
# Back up the raw 4-word suggestions with pickle.
export_list_to_data_file(all_suggestions_4_word, PATH_TO_SAVE_SUGGESTIONS_DATA_4_WORD)

In [None]:
# Print every 4-word prompt with its formatted suggestions, framed by banners.
banner = "###############################################################################"

for suggestions in all_suggestions_4_word:
    print(banner)
    print("Previous tokens: " + suggestions[0])

    for suggestion in suggestions[1]:
        print("Suggestion: " + format_suggestion(suggestions[0], suggestion))
        print(" ")

    print(banner)

###############################################################################
Previous tokens: public bool IsVerified {
Suggestion: public bool IsVerified {
            // So, do a quick path search to see if we can just directly invoke it
 
###############################################################################
###############################################################################
Previous tokens: public string Message {
Suggestion: public string Message {
            // So, do a quick path search to see if we can just directly invoke it
 
###############################################################################
###############################################################################
Previous tokens: public int Count {
Suggestion: public int Count {
            return Create(executable, ArgumentEscaper.EscapeAndConcatenateArgArrayForProcessStart(args));
 
###############################################################################
###################