### Modified example from https://github.com/turboderp/exllamav2/blob/master/examples/inference.py — simple text input → text output stored in a variable, used here to generate docstring examples for functions parsed from Python source files

In [1]:
#!python --version

In [2]:
from exllamav2 import(
    ExLlamaV2,
    ExLlamaV2Config,
    ExLlamaV2Cache,
    ExLlamaV2Tokenizer,
    ExLlamaV2Cache_Q4,
)

from exllamav2.generator import (
    ExLlamaV2BaseGenerator,
    ExLlamaV2Sampler
)

import time

In [3]:
# Initialize model and cache

# Directory of the model to load.
# Alternative checkpoint kept for reference:
#   "/shared/analyst/models/Llama-3-8B-Instruct-262k-5.0bpw-h6-exl2"
model_directory =  "/shared/analyst/models/Meta-Llama-3-70B-Instruct-4.0bpw-h6-exl2"

config = ExLlamaV2Config(model_directory)
config.prepare()
# Override the sequence length after prepare(); the cache below is sized to the same value.
config.max_seq_len = 32000
model = ExLlamaV2(config)
# The cache is created with lazy=True and then handed to load_autosplit().
# NOTE(review): presumably this defers cache allocation until the model is
# split across the available devices -- confirm against the exllamav2 docs.
cache = ExLlamaV2Cache_Q4(model, lazy = True, max_seq_len=config.max_seq_len)
model.load_autosplit(cache)
tokenizer = ExLlamaV2Tokenizer(config)

# Initialize generator

generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

# Sampling settings shared by every generation call in this notebook

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.85
settings.top_k = 50
settings.top_p = 0.8
settings.token_repetition_penalty = 1.01
# The tokenizer's EOS token is excluded from sampling.
# NOTE(review): this presumably makes every generation run for the full
# max_new_tokens budget -- confirm that this is intended.
settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])

In [6]:
import ast
import textwrap

def get_docstring(node):
    """Return the stripped docstring of an AST node, or None if it has none.

    The docstring is the first statement of ``node.body`` when that statement
    is a bare string constant.

    Parameters
    ----------
    node : ast.AST
        A node with a ``body`` list (function, class or module).

    Returns
    -------
    str or None
        The docstring with leading/trailing whitespace removed, or None when
        the body is missing, empty, or does not start with a string literal.
    """
    body = getattr(node, "body", None)
    # An empty body (e.g. an empty module) has no docstring; avoid IndexError.
    if not body:
        return None

    first_statement = body[0]
    if not isinstance(first_statement, ast.Expr):
        return None

    # ast.Str / .s are deprecated and removed in Python 3.12; string literals
    # are represented as ast.Constant holding a str value.
    literal = first_statement.value
    if isinstance(literal, ast.Constant) and isinstance(literal.value, str):
        return literal.value.strip()
    return None

def get_function_body(source_lines, start_lineno, end_lineno):
    """Return the source text between two 1-based line numbers (inclusive).

    Parameters
    ----------
    source_lines : list of str
        Lines of the source file, with or without trailing line terminators.
    start_lineno : int
        1-based number of the first line to include.
    end_lineno : int
        1-based number of the last line to include.

    Returns
    -------
    str
        The selected lines joined by a single newline each.
    """
    selected = source_lines[start_lineno - 1:end_lineno]
    # Lines coming from file.readlines() still carry their terminator; strip
    # it so the join does not produce a blank line after every source line.
    return "\n".join(line.rstrip("\r\n") for line in selected)

def parse_python_file(file_path):
    """Extract documented top-level functions from a Python source file.

    Only ``def`` statements directly in the module body that have a non-empty
    docstring are returned.

    Parameters
    ----------
    file_path : str
        Path of the Python file to parse.

    Returns
    -------
    list of tuple
        One ``(name, function_body, description, examples)`` tuple per
        function, where ``function_body`` is the function source with its
        first triple-double-quoted string removed, ``description`` is the
        docstring text before the first occurrence of ``"Examples"`` and
        ``examples`` is the dedented text after it.
    """
    functions = []

    with open(file_path, "r", encoding="utf-8") as file:
        source = file.read()

    tree = ast.parse(source, filename=file_path)
    # Split on "\n" only so indices line up with the AST line numbers; the
    # lines are stored without terminators so joining them adds exactly one
    # newline per line.
    source_lines = source.split("\n")

    for node in tree.body:
        if not isinstance(node, ast.FunctionDef):
            continue

        # Functions without a docstring cannot serve as prompt material.
        docstring = get_docstring(node)
        if not docstring:
            continue

        # Line span of the whole definition. end_lineno covers a multi-line
        # final statement; body[-1].lineno (the fallback) only points at the
        # first line of that statement.
        start_lineno = node.lineno
        end_lineno = getattr(node, "end_lineno", None)
        if end_lineno is None:
            end_lineno = node.body[-1].lineno if node.body else node.lineno

        function_body = get_function_body(source_lines, start_lineno, end_lineno)

        # Split docstring into description and examples
        description, _, examples = docstring.partition("Examples")
        description = description.strip()
        examples = textwrap.dedent(examples.strip())

        # Drop the docstring from the function source. maxsplit=2 keeps any
        # later triple-quoted strings (and the code after them) intact.
        function_split = function_body.split('"""', 2)
        if len(function_split) > 2:
            function_body = function_split[0] + function_split[2]

        functions.append((node.name, function_body, description, examples))

    return functions
def get_specific_function(functions, desired_function):
    """Look up a parsed function entry by name.

    Parameters
    ----------
    functions : iterable of 4-tuples
        ``(name, function_body, description, examples)`` entries.
    desired_function : str
        Name of the function to find.

    Returns
    -------
    tuple or str
        The first matching ``(name, function_body, description, examples)``
        tuple, or the string ``"Unable to Find"`` when no entry matches.
    """
    matches = (
        (name, body, doc, sample)
        for name, body, doc, sample in functions
        if name == desired_function
    )
    # First hit wins; the string sentinel signals "not found" to callers.
    return next(matches, "Unable to Find")
def generate_prompt(function1, function2, function3, unknown_function):
    """Build the few-shot user prompt.

    The three example entries are rendered with all four sections (Function,
    Function Body, Docstrings, Examples). The unknown entry is rendered with
    the same first three sections and an ``Examples`` section that contains
    only ``------`` for the model to continue from.

    Parameters
    ----------
    function1, function2, function3 : sequence of str
        Example entries indexed as ``[name, body, docstring, examples]``.
    unknown_function : sequence of str
        Target entry; only indices 0-2 are read.

    Returns
    -------
    str
        The rendered sections joined by newlines.
    """
    def render(entry, examples_text):
        # One "Function / Function Body / Docstrings / Examples" section.
        return "".join((
            "Function: ", entry[0],
            "\nFunction Body: ", entry[1],
            "\nDocstrings: ", entry[2],
            "\nExamples: ", examples_text,
        ))

    sections = [render(shot, shot[3]) for shot in (function1, function2, function3)]
    sections.append(render(unknown_function, "------"))
    return "\n".join(sections)
def write_to_file(filename, string):
    """Append one response record to a text file and print ``finished``.

    The record is the word ``Assistant`` immediately followed by ``string``,
    a blank line, a dashed separator line and another blank line.

    Parameters
    ----------
    filename : str
        Path of the file to append to (created if it does not exist).
    string : str
        Text to store after the ``Assistant`` prefix.
    """
    with open(filename, 'a') as sink:
        record = "Assistant" + string + "\n\n"
        divider = "-------------------------------------------------------------------------------------------------\n\n"
        sink.write(record + divider)
    print("finished")

def _require_reference_function(functions, name, source):
    """Return the parsed entry for ``name`` or raise if it was not found."""
    entry = get_specific_function(functions, name)
    # get_specific_function signals a miss with a string sentinel; indexing
    # that string would silently build a nonsense prompt.
    if isinstance(entry, str):
        raise LookupError(f"Few-shot example function {name!r} was not found in {source}")
    return entry


def _render_llama3_prompt(system_prompt, user_prompt):
    """Wrap a system and a user message in header tokens, ending with an open assistant header."""
    texts = [f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>\n"]
    texts.append(f'<|start_header_id|>user<|end_header_id|>\n\n{user_prompt}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>')
    return ''.join(texts)


def get_examples(functions, output_path="examples_testing_70.txt", max_new_tokens=512, seed=1234):
    """Generate docstring examples for each given function and append them to a file.

    Three reference functions (``inv`` and ``eigvals`` from ``_linalg.py``,
    ``asarray`` from ``core.py``) are used as few-shot examples. For every
    entry in ``functions`` a prompt is built, passed to the module-level
    ``generator`` with the module-level sampling ``settings``, and the text
    following the first ``"assistant"`` marker in the output is appended to
    ``output_path``.

    Parameters
    ----------
    functions : iterable
        ``(name, function_body, description, examples)`` entries as produced
        by ``parse_python_file`` / ``get_specific_function``. Entries that are
        the "not found" string sentinel are skipped.
    output_path : str, optional
        File the responses are appended to.
    max_new_tokens : int, optional
        Generation budget passed to the generator and used in the throughput
        report.
    seed : int, optional
        Seed passed to the generator.

    Raises
    ------
    LookupError
        If one of the three few-shot reference functions cannot be found.
    """
    lin_functions = parse_python_file("_linalg.py")
    core_functions = parse_python_file("core.py")

    few_shot = (
        _require_reference_function(lin_functions, "inv", "_linalg.py"),
        _require_reference_function(lin_functions, "eigvals", "_linalg.py"),
        _require_reference_function(core_functions, "asarray", "core.py"),
    )

    # Warm up once instead of before every generation.
    generator.warmup()

    for function in functions:
        if isinstance(function, str):
            # "Unable to Find" sentinel from get_specific_function.
            print(f"Skipping entry {function!r}: not a parsed function")
            continue

        unknown_function_name = function[0]
        prompt = generate_prompt(few_shot[0], few_shot[1], few_shot[2], function)

        system_prompt = (
            "You take in three example functions with sections Function, Function Body, Docstrings, and Examples. "
            "You are given a fourth function, function body, and docstring. "
            f"Please write examples for the {unknown_function_name} function. "
            f"Provide only examples for the {unknown_function_name} function. "
            "Do not provide repeat examples. "
            "Do not start an example without finishing it. "
        )
        rendered_prompt = _render_llama3_prompt(system_prompt, prompt)

        time_begin = time.perf_counter()
        output = generator.generate_simple(rendered_prompt, settings, max_new_tokens, seed = seed)
        time_total = time.perf_counter() - time_begin

        # The rate is derived from the token budget, not from a count of the
        # tokens actually produced.
        print(f"Response generated in {time_total:.2f} seconds, {max_new_tokens} tokens, {max_new_tokens / time_total:.2f} tokens/second")

        # Keep the text between the first and second "assistant" marker.
        # NOTE(review): this presumably relies on the decoded output containing
        # the role name exactly once before the response -- a prompt whose
        # function source contains the word "assistant" would break it; verify.
        assistant_texts = output.split("assistant")
        # Fall back to the whole output rather than losing a finished
        # generation to an IndexError when the marker is absent.
        response = assistant_texts[1] if len(assistant_texts) > 1 else output
        write_to_file(output_path, response)

In [None]:
# Parse the linear-algebra module and generate examples for a fixed set of
# target functions (results are appended to a file by get_examples).
file_path = "_linalg.py"
functions = parse_python_file(file_path)
list_of_functions = [
    get_specific_function(functions, target_name)
    for target_name in ("svdvals", "cond", "matrix_rank", "eigh")
]
get_examples(list_of_functions)

Response generated in 165.27 seconds, 512 tokens, 3.10 tokens/second
finished
Response generated in 175.89 seconds, 512 tokens, 2.91 tokens/second
finished
Response generated in 183.31 seconds, 512 tokens, 2.79 tokens/second
finished
