We create a prompt that can be passed to an LLM. The prompt contains several examples of function code and the corresponding docstrings from a given file, followed by the code of a function whose docstring we want the LLM to predict.

- [GPT-4 was used](https://chat.openai.com/share/f82d5849-7224-464e-a578-d543191c8e30) to help create this file.

In [3]:
import ast

def extract_function_code_and_docstring(file_path, function_name):
    """Return a function's code (without its docstring) and its docstring.

    The file is parsed with ``ast`` and the matching function is regenerated
    with ``ast.unparse``. Decorators are kept, but comments and the original
    formatting are not, because they are not part of the syntax tree.

    Parameters
    ----------
    file_path : str
        Path of the Python source file to search.
    function_name : str
        Name of the function to extract. Both ``def`` and ``async def``
        definitions are considered, at any nesting level. If several
        definitions share the name, the one visited last is used.

    Returns
    -------
    list
        ``[function_code, docstring]``. ``function_code`` is the unparsed
        function with its docstring removed. ``docstring`` is the cleaned
        docstring wrapped in triple quotes, or ``None`` when the function has
        no (or an empty) docstring. When the function is not found the list
        is ``["Function '<name>' not found in <file_path>", None]``.
    """
    # Python source files are UTF-8 by default, so do not depend on the
    # platform's locale encoding when reading them.
    with open(file_path, "r", encoding="utf-8") as file:
        source_code = file.read()

    # Parse the source code into an abstract syntax tree (AST)
    tree = ast.parse(source_code)

    class FunctionExtractor(ast.NodeVisitor):
        """Remember the most recently visited function named ``function_name``."""

        def __init__(self):
            self.function_node = None

        def visit_FunctionDef(self, node):
            if node.name == function_name:
                self.function_node = node
            # Keep descending so nested functions and methods are seen too.
            self.generic_visit(node)

        # ``async def`` functions carry docstrings exactly like ``def`` ones.
        visit_AsyncFunctionDef = visit_FunctionDef

    # Instantiate and use the extractor
    extractor = FunctionExtractor()
    extractor.visit(tree)
    function_node = extractor.function_node

    if function_node is None:
        return [f"Function '{function_name}' not found in {file_path}", None]

    docstring = ast.get_docstring(function_node)

    # With clean=False, get_docstring returns non-None only when the first
    # statement really is a string literal. Other leading constants such as
    # ``42`` or ``...`` are ordinary statements and must be kept.
    if ast.get_docstring(function_node, clean=False) is not None:
        del function_node.body[0]
        if not function_node.body:
            # A docstring-only function would otherwise unparse to a bare
            # ``def f():`` line, which is not valid Python.
            function_node.body.append(ast.Pass())

    # Convert the function back into Python code, including decorators
    function_code = ast.unparse(function_node)

    # Format the docstring with triple quotes if present
    docstring_with_quotes = f'"""\n{docstring}\n"""' if docstring else None

    return [function_code, docstring_with_quotes]


In [4]:
# Example usage: pull ``tensorsolve`` out of _linalg.py
file_path = "_linalg.py"
function_name = "tensorsolve"
result = extract_function_code_and_docstring(file_path, function_name)

# ``result`` is a two-item list: the function code with its docstring
# stripped, followed by the docstring itself.
function_code_without_docstring, docstring = result

print("Function Code Without Docstring:", function_code_without_docstring, sep="\n")
print("\nDocstring:", docstring, sep="\n")

Function Code Without Docstring:
@array_function_dispatch(_tensorsolve_dispatcher)
def tensorsolve(a, b, axes=None):
    a, wrap = _makearray(a)
    b = asarray(b)
    an = a.ndim
    if axes is not None:
        allaxes = list(range(0, an))
        for k in axes:
            allaxes.remove(k)
            allaxes.insert(an, k)
        a = a.transpose(allaxes)
    oldshape = a.shape[-(an - b.ndim):]
    prod = 1
    for k in oldshape:
        prod *= k
    if a.size != prod ** 2:
        raise LinAlgError('Input arrays must satisfy the requirement             prod(a.shape[b.ndim:]) == prod(a.shape[:b.ndim])')
    a = a.reshape(prod, prod)
    b = b.ravel()
    res = wrap(solve(a, b))
    res.shape = oldshape
    return res

Docstring:
"""
Solve the tensor equation ``a x = b`` for x.

It is assumed that all indices of `x` are summed over in the product,
together with the rightmost indices of `a`, as is done in, for example,
``tensordot(a, x, axes=x.ndim)``.

Parameters
----------
a : arr

In [5]:
# Example usage: pull ``transpose`` out of _linalg.py
file_path = "_linalg.py"
function_name = "transpose"
result = extract_function_code_and_docstring(file_path, function_name)

# ``result`` is a two-item list: the function code with its docstring
# stripped, followed by the docstring itself.
function_code_without_docstring, docstring = result

print("Function Code Without Docstring:", function_code_without_docstring, sep="\n")
print("\nDocstring:", docstring, sep="\n")

Function Code Without Docstring:
def transpose(a):
    return swapaxes(a, -1, -2)

Docstring:
"""
Transpose each matrix in a stack of matrices.

Unlike np.transpose, this only swaps the last two axes, rather than all of
them

Parameters
----------
a : (...,M,N) array_like

Returns
-------
aT : (...,N,M) ndarray
"""


In [6]:
# Example usage: pull ``solve`` out of _linalg.py
file_path = "_linalg.py"
function_name = "solve"
result = extract_function_code_and_docstring(file_path, function_name)

# ``result`` is a two-item list: the function code with its docstring
# stripped, followed by the docstring itself.
function_code_without_docstring, docstring = result

print("Function Code Without Docstring:", function_code_without_docstring, sep="\n")
print("\nDocstring:", docstring, sep="\n")

Function Code Without Docstring:
@array_function_dispatch(_solve_dispatcher)
def solve(a, b):
    a, _ = _makearray(a)
    _assert_stacked_2d(a)
    _assert_stacked_square(a)
    b, wrap = _makearray(b)
    t, result_t = _commonType(a, b)
    if b.ndim == 1:
        gufunc = _umath_linalg.solve1
    else:
        gufunc = _umath_linalg.solve
    signature = 'DD->D' if isComplexType(t) else 'dd->d'
    with errstate(call=_raise_linalgerror_singular, invalid='call', over='ignore', divide='ignore', under='ignore'):
        r = gufunc(a, b, signature=signature)
    return wrap(r.astype(result_t, copy=False))

Docstring:
"""
Solve a linear matrix equation, or system of linear scalar equations.

Computes the "exact" solution, `x`, of the well-determined, i.e., full
rank, linear matrix equation `ax = b`.

Parameters
----------
a : (..., M, M) array_like
    Coefficient matrix.
b : {(M,), (..., M, K)}, array_like
    Ordinate or "dependent variable" values.

Returns
-------
x : {(..., M,), (..

In [7]:
# Source file to read and an initial target function name.
file_path, function_name = "_linalg.py", "tensorsolve"

def print_function_then_docstring(file_path, function_name):
    """Print a function's code and then its docstring, each under a header.

    The code and docstring come from ``extract_function_code_and_docstring``.
    A final ``print("\\n")`` leaves two blank lines after the docstring.
    """
    code, doc = extract_function_code_and_docstring(file_path, function_name)

    print("Function Code Without Docstring:", code, sep="\n")
    print("\nDocstring:", doc, sep="\n")
    print("\n")
    

def print_function_prompt_docstring(file_path, function_name):
    """Print a function's code followed by an empty ``Docstring:`` header.

    Only the code returned by ``extract_function_code_and_docstring`` is
    printed; the docstring itself is left out, so the output ends with the
    bare header.
    """
    code = extract_function_code_and_docstring(file_path, function_name)[0]

    print(f"Function Code Without Docstring:\n{code}\n\nDocstring:")

# Every function except the last is printed as a complete example (code and
# docstring). The last one is printed with only the "Docstring:" header.
function_names = [
    "tensorsolve",
    "transpose",
    "solve",
]
for function_name in function_names[:-1]:
    print_function_then_docstring(file_path, function_name)

print_function_prompt_docstring(file_path, function_names[-1])

Function Code Without Docstring:
@array_function_dispatch(_tensorsolve_dispatcher)
def tensorsolve(a, b, axes=None):
    a, wrap = _makearray(a)
    b = asarray(b)
    an = a.ndim
    if axes is not None:
        allaxes = list(range(0, an))
        for k in axes:
            allaxes.remove(k)
            allaxes.insert(an, k)
        a = a.transpose(allaxes)
    oldshape = a.shape[-(an - b.ndim):]
    prod = 1
    for k in oldshape:
        prod *= k
    if a.size != prod ** 2:
        raise LinAlgError('Input arrays must satisfy the requirement             prod(a.shape[b.ndim:]) == prod(a.shape[:b.ndim])')
    a = a.reshape(prod, prod)
    b = b.ravel()
    res = wrap(solve(a, b))
    res.shape = oldshape
    return res

Docstring:
"""
Solve the tensor equation ``a x = b`` for x.

It is assumed that all indices of `x` are summed over in the product,
together with the rightmost indices of `a`, as is done in, for example,
``tensordot(a, x, axes=x.ndim)``.

Parameters
----------
a : arr

Here are several results from GPT-4.
- https://chatgpt.com/c/3e1400fa-de99-48cd-81e2-44de92d5a17c - hand copied over each function and docstring
- https://chatgpt.com/c/eca89beb-4dc3-4847-9848-9f329f720db1 - generated script entirely with code above, but no `print("\n")` after docstrings. 
- https://chatgpt.com/c/57736355-ef68-41bd-8046-221f5672d4b6 - An extra newline was accidentally added at the end of the document, and the response then included the entire function together with its docstring.
- https://chatgpt.com/c/2c7af451-c70a-43c5-bc61-bb32b30dbfad - Uses script above

Results vary slightly and have many similar parts. The output is quite similar to the actual docstring. 