In [1]:
import pydoc
import ast
import importlib
import re
import json

In [2]:
# Response from LLM
res = '''# This is a sample np.array(np.ones(5)) and another np.sum example
# Comment to check inline many functions

import numpy as np
from matplotlib.pyplot import show
 
x = np.linspace(0, 1, 100)
y = np.sin(2 * np.pi * x)

show()
pyplot.plot(x, y)
pyplot.show()
'''

# Extract imports from the file

In [3]:
packages = []
import_tree = ast.parse(res)

for node in ast.walk(import_tree):
    if 'names' in node.__dir__():
        # import numpy.array as npArray
        if 'module' in node.__dir__():
            # from matplotlib.pyplot import show
            packages.append({"module": node.module, "name": node.names[0].name})
        else:
            packages.append({"name": node.names[0].name, "asname": node.names[0].asname})

print(packages)

[{'name': 'numpy', 'asname': 'np'}, {'module': 'matplotlib.pyplot', 'name': 'show'}]


# Replace alias with package name

In [4]:
lines = res.splitlines()

In [5]:
nps = re.findall(fr'{packages[0]["asname"]}\.[a-zA-Z]+', res)

In [6]:
print(nps)

['np.array', 'np.ones', 'np.sum', 'np.linspace', 'np.sin', 'np.pi']


In [7]:
pakNames = []

In [8]:
'module' in packages[-1]

True

In [9]:
documentations = []

In [17]:
import sys
"matplotlib" in sys.modules

True

In [10]:
for package in packages:
    if "module" in package:
        package_module = package["module"]
        module_function = package["name"]
        
        spec = importlib.util.find_spec(package_module)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        function_definition = getattr(module, module_function)
        documentation = pydoc.render_doc(function_definition)
        documentations.append({"module": ".".join([package_module, module_function]), "documentation": documentation})
    else:
        last_module = None
        package_module = package["name"]
        module_function = package["asname"]

        if module_function is None:
            package_references = re.findall(rf"{package['name']}\.[a-zA-Z]+", res)
            package_references = [package_reference.rsplit('.', 1) for package_reference in package_references]
            
            for package_module, module_function in package_references:
                spec = importlib.util.find_spec(package_module)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                function_definition = getattr(module, module_function)
                documentation = pydoc.render_doc(function_definition)
                documentaions.append({"module": ".".join([package_module, module_function]), "documentation": documentation})
        else:
            package_references = re.findall(rf"{package['asname']}\.[a-zA-Z]+", res)
            package_references = [re.sub(module_function, package_module, package_reference) for package_reference in package_references]    
            for package_reference in package_references:
                package_module, package_function = package_reference.split(".")
                if last_module != package_module:
                    spec = importlib.util.find_spec(package_module)
                    module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(module)
                    last_module = module
                function_definition = getattr(module, package_function)
                documentation = pydoc.render_doc(function_definition)
                documentations.append({"module": package_reference, "documentation": documentation})

with open("example-documentation.json", "w") as json_file:
    json.dump(documentations, json_file)

  spec.loader.exec_module(module)
  spec.loader.exec_module(module)
  spec.loader.exec_module(module)
  spec.loader.exec_module(module)
  spec.loader.exec_module(module)
  spec.loader.exec_module(module)


In [19]:
import tiktoken
import 

In [25]:
enc = tiktoken.get_encoding("p50k_base")
here = enc.encode("\n".join([documentation["documentation"] for documentation in documentations]))

In [26]:
len(here)

7984

In [12]:
documentations

[{'module': 'numpy.array',
 {'module': 'numpy.ones',
  'documentation': "Python Library Documentation: function ones in module numpy\n\no\x08on\x08ne\x08es\x08s(shape, dtype=None, order='C', *, like=None)\n    Return a new array of given shape and type, filled with ones.\n    \n    Parameters\n    ----------\n    shape : int or sequence of ints\n        Shape of the new array, e.g., ``(2, 3)`` or ``2``.\n    dtype : data-type, optional\n        The desired data-type for the array, e.g., `numpy.int8`.  Default is\n        `numpy.float64`.\n    order : {'C', 'F'}, optional, default: C\n        Whether to store multi-dimensional data in row-major\n        (C-style) or column-major (Fortran-style) order in\n        memory.\n    like : array_like, optional\n        Reference object to allow the creation of arrays which are not\n        NumPy arrays. If an array-like passed in as ``like`` supports\n        the ``__array_function__`` protocol, the result will be defined\n        by it. In thi

In [11]:
spec = importlib.util.

SyntaxError: invalid syntax (2074393657.py, line 1)

In [None]:
re.findall(r'np\.[a-zA-Z]+', res)

In [None]:
print(documentations)

In [None]:
pakDef = [re.sub(packages[0]["asname"], packages[0]["name"], np) for np in nps]

In [None]:
pakDef

In [None]:
module = importlib.import_module("matplotlib.pyplot")

In [None]:
module = importlib.import_module("matplotlib")

In [None]:
help(importlib.util.find_spec)

In [None]:
help(importlib.util.module_from_spec)

In [None]:
spec = importlib.util.find_spec("numpy")
print(help(spec.loader.exec_module))

In [None]:
spec = importlib.util.find_spec("matplotlib")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

In [None]:
import matplotlib

In [None]:
matplotlib.pyplot.show

In [None]:
func = eval("matplotlib"+".pyplot")

In [None]:
getattr(module, "pyplot.show")

In [None]:
getattr(getattr(module, "pyplot.show"), "")

In [None]:
for function in ["pyplot", "show"]:
    func = getattr(

In [None]:
import matplotlib

In [None]:
matplotlib.pyplot.__doc__

In [None]:
import importlib.util
import inspect

def get_function_docs(module_name, function_name):
    try:
        # Import the module dynamically
        spec = importlib.util.find_spec(module_name)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        functions = function_name.split(".")

        getattr(getattr(module, functions[0]), function[1])

        func = getattr(module, functions[0])
        for function in functions[1:]:
            func = getattr(func, function)
        print(func)
        # Get the function dynamically
        function = getattr(module, function_name)

        # Use inspect to get documentation
        docstring = inspect.getdoc(func)
        return docstring
    except (ImportError, AttributeError):
        return None

if __name__ == "__main__":
    module_name = "matplotlib"
    function_name = "pyplot.show"

    docstring = get_function_docs(module_name, function_name)

    if docstring:
        print(f"Documentation for {module_name}.{function_name}:\n{docstring}\n")
    else:
        print(f"No documentation found for {module_name}.{function_name}\n")

In [None]:
import matplotlib

In [None]:
matplotlib.pyplot.show.__doc__

In [None]:
help(getattr(getattr(module, "pyplot"), "show"))

In [None]:
print(module.__doc__)

In [None]:
module = importlib.import_module(packages[0]["name"])

In [None]:
del numpy

In [None]:
print(help(getattr(module, pakDef[0].split(".")[-1])))

In [None]:
re.sub(fr'{packages[0]["asname"]}', packages[0]["name"], nps)

In [None]:
print(pydoc.render_doc(re.search))

In [None]:
print(pydoc.render_doc(re.match))

In [None]:
re.sub(fr'{packages[0]["asname"]}\.[a-zA-Z]+', packages[0]['name'], res)

In [None]:
print(help(re.sub))

In [None]:
print(re.__doc__)

In [None]:
print(help(re))

In [None]:
print(pydoc.render_doc(re.sub))

In [None]:
print(pydoc.render_doc(re.search))

# The problem is that there might be other definitionas similar to this that can not be detected

In [None]:
print(re.Pattern.__doc__)
# BTW, This has huge doc for other two methods

In [None]:
print(re.sub.__doc__)

In [None]:
print(help(re.sub))

In [None]:
print(pydoc.render_doc(re.sub))