In [1]:
import lionagi as li

from pathlib import Path
# Data directory: the 'lionagi_data' folder inside the current working directory.
data_path = Path.cwd().joinpath('lionagi_data')

In [2]:
# Load / chunk data
#
# Read every ".py" file found under data_path (sub-directories included) and
# keep only the documents whose text is longer than 100 characters.
# NOTE(review): to_lion=False presumably keeps the loader's native document
# objects instead of converting them to lionagi nodes — confirm.
docs = [
    document
    for document in li.load(
        input_dir=data_path,
        recursive=True,
        required_exts=[".py"],
        to_lion=False,
    )
    if len(document.text) > 100
]

# Chunking step, currently disabled (the notebook loads a persisted index
# from "./lionagi_index" instead of building one from chunks):
# chunks = li.chunk(
#     docs, chunker = "CodeSplitter", chunker_type = "llama_index",
#     to_lion=False,
#     chunker_kwargs = {
#         "language": "python",
#         "chunk_lines": 100,
#         "chunk_lines_overlap": 10,
#         "max_chars": 2000,},
# )


In [3]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType

# Set the chat model on llama_index's shared Settings object.
Settings.llm = OpenAI(model="gpt-4o")
# Set the embedding model on the same Settings object.
# NOTE(review): this presumably has to match the embedding model used when the
# persisted index in "./lionagi_index" was built — confirm.
Settings.embed_model = OpenAIEmbedding(
    model=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE
)

In [4]:
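# One-time index build, left disabled: it needs `chunks`, which is only produced
# by the commented-out li.chunk(...) call in the load cell. The notebook loads
# the index persisted in "./lionagi_index" instead.
# from llama_index.core import VectorStoreIndex

# index = VectorStoreIndex(chunks)
# index.storage_context.persist(persist_dir="./lionagi_index")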

In [5]:
from llama_index.core import load_index_from_storage, StorageContext

# Identifier of the index to load from the persisted store.
index_id = "91fe61e0-89b5-4202-acff-435707e60119"

# Open the storage persisted under "./lionagi_index" and load the index with
# the id above from it.
storage_context = StorageContext.from_defaults(persist_dir="./lionagi_index")
index = load_index_from_storage(storage_context, index_id=index_id)

In [6]:
from llama_index.core.postprocessor import LLMRerank

# LLM-based reranker configured with choice_batch_size=10 and top_n=5.
# NOTE(review): presumably candidates are judged 10 per LLM call and the best
# 5 are kept — confirm against the LLMRerank documentation.
reranker = LLMRerank(choice_batch_size=10, top_n=5)
# Query engine over the loaded index, with the reranker as its only node postprocessor.
query_engine = index.as_query_engine(node_postprocessors=[reranker])

In [7]:
# Raw response objects from the query engine, appended by query_codebase in
# the order the queries complete.
source_codes_responses = []


async def query_codebase(query):
    """
    Perform a query to a QA bot with access to a vector index built with package lionagi codebase

    Args:
        query (str): The query string to search for in the LionAGI codebase.

    Returns:
        str: The string representation of the response content from the codebase query.
    """
    # NOTE(review): this function is passed to func_to_tool elsewhere in the
    # notebook, so the docstring above is presumably used to describe the tool
    # to the model — keep its wording unchanged unless that is intended.
    result = await query_engine.aquery(query)

    # Keep the full response object for later inspection; only its text goes
    # back to the caller.
    source_codes_responses.append(result)
    answer = result.response
    return str(answer)

In [10]:
# First-pass prompt: passed as `instruction` to branch.direct in write_doc,
# together with the source code to document.
instruction="""
write a good API documentation for this code, make sure you use query 
engine to check meanings of code concepts to accurately describe them, 
must integrate the information from query engine to verify the correctness 
of the documentation.
"""

# Second-pass prompt: passed as `instruction` to branch.chat in write_doc to
# request the final, more technical rewrite.
edit = """
you asked a lot of good questions and got plenty answers, please integrate your 
conversation, be a lot more technical, you will be rewarded with 500 dollars for 
great work, and punished for subpar work, take a deep breath, you can do it
"""

In [23]:
from PROMPTS import sys_prompt
from lionagi.core.action import func_to_tool
from lionagi.libs import func_call, CallDecorator as cd

# Tool object(s) built from query_codebase; passed as `tools` to branch.direct
# in write_doc.
tools = func_to_tool(query_codebase)

# Model handle shared by every Branch created in write_doc.
# NOTE(review): the interval_* arguments look like rate limits (tokens and
# requests allowed per `interval`, presumably in seconds) — confirm against
# lionagi's iModel documentation.
model = li.iModel(
    model="gpt-4o", 
    provider="openai",
    interval_tokens=5_000_000,
    interval_requests=5_000,
    interval=60,
)

@cd.max_concurrency(20)
async def write_doc(context):
    """
    Generate API documentation for one piece of source code.

    A new Branch is created with `sys_prompt`, `query_codebase` as a tool and
    the shared `model`. It is first directed with `instruction` on `context`
    (reasoning, scoring and tool actions enabled), then asked via `edit` for
    a final rewrite. That reply is attached to the form as the
    `final_documentation` field, and the result of `branch.to_df()` is written
    to `lion_doc_<first 8 characters of branch.ln_id>.csv`.

    NOTE(review): the decorator presumably caps the number of concurrently
    running calls at 20 — confirm against lionagi's CallDecorator.

    Args:
        context: Source code to document (the notebook passes the `.text` of
            each loaded document).

    Returns:
        tuple: `(form, branch)` on success, `(None, None)` if any step raised.
    """
    import traceback

    try:
        branch = li.Branch(system=sys_prompt, tools=[query_codebase], imodel=model)

        form = await branch.direct(
            instruction=instruction,
            context=context,
            reason=True,
            score=True,
            action_allowed=True,
            tools=tools,
        )

        final_doc = await branch.chat(
            instruction=edit,
            temperature=0.5,
        )

        form._add_field(
            field="final_documentation",
            annotation=str,
            value=final_doc,
        )

        df = branch.to_df()
        df.to_csv(f"lion_doc_{branch.ln_id[:8]}.csv", index=False)

        return form, branch
    except Exception as e:
        # Best-effort on purpose: one failing file must not abort the batch.
        # Report the exception type and traceback as well as the message,
        # because many exceptions (timeouts, for example) have an empty
        # message and would otherwise print a blank line.
        print(f"write_doc failed with {type(e).__name__}: {e}")
        traceback.print_exc()
        return None, None


# One entry per loaded document: that document's text, used as the `context`
# argument of write_doc.
contexts = [document.text for document in docs]

In [None]:
results = await func_call.alcall(contexts, write_doc)

forms = [i[0] for i in results]
branches = [i[1] for i in results]

docs = [i.final_documentation for i in forms if i is not None]

In [None]:
# Save each generated document to its own file, doc_<i>.txt, relative to the
# current working directory.
# NOTE: `i` is the position within `docs`. Failed runs were filtered out when
# `docs` was built, so `i` does not necessarily match the position of the
# source file in `contexts`.
for i, doc in enumerate(docs):
    # Explicit UTF-8: without it open() uses the locale's default encoding, and
    # generated text containing non-ASCII characters can fail to encode there.
    with open(f"doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(doc)