In [1]:
# %pip install lionagi==0.2.3

In [2]:
import lionagi as li

In [3]:
from pathlib import Path

# Data directory: the "lionagi_data" folder under the current working directory.
data_path = Path.cwd().joinpath("lionagi_data")

### Prepare Data

In [4]:
# Load the files found under data_path (recursive=True, restricted to the
# ".py" extension).
loaded_docs = li.load(input_dir=data_path, recursive=True, required_exts=[".py"])

# Drop near-empty files: keep only documents whose content length exceeds 100.
docs = list(filter(lambda doc: len(doc.content) > 100, loaded_docs))

In [5]:
# # chunk
# pile = li.chunk(docs=docs, chunk_size=2000, overlap=0.1)

# # embed

# embed_model = li.iModel(
#     model="text-embedding-3-small", 
#     provider="openai",
#     endpoint="embeddings",
#     interval_tokens=2_000_000,
#     interval_requests=10_000,
#     interval=60,
#     # api_key = "YOUR_API_KEY_HERE" # Optional if you already put it in .env file
# )

# await pile.embed_pile(imodel=embed_model)

# # save
# pile.to_csv("lionagi_embedding.csv")

In [6]:
# Reload the pile from "lionagi_embedding.csv", the file written by the
# commented-out chunk/embed/save cell above, so that step need not be re-run.
# NOTE: the CSV must already exist (the path is relative to the working
# directory); uncomment and run the cell above once to create it.
pile = li.pile(csv_file="lionagi_embedding.csv")

### Construct Workflow

In [7]:
# Task prompt passed as `instruction` to branch.direct() further down.
# It asks for API documentation of the supplied code and requires the model
# to look up unknown names through the query tool.
instruction = """
write a good API documentation for provided code, must use 
query engine to check meanings of related code concepts 
to accurately describe, for example if a name of a variable,
function, class, or module is used but not present in context,
- You MUST check with the query engine. 
- You MUST use extension at least ONCE. 
- You MUST make sure that documentation is accurate and detailed.
"""

In [8]:
from lionagi.libs.sys_util import SysUtil
# Dependency check for the "llama_index" index type used by the query tool
# created in the next cell.
# NOTE(review): presumably this verifies that LLMRerank can be imported from
# llama_index.core.postprocessor and falls back to installing `pip_name`
# when it cannot — confirm against the lionagi SysUtil documentation.
SysUtil.check_import(
    package_name="llama_index", 
    module_name="core.postprocessor",
    import_name="LLMRerank",
    pip_name="llama-index"
)

In [9]:
# Options for exposing the pile as a tool the model can call.
# `name` is the function name the model uses in its action requests.
query_tool_options = {
    "index_type": "llama_index",
    "query_type": "query",
    "name": "qa_lionagi",
    "guidance": "Perform query to a QA bot with access to lionagi codebase",
    "query_description": "a term/phrase/question to lookup or a question to answer",
}

tools = pile.as_query_tool(**query_tool_options)

In [10]:
# print(await pile.query_pile("what is a branch in lionagi?"))

In [11]:
from PROMPTS import sys_prompt  # put your system prompt here

model = li.iModel(
    model="gpt-4o",
    provider="openai",
    interval_tokens=1_000_000,
    interval_requests=1_000,
    interval=60,
    temperature=0.4,
)

branch = li.Branch(system=sys_prompt, tools=tools, imodel=model)

form = await branch.direct(
    instruction=instruction,
    context=docs[249].content + docs[250].content,
    allow_action=True,
    allow_extension=True,
    verbose=True,
    max_extensions=2,
    retries=3,  # sometimes the model may fail to generate a valid response or refuse to take actions
)

Chatting with model...
Found action requests in model response. Processing actions...
Actions processed!
Analyzing action responses and generating answer...

--------------------------------------------------------------
Directive successfully completed!


In [12]:
# Show the branch's message log as a table, one row per message
# (system, instruction, assistant response, action requests/responses).
branch.to_df()

Unnamed: 0,ln_id,message_type,timestamp,role,content,metadata,sender,recipient
0,7fd8d6aa87fb753c6299c1d9369ff067,System,2024-08-02T18:07:57.807908,system,"{'system_info': ' you are a helpful assistant,...",{'last_updated': {'recipient': '2024-08-02T18:...,system,24ccee67652ffa3c7d23f43aa38d89bd
1,9ef14a65e710375a0c437d55c72ddbf1,Instruction,2024-08-02T18:07:57.808698,user,{'instruction': '  ## Task Instructions...,{'last_updated': {'sender': '2024-08-02T18:07:...,user,24ccee67652ffa3c7d23f43aa38d89bd
2,5820d33e27018cbb8f3435bdd7aea919,AssistantResponse,2024-08-02T18:08:03.405651,assistant,"{'assistant_response': '```json {  ""answer""...",{'last_updated': {'sender': '2024-08-02T18:08:...,24ccee67652ffa3c7d23f43aa38d89bd,user
3,af7d00816d464031c07f505c472df71e,ActionRequest,2024-08-02T18:08:03.407183,assistant,"{'action_request': {'function': 'qa_lionagi', ...",{'last_updated': {'function': '2024-08-02T18:0...,24ccee67652ffa3c7d23f43aa38d89bd,898677341eaee8b6c0921a272e0846a5
4,a98f3e68fcf4b00ed46d2b681ba8072c,ActionRequest,2024-08-02T18:08:03.407327,assistant,"{'action_request': {'function': 'qa_lionagi', ...",{'last_updated': {'function': '2024-08-02T18:0...,24ccee67652ffa3c7d23f43aa38d89bd,898677341eaee8b6c0921a272e0846a5
5,bc93bee5283d63c8cc2a17eb2ef5437a,ActionRequest,2024-08-02T18:08:03.407436,assistant,"{'action_request': {'function': 'qa_lionagi', ...",{'last_updated': {'function': '2024-08-02T18:0...,24ccee67652ffa3c7d23f43aa38d89bd,898677341eaee8b6c0921a272e0846a5
6,30882315c0adafa191169a02e3c585f2,ActionRequest,2024-08-02T18:08:03.407517,assistant,"{'action_request': {'function': 'qa_lionagi', ...",{'last_updated': {'function': '2024-08-02T18:0...,24ccee67652ffa3c7d23f43aa38d89bd,898677341eaee8b6c0921a272e0846a5
7,d62f006d73a993ad4d801341faf4724b,ActionResponse,2024-08-02T18:08:05.566842,assistant,"{'action_response': {'function': 'qa_lionagi',...",{'last_updated': {'function': '2024-08-02T18:0...,898677341eaee8b6c0921a272e0846a5,24ccee67652ffa3c7d23f43aa38d89bd
8,f62eeeacc0717752407e35adc65958a6,ActionResponse,2024-08-02T18:08:05.567179,assistant,"{'action_response': {'function': 'qa_lionagi',...",{'last_updated': {'function': '2024-08-02T18:0...,898677341eaee8b6c0921a272e0846a5,24ccee67652ffa3c7d23f43aa38d89bd
9,930b879cf0ea1bcf9324f4be7a3a15a6,ActionResponse,2024-08-02T18:08:05.567276,assistant,"{'action_response': {'function': 'qa_lionagi',...",{'last_updated': {'function': '2024-08-02T18:0...,898677341eaee8b6c0921a272e0846a5,24ccee67652ffa3c7d23f43aa38d89bd


In [13]:
# Render the form returned by branch.direct(): task, reason, requested
# actions, action responses and the final answer.
form.display()

**task**: 
 Follow the prompt and provide the necessary output.
- Additional instruction: 
write a good API documentation for provided code, must use 
query engine to check meanings of related code concepts 
to accurately describe, for example if a name of a variable,
function, class, or module is used but not present in context,
- You MUST check with the query engine. 
- You MUST use extension at least ONCE. 
- You MUST make sure that documentation is accurate and detailed.

- Additional context: import asyncio
import numpy as np
from typing import Callable
from lion_core.libs import to_list, nget
from lionagi.os.operator.imodel.imodel import iModel

async def compute_perplexity(
    imodel: iModel,
    initial_context: str = None,
    tokens: list[str] = None,
    system_msg: str = None,
    n_samples: int = 1,  # number of samples used for the computation
    use_residue: bool = True,  # whether to use residue for the last sample
    **kwargs,  # additional arguments for the model
) -> tuple[list[str], float]:
    tasks = []
    context = initial_context or ""
    n_samples = n_samples or len(tokens)
    sample_token_len, residue = divmod(len(tokens), n_samples)
    samples = []
    if n_samples == 1:
        samples = [tokens]
    else:
        samples = [tokens[: (i + 1) * sample_token_len] for i in range(n_samples)]
        if use_residue and residue != 0:
            samples.append(tokens[-residue:])
    sampless = [context + " ".join(sample) for sample in samples]
    for sample in sampless:
        messages = [{"role": "system", "content": system_msg}] if system_msg else []
        messages.append(
            {"role": "user", "content": sample},
        )
        task = asyncio.create_task(
            imodel.chat_completion(
                messages=messages,
                logprobs=True,
                max_tokens=sample_token_len,
                **kwargs,
            )
        )
        tasks.append(task)
    results = await asyncio.gather(*tasks)  # result is (p...

**reason**: 
 Let's think step by step. To provide accurate and detailed API documentation, it is essential to understand the specific meanings of key terms, functions, and classes used in the provided code. Therefore, querying the meanings of 'perplexity' in NLP, and the functions 'nget', 'to_list', and the class 'iModel' will help ensure the documentation is precise and comprehensive.

**actions**: 
 
 
1. **qa_lionagi**(query: What is the meaning of 'perplexity' in the context of natural language processing (NLP)?), 
 
2. **qa_lionagi**(query: What does the function 'nget' from 'lion_core.libs' do?), 
 
3. **qa_lionagi**(query: What does the function 'to_list' from 'lion_core.libs' do?), 
 
4. **qa_lionagi**(query: What does the class 'iModel' from 'lionagi.os.operator.imodel.imodel' represent?)

**action_required**: True

**extension_required**: None

**action_response**: 
 
 
1. **qa_lionagi**(query: What does the function 'nget' from 'lion_core.libs' do?) 
 
 The function 'nget' retrieves a value from a nested list or dictionary structure using specified indices. If the target location cannot be reached or does not exist, it returns a specified default value or raises a LookupError if no default is provided. The indices can be integers for lists and strings for dictionaries., 
 
2. **qa_lionagi**(query: What does the function 'to_list' from 'lion_core.libs' do?) 
 
 The function 'to_list' from 'lion_core.libs' converts the given input into a list., 
 
3. **qa_lionagi**(query: What does the class 'iModel' from 'lionagi.os.operator.imodel.imodel' represent?) 
 
 The class 'iModel' from 'lionagi.os.operator.imodel.imodel' represents a model interface used for defining the structure and behavior of models within the 'lionagi' system., 
 
4. **qa_lionagi**(query: What does the class 'iModel' from 'lionagi.os.operator.imodel.imodel' represent?) 
 
 The class 'iModel' from 'lionagi.os.operator.imodel.imodel' represents a model interface used for defining the structure and behavior of models within the 'lionagi' system., 
 
5. **qa_lionagi**(query: What is the meaning of 'perplexity' in the context of natural language processing (NLP)?) 
 
 In the context of natural language processing (NLP), 'perplexity' is a measurement used to evaluate the performance of a language model. It quantifies how well a probability distribution or probability model predicts a sample. A lower perplexity indicates that the model is better at predicting the sample, meaning it is more confident in its predictions. Essentially, perplexity measures the uncertainty of the model when it comes to predicting the next word in a sequence.

**action_performed**: True

**available_tools**: ['qa_lionagi']

**answer**: 
 ### Class: `iModel`

**Description**:
The `iModel` class represents a model interface used for defining the structure and behavior of models within the 'lionagi' system.

### Method: `compute_perplexity`

**Signature**:
```python
async def compute_perplexity(
    imodel: iModel,
    initial_context: str = None,
    tokens: list[str] = None,
    system_msg: str = None,
    n_samples: int = 1,
    use_residue: bool = True,
    **kwargs
) -> tuple[list[str], float]:
```

**Parameters**:
- `imodel` (iModel): The model interface used for chat completion.
- `initial_context` (str, optional): The initial context string. Defaults to `None`.
- `tokens` (list[str], optional): List of tokens to be used. Defaults to `None`.
- `system_msg` (str, optional): System message to be included. Defaults to `None`.
- `n_samples` (int, optional): Number of samples used for the computation. Defaults to `1`.
- `use_residue` (bool, optional): Whether to use residue for the last sample. Defaults to `True`.
- `**kwargs`: Additional arguments for the model.

**Return Values**:
- `tuple[list[str], float]`: A tuple containing a list of strings and a float value representing the computed perplexity.

**Exceptions Raised**:
- None explicitly raised.

**Usage Examples**:
```python
# Example usage of compute_perplexity
import asyncio
from lionagi.os.operator.imodel.imodel import iModel

async def main():
    imodel = iModel()
    tokens = ["example", "tokens", "for", "perplexity"]
    result = await compute_perplexity(imodel, tokens=tokens)
    print(result)

asyncio.run(main())
```

### Method: `select_by_pplex`

**Signature**:
```python
def select_by_pplex(
    ranked_items: list,
    target_compression_ratio: float,
    original_length: int,
    tokenizer: Callable,
    min_pplex: float
) -> list:
```

**Parameters**:
- `ranked_items` (list): List of items ranked by perplexity.
- `target_compression_ratio` (float): Desired compression ratio.
- `original_length` (int): Original length of the items.
- `tokenizer` (Callable): Function to tokenize the items.
- `min_pplex` (float): Minimum perplexity threshold.

**Return Values**:
- `list`: List of selected items based on the perplexity and compression ratio.

**Exceptions Raised**:
- None explicitly raised.

**Usage Examples**:
```python
# Example usage of select_by_pplex
ranked_items = [("item1", {"perplexity": 10}), ("item2", {"perplexity": 5})]
target_compression_ratio = 0.5
original_length = 100
tokenizer = lambda x: x.split()

selected_items = select_by_pplex(ranked_items, target_compression_ratio, original_length, tokenizer, min_pplex=3)
print(selected_items)
```

### Concepts Used

- **Perplexity**: In the context of natural language processing (NLP), 'perplexity' is a measurement used to evaluate the performance of a language model. It quantifies how well a probability distribution or probability model predicts a sample. A lower perplexity indicates that the model is better at predicting the sample, meaning it is more confident in its predictions. Essentially, perplexity measures the uncertainty of the model when it comes to predicting the next word in a sequence.

- **nget**: The function 'nget' retrieves a value from a nested list or dictionary structure using specified indices. If the target location cannot be reached or does not exist, it returns a specified default value or raises a LookupError if no default is provided. The indices can be integers for lists and strings for dictionaries.

- **to_list**: The function 'to_list' from 'lion_core.libs' converts the given input into a list.

### Improve

In [14]:
# Follow-up prompt asking for the final documentation as plain markdown only
# (no JSON fields) and in a more technical style.
edit = """
provide documentation only: final documentation in md 
format of the module of interest, do not include other 
fields do not present in JSON format, only markdown format 
you asked a lot of good questions and got plenty answers, 
please integrate your conversation, be a lot more technical, you will 
be rewarded with 500 dollars for great work, and 
punished for subpar work, take a deep breath, you can do it
"""

# Sent on the same branch as the earlier branch.direct() call.
# NOTE(review): presumably the branch still holds the earlier queries and
# answers in its history, which the prompt asks the model to integrate —
# confirm against the lionagi Branch documentation.
final_output = await branch.chat(instruction=edit)

In [15]:
from IPython.display import Markdown

# Render the model's reply to the follow-up prompt as Markdown
# (the last expression of the cell is what gets displayed).
Markdown(final_output)

## Module Documentation

### Class: `iModel`

**Description**:
The `iModel` class represents a model interface used for defining the structure and behavior of models within the 'lionagi' system. It is designed to facilitate interactions with various language models, providing methods for tasks such as chat completion and perplexity computation.

### Method: `compute_perplexity`

**Signature**:
```python
async def compute_perplexity(
    imodel: iModel,
    initial_context: str = None,
    tokens: list[str] = None,
    system_msg: str = None,
    n_samples: int = 1,
    use_residue: bool = True,
    **kwargs
) -> tuple[list[str], float]:
```

**Parameters**:
- `imodel` (`iModel`): The model interface used for chat completion.
- `initial_context` (`str`, optional): The initial context string. Defaults to `None`.
- `tokens` (`list[str]`, optional): List of tokens to be used. Defaults to `None`.
- `system_msg` (`str`, optional): System message to be included. Defaults to `None`.
- `n_samples` (`int`, optional): Number of samples used for the computation. Defaults to `1`.
- `use_residue` (`bool`, optional): Whether to use residue for the last sample. Defaults to `True`.
- `**kwargs`: Additional arguments for the model.

**Return Values**:
- `tuple[list[str], float]`: A tuple containing a list of strings and a float value representing the computed perplexity.

**Exceptions Raised**:
- None explicitly raised.

**Usage Examples**:
```python
import asyncio
from lionagi.os.operator.imodel.imodel import iModel

async def main():
    imodel = iModel()
    tokens = ["example", "tokens", "for", "perplexity"]
    result = await compute_perplexity(imodel, tokens=tokens)
    print(result)

asyncio.run(main())
```

**Detailed Description**:
The `compute_perplexity` function computes the perplexity of a given set of tokens using the provided `iModel` instance. Perplexity is a measurement used to evaluate the performance of a language model, quantifying how well the model predicts the given tokens. A lower perplexity indicates better performance.

The function creates multiple samples from the provided tokens and computes the perplexity for each sample. The results are aggregated to provide the final perplexity score.

### Method: `select_by_pplex`

**Signature**:
```python
def select_by_pplex(
    ranked_items: list,
    target_compression_ratio: float,
    original_length: int,
    tokenizer: Callable,
    min_pplex: float
) -> list:
```

**Parameters**:
- `ranked_items` (`list`): List of items ranked by perplexity.
- `target_compression_ratio` (`float`): Desired compression ratio.
- `original_length` (`int`): Original length of the items.
- `tokenizer` (`Callable`): Function to tokenize the items.
- `min_pplex` (`float`): Minimum perplexity threshold.

**Return Values**:
- `list`: List of selected items based on the perplexity and compression ratio.

**Exceptions Raised**:
- None explicitly raised.

**Usage Examples**:
```python
ranked_items = [("item1", {"perplexity": 10}), ("item2", {"perplexity": 5})]
target_compression_ratio = 0.5
original_length = 100
tokenizer = lambda x: x.split()

selected_items = select_by_pplex(ranked_items, target_compression_ratio, original_length, tokenizer, min_pplex=3)
print(selected_items)
```

**Detailed Description**:
The `select_by_pplex` function selects items from a list of ranked items based on their perplexity scores and the desired compression ratio. The function iterates through the ranked items, tokenizes them, and selects items until the desired length is reached. Items with perplexity scores below the minimum threshold are ignored.

### Concepts Used

**Perplexity**:
In the context of natural language processing (NLP), 'perplexity' is a measurement used to evaluate the performance of a language model. It quantifies how well a probability distribution or probability model predicts a sample. A lower perplexity indicates that the model is better at predicting the sample, meaning it is more confident in its predictions. Essentially, perplexity measures the uncertainty of the model when it comes to predicting the next word in a sequence.

**nget**:
The function 'nget' retrieves a value from a nested list or dictionary structure using specified indices. If the target location cannot be reached or does not exist, it returns a specified default value or raises a LookupError if no default is provided. The indices can be integers for lists and strings for dictionaries.

**to_list**:
The function 'to_list' from 'lion_core.libs' converts the given input into a list. This is useful for ensuring that the input is in a consistent format for further processing.

### Check Extension Forms

In [16]:
# Display each extension form attached to the result, if the attribute exists;
# when `form` has no `extension_forms` attribute nothing is shown.
for extension_form in getattr(form, "extension_forms", []):
    extension_form.display()

### Check Actions

In [17]:
# Responses recorded on the pile for the queries made through the query tool.
query_responses = pile.query_response
print(f"The task invoked query engine {len(query_responses)} times.")

# Render each recorded answer as Markdown.
for query_result in query_responses:
    display(Markdown(query_result.response))

The task invoked query engine 4 times.


The function 'to_list' from 'lion_core.libs' converts the given input into a list.

The class 'iModel' from 'lionagi.os.operator.imodel.imodel' represents a model interface used for defining the structure and behavior of models within the 'lionagi' system.

The function 'nget' retrieves a value from a nested list or dictionary structure using specified indices. If the target location cannot be reached or does not exist, it returns a specified default value or raises a LookupError if no default is provided. The indices can be integers for lists and strings for dictionaries.

In the context of natural language processing (NLP), 'perplexity' is a measurement used to evaluate the performance of a language model. It quantifies how well a probability distribution or probability model predicts a sample. A lower perplexity indicates that the model is better at predicting the sample, meaning it is more confident in its predictions. Essentially, perplexity measures the uncertainty of the model when it comes to predicting the next word in a sequence.