# experimental
> Tools for exporting parts of LangChain runs in a very opinionated way.

In [None]:
#| default_exp experimental

In [None]:
#|export
from typing import List, Iterable, Union
from collections import Counter
from pathlib import Path
import pickle

import pandas as pd
from pydantic import BaseModel
import langsmith
from fastcore.foundation import first, L
from fastcore.test import test_eq
from langfree.runs import (get_runs_by_commit, get_output, get_input, 
                           get_params, get_functions,
                          get_feedback, take)
from langfree.transform import RunData
from langsmith import Client

In [None]:
from nbdev.showdoc import show_doc

In [None]:
#|export
class ChatRecord(BaseModel):
    "A parsed run from LangSmith, focused on the `ChatOpenAI` run type."
    child_run_id:str
    parent_run_id:str
    child_run:RunData
    url: str
    total_tokens:Union[int, None]
    prompt_tokens:Union[int, None]
    completion_tokens:Union[int, None]
    feedback: Union[List,None] = None
    feedback_keys: Union[List,None] = None
    tags: Union[List,None] = []
    start_dt: Union[str, None] = None
    parent_url: Union[str,None] = None
    parent_id: Union[str,None] = None
    function_defs: Union[List,None] = None
    param_model_name: Union[str,None]= None
    param_n: Union[int, None] = None
    # The sampling parameters below are fractional numbers in the OpenAI API
    # (e.g. temperature=0.7), so they are typed as floats; an `int` annotation
    # would reject or truncate such values during validation.
    param_top_p: Union[float, None] = None
    param_temp: Union[float, None] = None
    param_presence_penalty: Union[float, None] = None
    param_freq_penalty: Union[float, None] = None
    warnings: List[str] = []

    @property
    def flat_input(self):
        "The `flat_input` of the stored child run."
        return self.child_run.flat_input
    
    @property
    def flat_output(self):
        "The `flat_output` of the stored child run."
        return self.child_run.flat_output

    @classmethod
    def from_run_id(cls, 
                    run_id:str # the run id to fetch and parse.
                   ):
        """Collect information about a run into a `ChatRecord`.

        Reads the run with a new LangSmith `Client` and delegates to `from_run`,
        so it returns `None` in the same cases `from_run` does."""
        client = Client()
        return cls.from_run(client.read_run(run_id=run_id))
    
    @classmethod
    def from_run(cls, 
                 run:langsmith.schemas.Run # the run object to parse.
                ):
        """Collect information about a run into a `ChatRecord`.

        Returns `None` when the run has no `ChatOpenAI` child run to parse."""
        client = Client()
        warnings = []
        if run.execution_order != 1: # this is a child run, get the parent
            run = client.read_run(run.parent_run_id)

        child_runs = client.read_run(run_id=run.id, load_child_runs=True).child_runs or []
        chat_runs = [c for c in child_runs if c.name == 'ChatOpenAI']
        # Nothing to parse: either there are no child runs at all, or none of
        # them is a `ChatOpenAI` run (previously the latter raised IndexError).
        if not chat_runs: return None

        if child_runs[-1].name != 'ChatOpenAI':
            warnings.append('Last Step Not ChatOpenAI')
        crun = chat_runs[-1] # the record describes the last `ChatOpenAI` child

        _input, _output = get_input(crun), get_output(crun)
        if 'Agent stopped due to max iterations' in _input: warnings.append('Max Iterations')
        if _output.strip() == '': warnings.append('No Output')

        params = get_params(crun)
        _feedback = get_feedback(run) # you must get feedback from the root

        return cls(child_run_id=str(crun.id),
                   parent_run_id=str(run.id),
                   child_run=RunData.from_run_id(str(crun.id)),
                   url=crun.url,
                   total_tokens=crun.total_tokens,
                   prompt_tokens=crun.prompt_tokens,
                   completion_tokens=crun.completion_tokens,
                   feedback=_feedback, 
                   feedback_keys=list(L(_feedback).attrgot('key').filter()),
                   tags=run.tags,
                   start_dt=run.start_time.strftime('%m/%d/%Y'),
                   parent_url=run.url,
                   parent_id=str(run.id),
                   function_defs=get_functions(crun),
                   warnings=warnings,
                   **params)

When you instantiate `ChatRecord` with the class methods `ChatRecord.from_run` or `ChatRecord.from_run_id`, the parent run of the LangChain trace is queried in LangSmith automatically to get metadata like feedback. Additionally, if you instantiate `ChatRecord` with a root run or a run that is not a `ChatOpenAI` run type, `ChatRecord` will attempt to find the last `ChatOpenAI` run in your chain and store its id in `ChatRecord.child_run_id`. The data for this child run (inputs, outputs, functions) is stored in `ChatRecord.child_run` and is of type `RunData`.

In [None]:
show_doc(ChatRecord.from_run, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/experimental.py#L60){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecord.from_run

>      ChatRecord.from_run (run:langsmith.schemas.Run)

Collect information About A Run into a `ChatRecord`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| run | Run | the run object to parse. |

In [None]:
# Fetch the run used as the root-run example and parse it into a `ChatRecord`.
client = Client()
_root_run = client.read_run('fbfd220a-c731-46a2-87b3-e64a477824f5')
_root_result = ChatRecord.from_run(_root_run)

In [None]:
show_doc(ChatRecord.from_run_id, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/experimental.py#L55){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecord.from_run_id

>      ChatRecord.from_run_id (run_id:str)

Collect information About A Run into a `ChatRecord`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| run_id | str | the run id to fetch and parse. |

In [None]:
# Start from the id of the root run's last child instead of the root itself.
_child_run_id = str(_root_run.child_run_ids[-1])
_child_result = ChatRecord.from_run_id(_child_run_id)

Tests

In [None]:
# test that child and root runs are related
test_eq(_root_result.flat_output, _child_result.flat_output)
test_eq(_root_result.parent_run_id, _child_result.parent_run_id)

# Test case without feedback: `feedback` is expected to be an empty list
_parent_run_no_feedback = client.read_run('87900cfc-0322-48fb-b009-33d226d73597')
_no_feedback = ChatRecord.from_run(_parent_run_no_feedback)
test_eq(_no_feedback.feedback, [])

# Test case with feedback

#  ... starting with a child run
_child_w_feedback = client.read_run('f8717b0e-fb90-45cd-be00-9b4614965a2e')
_feedback = ChatRecord.from_run(_child_w_feedback).feedback
assert _feedback[0]['key'] == 'Empty Response'

#  ... starting with a parent run (should yield the same feedback as the child)
_parent_w_feedback = client.read_run(_child_w_feedback.parent_run_id)
_feedback2 = ChatRecord.from_run(_parent_w_feedback).feedback
test_eq(_feedback[0]['comment'],  _feedback2[0]['comment'])

## Saving & Loading A Dataset of `ChatRecord`

In [None]:
#|export
class ChatRecordSet(BaseModel):
    "A list of `ChatRecord`."
    records: List[ChatRecord]
    
    @classmethod
    def from_commit(cls, commit_id:str):
        "Create a `ChatRecordSet` from the runs returned by `get_runs_by_commit` for `commit_id`."
        return cls.from_runs(get_runs_by_commit(commit_id=commit_id))
    
    @classmethod
    def from_runs(cls, runs:List[langsmith.schemas.Run]):
        "Create a `ChatRecordSet` from runs, skipping runs that `ChatRecord.from_run` cannot parse."
        parsed = (ChatRecord.from_run(r) for r in runs)
        # `ChatRecord.from_run` returns None for runs it cannot parse; filter on
        # each item (the previous filter tested the whole list, so it dropped nothing).
        return cls(records=[rec for rec in parsed if rec is not None])
    
    def __len__(self):
        "Number of records in the set."
        return len(self.records)
    
    def save(self, path:str):
        "Pickle this object to `path`, creating missing parent directories, and return the `Path`."
        dest_path = Path(path)
        # parents=True so nested destinations such as `a/b/c.pkl` also work.
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        with open(dest_path, 'wb') as f:
            pickle.dump(self, f)
        return dest_path
        
    def __iter__(self):
        "Iterate over the records (not over the model's fields)."
        yield from self.records
    
    @classmethod
    def load(cls, path:str):
        """Load data from disk.

        Raises `TypeError` if the unpickled object is not an instance of this class."""
        # NOTE(security): `pickle.load` can execute arbitrary code; only load
        # files that you created yourself or otherwise trust.
        with open(Path(path), 'rb') as f:
            obj = pickle.load(f)
        if not isinstance(obj, cls):
            raise TypeError(f"The loaded object is not of type {cls.__name__}")
        return obj
                
    def to_pandas(self):
        "Convert the `ChatRecordSet` to a pandas.DataFrame with one row per record."
        # `flat_input`/`flat_output` are properties, so they are added explicitly
        # in front of the model fields produced by `dict(rec)`.
        rows = [dict(flat_input=rec.flat_input,
                     flat_output=rec.flat_output,
                     **dict(rec))
                for rec in self.records]
        return pd.DataFrame(rows)

In [None]:
from langfree.runs import get_runs_by_commit
# Query the runs for this commit id, then build a `ChatRecordSet` from
# `take(_runs, 10)` to keep the example small.
_runs = get_runs_by_commit(commit_id='028e4aa4')
llmdata = ChatRecordSet.from_runs(take(_runs, 10))

Fetching runs with this filter: and(eq(status, "success"), has(tags, "commit:028e4aa4"))


### Convert `ChatRecordSet` to a Pandas DataFrame

You can do this with `ChatRecordSet.to_pandas()`.

In [None]:
# Convert the record set to a DataFrame and preview its first row.
_df = llmdata.to_pandas()
_df.head(1)

Unnamed: 0,flat_input,flat_output,child_run_id,parent_run_id,child_run,url,total_tokens,prompt_tokens,completion_tokens,feedback,...,parent_url,parent_id,function_defs,param_model_name,param_n,param_top_p,param_temp,param_presence_penalty,param_freq_penalty,warnings
0,### System\n\nYou are an AI assistant named Re...,### Assistant\n\nHere are the contact details ...,ba3c0a47-0803-4b0f-8a2f-380722edc2bf,7074af93-1821-4325-9d45-0f2e81eca0fe,"inputs=[{'role': 'system', 'content': 'You are...",https://smith.langchain.com/o/9d90c3d2-ca7e-4c...,,,,[],...,https://smith.langchain.com/o/9d90c3d2-ca7e-4c...,7074af93-1821-4325-9d45-0f2e81eca0fe,"[{'name': 'contact-finder', 'parameters': {'ty...",gpt-3.5-turbo-0613,1,1,0,0,0,[]


In [None]:
#|hide
# `llmdata` was built from `take(_runs, 10)`, so ten rows are expected.
assert _df.shape[0] == 10

### Save Data

In [None]:
#|eval: false
# Pickle the record set to disk; `save` returns the destination path.
llmdata.save('_data/llm_data.pkl')

Path('_data/llm_data.pkl')

### Load Data

In [None]:
#|eval: false
# Round-trip check: the reloaded set should start with the same record as `llmdata`.
_loaded = ChatRecordSet.load('_data/llm_data.pkl')
assert llmdata.records[0].child_run_id == _loaded.records[0].child_run_id