# chatrecord
> Tools for exporting chat-related portions of LangChain runs.

In [None]:
#| default_exp chatrecord

In [None]:
#|export
from typing import List, Iterable, Union
from collections import Counter
from pathlib import Path
import pickle

import pandas as pd
from pydantic import BaseModel
import langsmith
from fastcore.foundation import first, L
from fastcore.test import test_eq
from langfree.runs import (get_runs_by_commit, 
                           get_params, get_functions,
                          get_feedback)
from langfree.transform import RunData
from langsmith import Client

In [None]:
from nbdev.showdoc import show_doc

In [None]:
#|export
class NoChatOpenAI(Exception):
    "Raised when no `ChatOpenAI` run can be found for a run."
    def __init__(self, message, extra_data=None):
        super().__init__(message)
        # Keep the optional payload so handlers can inspect it (it used to be discarded).
        self.extra_data = extra_data

In [None]:
#|export
def get_nested_child_run(run):
    """Get the last `ChatOpenAI` run below `run`.

    Looks at the direct children of `run` and at the children of any direct
    child named `RunnableAgent`. Raises `NoChatOpenAI` when none is found.
    """
    client = Client()
    # Re-read the run so that its child runs are loaded along with it.
    run = client.read_run(run_id=run.id, load_child_runs=True)
    oai_children = []
    # `child_runs` is declared Optional in langsmith's `Run` schema, so treat
    # `None` as "no children" instead of failing with a `TypeError`.
    for r in run.child_runs or []:
        if r.name == 'RunnableAgent':
            oai_children.extend(c for c in (r.child_runs or []) if c.name == 'ChatOpenAI')
        elif r.name == 'ChatOpenAI':
            oai_children.append(r)
    if not oai_children:
        raise NoChatOpenAI(f'Not able to find ChatOpenAI child run from root run {run.id}')
    return oai_children[-1]

def get_child_chat_run(run):
    """Return `(run, chat_run)` for `run`.

    When `run` has a parent, the parent run is fetched and used in its place.
    `chat_run` is the last `ChatOpenAI` run found by `get_nested_child_run`.
    """
    client = Client()
    has_parent = run.parent_run_id is not None
    # A child run is swapped for its parent before searching for the chat run.
    top_run = client.read_run(run.parent_run_id) if has_parent else run
    return top_run, get_nested_child_run(top_run)

In [None]:
#|hide
# Smoke test: resolving a known run id must not raise.
# The returned tuple is always truthy, so the assert only checks that the call succeeds.
client = Client()
_run_id = '98d1c463-bf25-46a1-90f2-a3a1b5e2fa3f'
_root_run = client.read_run(_run_id)
assert get_child_chat_run(_root_run)

In [None]:
#|hide
# Smoke test: a run can be re-read by id with its child runs loaded.
client = Client()
_root_run = client.read_run('fbfd220a-c731-46a2-87b3-e64a477824f5')
assert client.read_run(run_id=_root_run.id, load_child_runs=True)

In [None]:
#|export
class ChatRecord(BaseModel):
    "A parsed run from LangSmith, focused on the `ChatOpenAI` run type."
    # Identity and data of the `ChatOpenAI` child run.
    child_run_id:str
    child_run:RunData
    child_url:Union[str,None] = None
    # The run that `get_child_chat_run` resolved as the parent of the child run.
    parent_run_id:Union[str,None] = None
    parent_url: Union[str,None] = None
    # Token counts copied from the child run.
    total_tokens:Union[int, None]
    prompt_tokens:Union[int, None]
    completion_tokens:Union[int, None]
    # Feedback is read from the parent run; `feedback_keys` holds the non-empty `key` values.
    feedback: Union[List,None] = None
    feedback_keys: Union[List,None] = None
    tags: Union[List,None] = []
    start_dt: Union[str, None] = None  # parent start time formatted as MM/DD/YYYY
    function_defs: Union[List,None] = None
    param_model_name: Union[str,None]= None
    param_n: Union[int, None] = None
    # Sampling parameters can be fractional (e.g. a temperature of 0.7), which an
    # `int` annotation cannot hold faithfully, so they are typed as floats.
    param_top_p: Union[float, None] = None
    param_temp: Union[float, None] = None
    param_presence_penalty: Union[float, None] = None
    param_freq_penalty: Union[float, None] = None

    @property
    def flat_input(self):
        "The `flat_input` of the underlying child run."
        return self.child_run.flat_input
    
    @property
    def flat_output(self):
        "The `flat_output` of the underlying child run."
        return self.child_run.flat_output

    @classmethod
    def from_run_id(cls, 
                    run_id:str # the run id to fetch and parse.
                   ):
        "Collect information about a run, fetched by its id, into a `ChatRecord`."
        client = Client()
        return cls.from_run(client.read_run(run_id=run_id))
    
    @classmethod
    def from_run(cls, 
                 run:langsmith.schemas.Run # the run object to parse.
                ):
        "Collect information about a run into a `ChatRecord`."
        # `run` becomes the parent run (when there is one); `crun` is the last `ChatOpenAI` run.
        run, crun = get_child_chat_run(run)
        if not crun: return None

        # Presumably supplies the `param_*` fields declared above -- verify against `get_params`.
        params = get_params(crun)
        _feedback = get_feedback(run) # you must get feedback from the root

        return cls(child_run_id=str(crun.id),
                   child_run=RunData.from_run_id(str(crun.id)),
                   child_url=crun.url,
                   parent_run_id=str(run.id) if run else None,
                   parent_url=run.url if run else None,
                   total_tokens=crun.total_tokens,
                   prompt_tokens=crun.prompt_tokens,
                   completion_tokens=crun.completion_tokens,
                   feedback=_feedback, 
                   feedback_keys=list(L(_feedback).attrgot('key').filter()),
                   tags=run.tags,
                   start_dt=run.start_time.strftime('%m/%d/%Y'),
                   function_defs=get_functions(crun),
                   **params)

When instantiating `ChatRecord` with the class methods `ChatRecord.from_run` or `ChatRecord.from_run_id`, we automatically query the parent run of the LangChain trace in LangSmith to get metadata like feedback. Additionally, if you instantiate `ChatRecord` with a root run or a run that is not a `ChatOpenAI` run type, `ChatRecord` will attempt to find the last `ChatOpenAI` run in your chain and store its id in `ChatRecord.child_run_id`. The data for this child run (inputs, outputs, functions) is stored in `ChatRecord.child_run` and is of type `RunData`.

In [None]:
#|hide
# Regression check: this used to cause a confusing deserialization error.
from langchain.load import load

_tst_run_id = '98d1c463-bf25-46a1-90f2-a3a1b5e2fa3f'
client = Client()
_trun = client.read_run(run_id=_tst_run_id)
_run, _crun = get_child_chat_run(_trun)

# Deserialize the first generated message of the child run with LangChain's `load`.
_msg = _crun.outputs['generations'][0]['message']
load(_msg)

  warn_beta(


AIMessage(content='```json\n{"id":"df952a3b-d04b-4329-865d-ef37e727da38","type":"template_instance"}\n```')

In [None]:
# Render the documentation for `ChatRecord.from_run`.
show_doc(ChatRecord.from_run, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/chatrecord.py#L93){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecord.from_run

>      ChatRecord.from_run (run:langsmith.schemas.Run)

Collect information About A Run into a `ChatRecord`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| run | Run | the run object to parse. |

In [None]:
# Build a `ChatRecord` starting from a run object.
client = Client()
_root_run = client.read_run('fbfd220a-c731-46a2-87b3-e64a477824f5')
_root_result = ChatRecord.from_run(_root_run)

In [None]:
# Render the documentation for `ChatRecord.from_run_id`.
show_doc(ChatRecord.from_run_id, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/chatrecord.py#L85){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecord.from_run_id

>      ChatRecord.from_run_id (run_id:str)

Collect information About A Run into a `ChatRecord`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| run_id | str | the run id to fetch and parse. |

In [None]:
# Build a second record from the id of the last child run listed on `_root_run`.
_child_run_id = str(_root_run.child_run_ids[-1])
_child_result = ChatRecord.from_run_id(_child_run_id)

Tests

In [None]:
# Test that records built from the child run and from the root run agree
test_eq(_root_result.flat_output, _child_result.flat_output)
test_eq(_root_result.parent_run_id, _child_result.parent_run_id)

# Test case without feedback: the feedback list should be empty
_parent_run_no_feedback = client.read_run('87900cfc-0322-48fb-b009-33d226d73597')
_no_feedback = ChatRecord.from_run(_parent_run_no_feedback)
test_eq(_no_feedback.feedback, [])

# Test case with feedback

#  ... starting with a child run
_child_w_feedback = client.read_run('f8717b0e-fb90-45cd-be00-9b4614965a2e')
_feedback = ChatRecord.from_run(_child_w_feedback).feedback
assert _feedback[0]['key'] == 'empty response'

#  ... starting with a parent run (must yield the same feedback as the child run)
_parent_w_feedback = client.read_run(_child_w_feedback.parent_run_id)
_feedback2 = ChatRecord.from_run(_parent_w_feedback).feedback
test_eq(_feedback[0]['comment'],  _feedback2[0]['comment'])

## `ChatRecordSet`, a list of `ChatRecord`

In [None]:
#|export
class ChatRecordSet(BaseModel):
    "A List of `ChatRecord`."
    records: List[ChatRecord]
    
    @classmethod
    def from_commit(cls, commit_id:str, limit:int=None):
        "Create a `ChatRecordSet` from a commit id"
        _runs = get_runs_by_commit(commit_id=commit_id, limit=limit)
        return cls.from_runs(_runs)
    
    @classmethod
    def from_runs(cls, runs:List[langsmith.schemas.Run]):
        "Load ChatRecordSet from runs."
        _records = []
        for r in runs:
            # Best effort: runs without a `ChatOpenAI` child are reported and skipped.
            try: _records.append(ChatRecord.from_run(r))
            except NoChatOpenAI as e: print(e) 
        return cls(records=_records)

    @classmethod
    def from_run_ids(cls, runs:List[str]):
        "Load ChatRecordSet from run ids."
        _records = []
        for r in runs:
            # Best effort: runs without a `ChatOpenAI` child are reported and skipped.
            try: _records.append(ChatRecord.from_run_id(r))
            except NoChatOpenAI as e: print(e)
        return cls(records=_records)
    
    def __len__(self): return len(self.records)

    def __getitem__(self, index: int) -> ChatRecord:
        return self.records[index]

    def __repr__(self):
        return f'`List[ChatRecord]` of size {len(self.records)}.'
    
    def save(self, path:str):
        "Save data to disk as a pickle file and return the destination path."
        dest_path = Path(path)
        # `parents=True` creates every missing directory level, not just the last one.
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        with open(dest_path, 'wb') as f:
            pickle.dump(self, f)
        return dest_path
        
    def __iter__(self): 
        "Iterate over the contained `ChatRecord` objects."
        yield from self.records
    
    @classmethod
    def load(cls, path:str):
        "Load data from disk; raises `TypeError` if the file does not hold an instance of this class."
        src_path = Path(path)
        # NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
        with open(src_path, 'rb') as f:
            obj = pickle.load(f)
        if not isinstance(obj, cls):
            raise TypeError(f"The loaded object is not of type {cls.__name__}")
        return obj
                
    def to_pandas(self):
        "Convert the `ChatRecordSet` to a pandas.DataFrame."
        # One row per record, one column per `ChatRecord` field.
        records = L(self.records).map(dict)                      
        return pd.DataFrame(records)

    def to_dicts(self):
        "Convert the ChatRecordSet to a list of dicts, which you can convert to jsonl."
        return list(L(self.records).map(lambda x: x.child_run.to_msg_dict()))

In [None]:
# Render the documentation for `ChatRecordSet.from_runs`.
show_doc(ChatRecordSet.from_runs, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/chatrecord.py#L130){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecordSet.from_runs

>      ChatRecordSet.from_runs (runs:List[langsmith.schemas.Run])

Load ChatRecordSet from runs.

We can create a `ChatRecordSet` directly from a list of runs:

In [None]:
# from langfree.runs import get_runs_by_commit
# Fetch up to 10 runs tagged with this commit and parse them into a `ChatRecordSet`.
_runs = get_runs_by_commit(commit_id='028e4aa4', limit=10)
llmdata = ChatRecordSet.from_runs(_runs)

Fetching runs with this filter: and(eq(status, "success"), has(tags, "commit:028e4aa4"))


There is a special shortcut to get runs by a commit tag which uses `get_runs_by_commit` for you:

In [None]:
# Render the documentation for `ChatRecordSet.from_commit`.
show_doc(ChatRecordSet.from_commit, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/chatrecord.py#L124){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecordSet.from_commit

>      ChatRecordSet.from_commit (commit_id:str, limit:int=None)

Create a `ChatRecordSet` from a commit id

In [None]:
# The shortcut should give the same first record as fetching the runs manually.
llmdata2 = ChatRecordSet.from_commit('028e4aa4', limit=10)
assert llmdata[0].child_run_id == llmdata2[0].child_run_id

Fetching runs with this filter: and(eq(status, "success"), has(tags, "commit:028e4aa4"))


Finally, you can also construct a `ChatRecordSet` from a list of run ids:

In [None]:
# Render the documentation for `ChatRecordSet.from_run_ids`.
show_doc(ChatRecordSet.from_run_ids, title_level=4)

---

[source](https://github.com/parlance-labs/langfree/blob/main/langfree/chatrecord.py#L139){target="_blank" style="float:right; font-size:smaller"}

#### ChatRecordSet.from_run_ids

>      ChatRecordSet.from_run_ids (runs:List[str])

Load ChatRecordSet from run ids.

In [None]:
_run_ids = ['ba3c0a47-0803-4b0f-8a2f-380722edc2bf',
 '842fe1b4-c650-4bfa-bcf9-bf5c30f8204c',
 '5c06bbf3-ef14-47a1-a3a4-221f65d4a407',
 '327039ab-a0a5-488b-875f-21e0d30ee2cd']

llmdata3 = ChatRecordSet.from_run_ids(_run_ids)
assert len(llmdata3) == len(_run_ids)
# Check the set built from ids (this previously inspected `llmdata`, not `llmdata3`).
assert llmdata3[0].child_run_id == _run_ids[0]

### Convert `ChatRecordSet` to a Pandas DataFrame

You can do this with `to_pandas()`

In [None]:
# Convert the record set to a DataFrame and preview the first row.
_df = llmdata.to_pandas()
_df.head(1)

Unnamed: 0,child_run_id,child_run,child_url,parent_run_id,parent_url,total_tokens,prompt_tokens,completion_tokens,feedback,feedback_keys,tags,start_dt,function_defs,param_model_name,param_n,param_top_p,param_temp,param_presence_penalty,param_freq_penalty
0,ba3c0a47-0803-4b0f-8a2f-380722edc2bf,"inputs=[{'role': 'system', 'content': 'You are...",https://smith.langchain.com/o/9d90c3d2-ca7e-4c...,7074af93-1821-4325-9d45-0f2e81eca0fe,https://smith.langchain.com/o/9d90c3d2-ca7e-4c...,0,0,0,[],[],"[commit:028e4aa4, branch:testing, test, room:6...",09/05/2023,"[{'name': 'contact-finder', 'parameters': {'ty...",gpt-3.5-turbo-0613,1,1,0,0,0


In [None]:
#|hide
# `llmdata` was fetched with `limit=10`, so the frame should have 10 rows.
assert _df.shape[0] == 10

### Save Data

In [None]:
#|eval: false
# Pickle the record set to disk; `save` returns the destination path.
llmdata.save('_data/llm_data.pkl')

Path('_data/llm_data.pkl')

### Load Data

In [None]:
#|eval: false
# Round trip: the loaded set should start with the same record as the saved one.
_loaded = ChatRecordSet.load('_data/llm_data.pkl')
assert llmdata.records[0].child_run_id == _loaded.records[0].child_run_id

In [None]:
#|hide
# Run nbdev's export step for this notebook.
import nbdev; nbdev.nbdev_export()