# runs

> Get language model runs from LangSmith

In [None]:
#| default_exp runs

In [None]:
#|export
from collections import defaultdict
import os
from datetime import date, timedelta, datetime
from itertools import islice
from typing import List, Iterable
from pprint import pformat
from contextlib import contextmanager

import pandas as pd
from langchain.load import load
import langsmith
from langsmith import Client
from fastcore.foundation import L, first

In [None]:
#|export
@contextmanager
def _temp_env_var(vars_dict):
    """Temporarily set environment variables (for testing).

    Every name in `vars_dict` is set to its value for the duration of the
    `with` block.  On exit each name is restored to the value it had before
    the block, or removed if it was not set beforehand.
    """
    # Snapshot the prior values; `None` marks a variable that was not set.
    original_values = {name: os.environ.get(name) for name in vars_dict}

    try:
        # Set the temporary values inside the `try` so that a failure part-way
        # through (e.g. a non-string value) still restores the environment.
        for name, value in vars_dict.items():
            os.environ[name] = value
        yield
    finally:
        # Revert changes after block execution
        for name, original_value in original_values.items():
            if original_value is None:
                # The block itself may already have removed the variable, so
                # use `pop` with a default rather than `del`.
                os.environ.pop(name, None)
            else:
                os.environ[name] = original_value

In [None]:
#|export
def check_api_key(nm="LANGCHAIN_HUB_API_KEY"):
    "Return the value of environment variable `nm`; raise if it is unset or empty."
    value = os.environ.get(nm)
    if value:
        return value
    raise Exception(f"You must set the environment variable {nm}")

In [None]:
#|export
#|hide
# Fail fast when the module is loaded if any required setting is missing:
# `check_api_key` raises when the variable is unset or empty.
check_api_key("LANGCHAIN_API_KEY")
check_api_key("LANGCHAIN_ENDPOINT")
check_api_key("LANGSMITH_PROJECT_ID")
# Module-level client, used by `get_last_child` and `get_feedback`.
client = Client()

## Get Runs

### Background 

Langsmith offers a convenient [python client](https://github.com/langchain-ai/langsmith-sdk) for retrieving runs.  [The docs](https://docs.smith.langchain.com/tracing/use-cases/export-runs/local) go into further detail about the various options available.  Some useful patterns to know are:

Getting a list of runs:

```python
from langsmith import Client
client = Client()
project_runs = client.list_runs(project_name="<your_project>")
```

Getting a specific run:

```python
from langsmith import Client
client = Client()
run = client.read_run("<run_id>")
```

Furthermore, there are various ways to filter and search runs which are described in [the documentation](https://docs.smith.langchain.com/tracing/use-cases/export-runs).  If these suit your needs, you may not need the utilities in this module.  This module offers opinionated wrappers around the Langsmith client that retrieve runs using common patterns we have seen.

### Utilities

The following functions help retrieve runs by a very specific kind of [tag](https://docs.smith.langchain.com/tracing/tracing-faq#how-do-i-add-tags-to-runs), as well as recent runs.

In [None]:
#|export
def take(l:Iterable, n:int):
    "Return the first `n` items of the iterable `l` as a fastcore `L`"
    head = islice(l, n)
    return L(head)

In [None]:
#|export
def get_runs_by_commit(commit_id:str=None, # The commit ID to filter by 
             proj_id:str=None, # Langsmith Project ID
             only_success=True, # Only include runs that are successful
             run_type='chain', # The run type
             start_dt:str=None, # The start date to filter by
             end_dt:str=None,    # the end date to filter by
             limit:int=None       # The maximum number of runs to return
            ):
    """Get all runs tagged with a particular commit id (the short version of the SHA) in LangSmith.

    Every filter is optional: `commit_id`, `start_dt` and `end_dt` are only
    added to the LangSmith filter expression when given, and `start_dt` /
    `end_dt` can be used independently of each other.  When `proj_id` is not
    given it is read from the `LANGSMITH_PROJECT_ID` environment variable.

    Returns a `list` of runs when `limit` is None, otherwise an `L` holding at
    most `limit` runs.
    """
    proj_id = proj_id or check_api_key("LANGSMITH_PROJECT_ID")

    # Collect only the conditions that were actually requested.
    conditions = []
    if only_success: conditions.append('eq(status, "success")')
    if commit_id: conditions.append(f'has(tags, "commit:{commit_id}")')
    if start_dt: conditions.append(f'gte(start_time, "{start_dt}")')
    # The upper bound applies on its own too; it does not require `start_dt`.
    if end_dt: conditions.append(f'lte(start_time, "{end_dt}")')

    query_string = f'and({", ".join(conditions)})' if conditions else None
    if query_string: print(f'Fetching runs with this filter: {query_string}')

    client = Client()
    runs = client.list_runs(
        filter=query_string,
        project_id=proj_id,
        execution_order=1, # this gets the root runs
        # Only exclude errored runs when the caller asked for successes;
        # `None` leaves the error status unfiltered.
        error=False if only_success else None,
        run_type=run_type,
    )
    # `list_runs` is consumed lazily, so `take` stops fetching after `limit` runs.
    return list(runs) if limit is None else take(runs, limit)

The idea behind `get_runs_by_commit` is to quickly retrieve runs that are being logged to LangSmith in CI, such as when you are running offline tests automatically against your language models. For example, let's get runs with the tag `commit:4f59dcec` in LangSmith (this is specific to my project).

In [None]:
#|hide
# Fetch at most 5 runs carrying this commit tag (successful runs only, by default).
_runs = get_runs_by_commit('4f59dcec', limit=5)
assert set(_runs.map(lambda x: x.tags[0])) == {'commit:4f59dcec'} # check that all runs have this tag
assert set(_runs.map(lambda x: x.status)) == {'success'} # check that these runs are successful

Fetching runs with this filter: and(eq(status, "success"), has(tags, "commit:4f59dcec"))


In [None]:
#|hide
# No `limit` here, so every matching run in the date range is returned (as a list, wrapped in `L`).
_runs = L(get_runs_by_commit(start_dt='10/4/2023', end_dt='10/5/2023'))
n_runs = len(_runs)
# NOTE(review): this threshold presumably depends on the data in the author's project.
assert n_runs > 100

Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "10/4/2023"), lte(start_time, "10/5/2023"))


In [None]:
#|export
def get_last_child(runs: List[langsmith.schemas.Run]):
    "Get the last child run of each run in `runs`, skipping runs that have no children."
    last_children = []
    for parent in runs:
        if not parent.child_run_ids: continue
        # The final entry of `child_run_ids` identifies the last child.
        last_id = parent.child_run_ids[-1]
        last_children.append(client.read_run(last_id))
    return last_children

In LangSmith, the last child is often useful to view the final call to the language model.

In [None]:
# Look up the last child of the first three runs fetched above.
_child_runs = get_last_child(take(_runs, 3))
assert _child_runs[0].child_run_ids is None # the child doesn't have other children
assert _child_runs[0].execution_order != 1  # the child shouldn't be executed first

In [None]:
#|export
def get_recent_runs(start_dt=None, end_dt=None, last_n_days=2, limit=None):
    """Get recent runs from Langsmith.  If `start_dt` is None gets the `last_n_days`.

    `start_dt` and `end_dt` are date strings in `%m/%d/%Y` format.  Without
    `start_dt` the window starts `last_n_days` before the start time of the
    first run returned by `list_runs(limit=1)` (presumably the most recent
    run).  Without `end_dt` the window ends `last_n_days + 1` days after its
    start.  The runs themselves are fetched with `get_runs_by_commit`, using
    its default filters.

    Returns a `list` when `limit` is None, otherwise an `L` with at most
    `limit` runs; the result is empty when the project has no runs to anchor
    the window on.  Raises `ValueError` if `end_dt` is given without
    `start_dt`.
    """
    date_fmt = '%m/%d/%Y'

    # Validate the arguments before making any network calls.
    if end_dt is not None and start_dt is None:
        raise ValueError("end_dt should only be provided if start_dt is provided.")

    if start_dt is None:
        client = Client()
        latest_run = first(client.list_runs(project_id=check_api_key("LANGSMITH_PROJECT_ID"), limit=1))
        if latest_run is None:
            # No runs at all: nothing to anchor the window on.
            return [] if limit is None else L()
        start_dt_obj = latest_run.start_time - timedelta(days=last_n_days)
    else:
        start_dt_obj = datetime.strptime(start_dt, date_fmt)

    if end_dt is None:
        # One extra day is added because, per the original author's note,
        # LangSmith's `lte` behaves like `lt` here.
        end_dt_obj = start_dt_obj + timedelta(days=last_n_days+1)
    else:
        end_dt_obj = datetime.strptime(end_dt, date_fmt)

    # Pass `limit` through so no more runs than needed are fetched.
    return get_runs_by_commit(start_dt=start_dt_obj.strftime(date_fmt),
                              end_dt=end_dt_obj.strftime(date_fmt),
                              limit=limit)

It is often helpful to get runs in a batch in a date range:

In [None]:
# Explicit start and end dates.
_runs1 = get_recent_runs(start_dt='10/4/2023', end_dt='10/5/2023', limit=10)
assert len(_runs1) == 10

# Only a start date: the end date is derived from `last_n_days`.
_runs2 = get_recent_runs(start_dt='10/3/2023', limit=10)
assert len(_runs2) == 10

# No dates: the window is derived from the run returned by `list_runs(limit=1)`.
_runs3 = get_recent_runs(limit=10)
assert len(_runs3) == 10

Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "10/04/2023"), lte(start_time, "10/05/2023"))
Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "10/03/2023"), lte(start_time, "10/06/2023"))
Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "11/29/2023"), lte(start_time, "12/02/2023"))


In [None]:
#|export
def get_recent_commit_tags(start_dt=None, end_dt=None, last_n_days=2, return_df=False):
    """Print a table of recent commit SHAs from Langsmith along with their counts that you can filter on.

    The runs are fetched with `get_recent_runs` using the same `start_dt`,
    `end_dt` and `last_n_days`.  The commit of a run is taken from its first
    tag that starts with `commit:`.  Rows are ordered by date (most recent
    first) and then by count (largest first).

    With `return_df=True` the aggregated `DataFrame` is returned instead of
    being printed as a markdown table.  Returns None when the table is
    printed or when no runs were found.
    """
    date_fmt = '%m/%d/%Y'
    runs = get_recent_runs(start_dt=start_dt, end_dt=end_dt, last_n_days=last_n_days)
    data = [{'start_dt': r.start_time.strftime(date_fmt),
             # `tags` can be None on a run; a run without a commit tag gets `None`.
             'commit': first([t.split('commit:')[-1] for t in (r.tags or []) if t.startswith('commit:')])}
            for r in runs]
    if not data:
        print(f'No commits found for {start_dt} - {end_dt}')
        return None

    df = pd.DataFrame(data)
    agg = df.groupby(['start_dt']).value_counts().reset_index()
    # Older pandas versions name the counts column `0`; newer ones already call it `count`.
    agg = agg.rename(columns={0: 'count'})
    # Sort on the parsed date, not on the `mm/dd/YYYY` string, so that ordering
    # stays chronological across month and year boundaries.
    agg = (agg.assign(_sort_dt=pd.to_datetime(agg['start_dt'], format=date_fmt))
              .sort_values(by=['_sort_dt', 'count'], ascending=False)
              .drop(columns='_sort_dt'))
    if return_df:
        return agg
    print(agg.to_markdown(index=False))

Because I like to tag my LangSmith runs with the commit SHA (see `get_runs_by_commit`), I also want to see the most recent commit SHAs so I know what to query!

In [None]:
#|eval:false
# With the default `return_df=False` the table is printed as markdown.
get_recent_commit_tags()

Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "11/29/2023"), lte(start_time, "12/02/2023"))
| start_dt   | commit   |   count |
|:-----------|:---------|--------:|
| 12/01/2023 | cca20d7c |     573 |
| 12/01/2023 | e89ab9c7 |     573 |
| 11/30/2023 | aa03053e |     573 |
| 11/30/2023 | ae5dfea1 |     573 |
| 11/30/2023 | 6f4d79fb |     572 |
| 11/30/2023 | 9983d79c |     572 |
| 11/29/2023 | f245175d |    1134 |


`get_recent_commit_tags` can also return a Pandas dataframe:

In [None]:
#|eval:false
# `return_df=True` returns the aggregated table instead of printing it.
_df = get_recent_commit_tags(return_df=True)
assert _df.shape[0] >= 1

Fetching runs with this filter: and(eq(status, "success"), gte(start_time, "11/29/2023"), lte(start_time, "12/02/2023"))


### Other Ways Of Getting Runs

You may also want to query runs by [feedback](https://docs.smith.langchain.com/evaluation/capturing-feedback). However, there are many degrees of freedom in how you can implement feedback, and there are also many ways you can utilize tags.  For these cases, we suggest using the `langsmith` client directly as [discussed earlier](#Background).  

We will continue to update this library with additional recipes should we find other common patterns that are generalizable.

## Parse Data

In [None]:
#|export
def _ischatopenai(run): 
    "Raise `TypeError` unless `run` is named `ChatOpenAI`; otherwise return None."
    wrong_type = run.name != 'ChatOpenAI'
    if not wrong_type:
        return None
    raise TypeError(f'Run: {run.id} is of type `{run.name}`, but can only parse `ChatOpenAI` runs.')

In [None]:
#|export
def get_params(run:langsmith.schemas.Run) -> dict:
    """Get important parameters from a run logged in LangSmith.

    Reads `run.extra['invocation_params']` and returns the model name, `n`,
    `top_p`, temperature, presence penalty and frequency penalty under
    `param_`-prefixed keys (a parameter that was not logged maps to None).
    Returns an empty dict when the run has no invocation parameters.
    """
    # `extra` itself, or its `invocation_params` entry, may be missing or None.
    p = (run.extra or {}).get('invocation_params')
    if p is None:
        return {}
    return dict(param_model_name=p.get('model'),
                param_n=p.get('n'),
                param_top_p=p.get('top_p'),
                param_temp=p.get('temperature'),
                param_presence_penalty=p.get('presence_penalty'),
                param_freq_penalty=p.get('frequency_penalty')
               )

In [None]:
# Read one run by id and show its parameters.
# NOTE(review): the run id is presumably specific to the author's project.
_run = client.read_run('8cd7deed-9547-4a07-ac01-55e9513ca1cd')
get_params(_run)

{'param_model_name': 'gpt-3.5-turbo-0613',
 'param_n': 1,
 'param_top_p': 1,
 'param_temp': 0,
 'param_presence_penalty': 0,
 'param_freq_penalty': 0}

In [None]:
#|export
def get_functions(run:langsmith.schemas.Run) -> List[dict]:
    """Get function definitions from a LangSmith run.

    Returns the `functions` entry of `run.extra['invocation_params']`, or an
    empty list when the run has no invocation parameters or no functions.
    """
    # `extra`, `invocation_params` and `functions` may each be missing or None;
    # always hand back a list so callers can iterate over the result.
    p = (run.extra or {}).get('invocation_params') or {}
    return p.get('functions') or []

In [None]:
# List the names of the functions that were available to the model in this run.
_funcs = get_functions(_run)
for f in _funcs:
    print(f['name'])

contact-finder
contact-creator
email-campaign-creator
task-creator
task-finder
human-chat
calculator
knowledge-base


In [None]:
#|hide
# Check the first function name and that more than one function was returned.
_funcs = get_functions(_run)
assert _funcs[0]['name'] == 'contact-finder'
assert len(_funcs) > 1

In [None]:
#|export
def get_feedback(run:langsmith.schemas.Run) -> list:
    "Get the feedback recorded for a run as a list of dicts (empty if there is none)."
    fields = ('key', 'score', 'value', 'comment', 'correction')
    feedback = []
    # One dict per feedback entry, keeping only the fields listed above.
    for fb in client.list_feedback(run_ids=[run.id]):
        feedback.append({name: getattr(fb, name) for name in fields})
    return feedback

In [None]:
# Fetch the feedback of one run and check its first entry.
# NOTE(review): the run id is presumably specific to the author's project.
_feedback = get_feedback(client.read_run('7aba254d-3812-4050-85a5-ed64af50d2f1'))
assert _feedback[0]['score'] == 0
assert _feedback[0]['key'] == 'Empty Response'
_feedback

[{'key': 'Empty Response',
  'score': 0.0,
  'value': None,
  'comment': "expected '' to have a length above 0 but got 0",
  'correction': None}]

## Exporting Runs To Pandas

See the [chatrecord](03_chatrecord.ipynb) module.

In [None]:
#| hide
# Run nbdev's export step for this project.
import nbdev; nbdev.nbdev_export()