Merge pull request #3 from parlance-labs/upgrade

upgrade to latest version of langsmith
parlance-labs · Feb 20, 2024 · 6dc66a9 · 6dc66a9
2 parents 50ef78d + 2b2b243
commit 6dc66a9
Show file tree

Hide file tree

Showing 9 changed files with 87 additions and 393 deletions.
diff --git a/langfree/__init__.py b/langfree/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.27"
+__version__ = "0.0.28"
diff --git a/langfree/_modidx.py b/langfree/_modidx.py
@@ -71,16 +71,8 @@
                                     'langfree.transform.RunData.to_json': ('transform.html#rundata.to_json', 'langfree/transform.py'),
                                     'langfree.transform.RunData.to_msg_dict': ( 'transform.html#rundata.to_msg_dict',
                                                                                 'langfree/transform.py'),
-                                    'langfree.transform._gen_name': ('transform.html#_gen_name', 'langfree/transform.py'),
-                                    'langfree.transform._sub_name_in_func': ('transform.html#_sub_name_in_func', 'langfree/transform.py'),
-                                    'langfree.transform._sub_name_in_output': ( 'transform.html#_sub_name_in_output',
-                                                                                'langfree/transform.py'),
                                     'langfree.transform.chat': ('transform.html#chat', 'langfree/transform.py'),
                                     'langfree.transform.fetch_run_componets': ( 'transform.html#fetch_run_componets',
                                                                                 'langfree/transform.py'),
-                                    'langfree.transform.gen_name': ('transform.html#gen_name', 'langfree/transform.py'),
-                                    'langfree.transform.rephrase': ('transform.html#rephrase', 'langfree/transform.py'),
-                                    'langfree.transform.reword_input': ('transform.html#reword_input', 'langfree/transform.py'),
-                                    'langfree.transform.tsfm_nm_rephrase': ('transform.html#tsfm_nm_rephrase', 'langfree/transform.py'),
                                     'langfree.transform.validate_jsonl': ('transform.html#validate_jsonl', 'langfree/transform.py'),
                                     'langfree.transform.write_to_jsonl': ('transform.html#write_to_jsonl', 'langfree/transform.py')}}}
diff --git a/langfree/chatrecord.py b/langfree/chatrecord.py
@@ -29,7 +29,8 @@ def __init__(self, message, extra_data=None):
 def get_child_chat_run(run):
     "Get the last child `ChatOpenAI` run."
     client = Client()
-    if run.execution_order != 1: # this is a child run, get the parent
+    if run.parent_run_id is not None:
+        # this is a child run, get the parent (replaces the old `run.execution_order != 1` check)
         run = client.read_run(run.parent_run_id)
 
     _cruns = client.read_run(run_id=run.id, load_child_runs=True).child_runs

diff --git a/langfree/transform.py b/langfree/transform.py
@@ -1,8 +1,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_transform.ipynb.
 
 # %% auto 0
-__all__ = ['client', 'chat', 'rephrase', 'gen_name', 'fetch_run_componets', 'RunData', 'reword_input', 'tsfm_nm_rephrase',
-           'write_to_jsonl', 'validate_jsonl']
+__all__ = ['client', 'chat', 'fetch_run_componets', 'RunData', 'write_to_jsonl', 'validate_jsonl']
 
 # %% ../nbs/02_transform.ipynb 2
 import os, copy, json
@@ -33,44 +32,7 @@ def chat(**kwargs):
     client.api_key = os.environ['OPENAI_API_KEY']
     return client.chat.completions.create(**kwargs)
 
-# %% ../nbs/02_transform.ipynb 6
-def rephrase(sentence, max_tokens=100, temperature=0.95):
-    "Rephrase a sentence. Useful for data augmentation for finetuning."
-    client.api_key = os.environ['OPENAI_API_KEY']
-    response = chat(
-        temperature=temperature,
-        max_tokens=max_tokens,
-        model="gpt-4", 
-        messages=[
-         {"role": "system", "content": "You are a helpful assistant."},
-         {"role": "user", "content": f"Rephrase the following sentence in one short sentence: {sentence}"}
-        ]
-    )
-    return response.choices[0].message.content.strip()
-
 # %% ../nbs/02_transform.ipynb 8
-def _gen_name():
-    "Generate a random name"
-    client.api_key = os.environ['OPENAI_API_KEY']
-    response = chat(
-        temperature=1.9,
-        max_tokens=4,
-        model="gpt-3.5-turbo", 
-        messages=[
-         {"role": "system", "content": "You are a helpful assistant."},
-         {"role": "user", "content": f"Imagine a full name for a person. Only return a first and last name."}
-        ]
-    )
-    return response.choices[0].message.content.strip().replace('.', '')
-
-def gen_name():
-    "Generate a random name, usefule for data augmentation and privacy."
-    while True:
-        nm = _gen_name()
-        if len(nm) <= 18:
-            return nm
-
-# %% ../nbs/02_transform.ipynb 12
 def fetch_run_componets(run_id:str):
     "Return the `inputs`, `output` and `funcs` for a run of type `ChatOpenAI`."
     client = langsmith.Client()
@@ -86,7 +48,7 @@ def fetch_run_componets(run_id:str):
     funcs = params.get("functions", [])
     return inputs, output, funcs
 
-# %% ../nbs/02_transform.ipynb 15
+# %% ../nbs/02_transform.ipynb 11
 class RunData(BaseModel):
     "Key components of a run from LangSmith"
     inputs:List[dict]
@@ -145,45 +107,7 @@ def _flatten_data(cls, data):
             md_str += "\n"
         return md_str
 
-# %% ../nbs/02_transform.ipynb 28
-def _sub_name_in_func(funcs, name):
-    "Substitute 'Unit Test' for `name` in the `email-campaign-creator` function"
-    emailfunc = L(funcs).filter(lambda x: x['name'] == 'email-campaign-creator')
-    if emailfunc:
-        func = emailfunc[0]
-        desc = func['parameters']['properties']['body']['description']
-        new_desc = desc.replace('Unit Test', name)
-        func['parameters']['properties']['body']['description'] = new_desc
-    return funcs
-
-# %% ../nbs/02_transform.ipynb 30
-def _sub_name_in_output(output, name):
-    "Subtitute `[Your Name]` with `name` in the output."
-    output['content'] = output['content'].replace('[Your Name]', name)
-    return output
-
-# %% ../nbs/02_transform.ipynb 32
-def reword_input(inputs):
-    "Rephrase the first human input."
-    copy_inputs = copy.deepcopy(inputs)
-    for inp in copy_inputs:
-        if inp['role'] == 'user': 
-            inp['content'] = rephrase(inp['content'])
-            print(f"rephrased input as: {inp['content']}")
-            break
-    return copy_inputs
-
-# %% ../nbs/02_transform.ipynb 34
-def tsfm_nm_rephrase(rundata:RunData, name=None) -> RunData:
-    "Substitutes names in functions & outputs and rephrases the language model input."
-    if name is None: name=gen_name()                    # generates a random name to be used to substitute a boilerplate name
-    print(f'Substituting name: {name}')
-    inputs = reword_input(rundata.inputs)              # rephrases the input to the language model
-    output = _sub_name_in_output(rundata.output, name)  # substitutes the template `[Your Name]` with `name` in the output of the language model
-    funcs = _sub_name_in_func(rundata.funcs, name)      # substitutes the template `[Your Name]` with `name` in the a function schema description
-    return RunData(inputs=inputs, output=output, funcs=funcs, run_id=rundata.run_id)
-
-# %% ../nbs/02_transform.ipynb 39
+# %% ../nbs/02_transform.ipynb 24
 def write_to_jsonl(data_list:List[RunData], filename:str):
     """
     Writes a list of dictionaries to a .jsonl file.
@@ -197,7 +121,7 @@ def write_to_jsonl(data_list:List[RunData], filename:str):
         for entry in data_list:
             f.write(f"{entry.to_json()}\n")
 
-# %% ../nbs/02_transform.ipynb 42
+# %% ../nbs/02_transform.ipynb 27
 def validate_jsonl(fname):
     "Code is modified from https://cookbook.openai.com/examples/chat_finetuning_data_prep, but updated for function calling."
     # Load the dataset

diff --git a/nbs/01_runs.ipynb b/nbs/01_runs.ipynb
@@ -291,8 +291,7 @@
    "outputs": [],
    "source": [
     "_child_runs = get_last_child(take(_runs, 3))\n",
-    "assert _child_runs[0].child_run_ids is None # the child doesn't have other children\n",
-    "assert _child_runs[0].execution_order != 1  # the child shouldn't be executed first"
+    "assert _child_runs[0].child_run_ids is None # the child doesn't have other children"
    ]
   },
   {
@@ -346,7 +345,7 @@
      "text": [
       "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2023-10-04\"), lte(start_time, \"2023-10-05\"))\n",
       "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2023-10-03\"), lte(start_time, \"2023-10-06\"))\n",
-      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-01-09\"), lte(start_time, \"2024-01-12\"))\n"
+      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-02-16\"), lte(start_time, \"2024-02-19\"))\n"
      ]
     }
    ],
@@ -405,14 +404,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-01-09\"), lte(start_time, \"2024-01-12\"))\n",
+      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-02-16\"), lte(start_time, \"2024-02-19\"))\n",
       "| start_dt   | commit   |   count |\n",
       "|:-----------|:---------|--------:|\n",
-      "| 01/11/2024 | 393fbebf |     619 |\n",
-      "| 01/11/2024 | aee7dd6e |     608 |\n",
-      "| 01/09/2024 | aee7dd6e |    1209 |\n",
-      "| 01/09/2024 | 887296e3 |     607 |\n",
-      "| 01/09/2024 | 393fbebf |     549 |\n"
+      "| 02/18/2024 | 501106cc |     648 |\n"
      ]
     }
    ],
@@ -437,7 +432,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-01-09\"), lte(start_time, \"2024-01-12\"))\n"
+      "Fetching runs with this filter: and(eq(status, \"success\"), gte(start_time, \"2024-02-16\"), lte(start_time, \"2024-02-19\"))\n"
      ]
     }
    ],
@@ -609,7 +604,7 @@
     {
      "data": {
       "text/plain": [
-       "[{'key': 'Empty Response',\n",
+       "[{'key': 'empty response',\n",
        "  'score': 0.0,\n",
        "  'value': None,\n",
        "  'comment': \"expected '' to have a length above 0 but got 0\",\n",
@@ -624,7 +619,7 @@
    "source": [
     "_feedback = get_feedback(client.read_run('7aba254d-3812-4050-85a5-ed64af50d2f1'))\n",
     "assert _feedback[0]['score'] == 0\n",
-    "assert _feedback[0]['key'] == 'Empty Response'\n",
+    "assert _feedback[0]['key'] == 'empty response'\n",
     "_feedback"
    ]
   },