In [3]:
# Unpack the labelled-contracts archive into the working directory
# (-x extract, -v list each entry as it is written, -f read from the named file).
!tar -xvf labelled_contracts.tar.gz

x labelled_contracts/
x labelled_contracts/clauses_contracts.tar.gz
x labelled_contracts/elements_contracts.tar.gz


In [1]:
import uuid

# Random (version 4) UUID; a later cell interpolates it into the dataset name
# so that each run of the notebook creates a distinctly named dataset.
unique_id = uuid.uuid4()

In [None]:
# 

In [None]:
import json

# Load the examples. JSON text is UTF-8, so decode it as UTF-8 explicitly
# instead of relying on the platform's locale default (which is not UTF-8 on
# every system and would garble or reject non-ASCII characters).
# Later cells read row["inputs"] and row["outputs"] from each element of `data`.
with open("data.json", encoding="utf-8") as f:
    data = json.load(f)

In [2]:
from langsmith import Client

client = Client()

# Name the dataset after this session's unique id and create it first.
dataset_name = f"bionlp-{unique_id}"
dataset = client.create_dataset(dataset_name=dataset_name)

# Inputs are uploaded unchanged; every value in a row's outputs is serialised
# to a JSON string before upload.
example_inputs = [row["inputs"] for row in data]
example_outputs = [
    {field: json.dumps(value) for field, value in row["outputs"].items()}
    for row in data
]
client.create_examples(
    inputs=example_inputs,
    outputs=example_outputs,
    dataset_id=dataset.id,
)

In [None]:
from langsmith import Client

client = Client()

# Take only the first run returned for the "default" project. Passing a
# default to next() keeps an empty project from surfacing as a bare
# StopIteration; in that case run_id is None.
first_run = next(iter(client.list_runs(project_name="default")), None)
run_id = first_run.id if first_run is not None else None

# Define chain

In [None]:
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI

# Extraction schema: each extracted item carries its text plus exactly one of
# the semantic type labels listed below (one single-value enum per label).
schema = {
    "properties": {
        "text": {"type": "string"},
        "semantic_type_id": {
            "title": "Semantic Type Id",
            "anyOf": [
                {"enum": [label], "type": "string"}
                for label in (
                    "GeneOrGeneProduct",
                    "CellLine",
                    "ChemicalEntity",
                    "SequenceVariant",
                    "OrganismTaxon",
                    "DiseaseOrPhenotypicFeature",
                )
            ],
        },
    },
    "required": ["text", "semantic_type_id"],
}

# Build the extraction chain: a zero-temperature gpt-3.5-turbo chat model
# driven by the schema defined above.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = create_extraction_chain(schema=schema, llm=llm)

## Evaluate

In [None]:
from langchain.evaluation.parsing.base import (
    JsonListAccuracyEvaluator,
    JsonListF1Evaluator,
    JsonListIoUEvaluator,
    JsonListPrecisionEvaluator,
    JsonListRecallEvaluator,
)

# One instance of each JSON-list evaluator, built with default arguments.
evaluators = [
    JsonListIoUEvaluator(),
    JsonListF1Evaluator(),
    JsonListPrecisionEvaluator(),
    JsonListAccuracyEvaluator(),
    JsonListRecallEvaluator(),
]

In [None]:
from langchain import smith
from langsmith.evaluation import EvaluationResult, RunEvaluator


class CustomEvaluator(RunEvaluator):
    """Placeholder run evaluator that returns the same result for every run."""

    def evaluate_run(self, run, example):
        """Return a fixed result (key "Foo", score 1).

        Neither ``run`` nor ``example`` is inspected.
        """
        constant_result = EvaluationResult(key="Foo", score=1)
        return constant_result


# Evaluation settings: the "qa" evaluator selected by name, plus one instance
# of the custom placeholder evaluator.
evaluation = smith.RunEvalConfig(
    evaluators=["qa"],
    custom_evaluators=[CustomEvaluator()],
)

In [None]:
def my_cool_chain(the_input):
    """Stand-in for a real chain: ignore ``the_input`` and return a fixed string."""
    canned_reply = "I am the greatest"
    return canned_reply


# Run the stand-in chain over the dataset created earlier and apply the
# evaluation settings to the results.
test_results = smith.run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=my_cool_chain,
    evaluation=evaluation,
    # concurrency_level=0,  # left disabled, as in the original cell
)

In [9]:
# Look up one specific, previously recorded test project by its full name and
# print its prompt / completion / total token counts.
test_project = "openai.gpt-4-qa[evaluator=qa] - Carb-IE-Test CORRECT - 02d9f257"
project = client.read_project(project_name=test_project)
token_summary = f"Prompt[{project.prompt_tokens}] + Completion[{project.completion_tokens}] = {project.total_tokens}"
print(token_summary)

Prompt[81349] + Completion[194] = 81543


In [14]:
# One dict of token counts per LLM-type run recorded in the test project.
tokens = [
    {
        field: getattr(llm_run, field)
        for field in ("completion_tokens", "prompt_tokens", "total_tokens")
    }
    for llm_run in client.list_runs(run_type="llm", project_name=test_project)
]