First, we'll do some setup. Create a LangSmith API Key by navigating to the settings page in LangSmith, then set the following environment variables.
```
OPENAI_API_KEY=<YOUR OPENAI API KEY>
LANGCHAIN_TRACING_V2=true
LANGCHAIN_PROJECT=<YOUR PROJECT NAME>
LANGCHAIN_API_KEY=<YOUR LANGSMITH API KEY>
```

In [3]:
from langsmith import Client

# LangSmith client shared by every cell below. It is built with no arguments;
# presumably it picks up the environment variables listed above -- TODO confirm.
client = Client()

## Pt. 1 -- Toxic Queries

In [4]:
# Hand-labelled (text, label) pairs used to seed the dataset.
# Every label is one of the two strings "Toxic" / "Not toxic".
toxic_examples = [
    ("Shut up, idiot", "Toxic"),
    ("You're a wonderful person", "Not toxic"),
    ("This is the worst thing ever", "Toxic"),
    ("I had a great day today", "Not toxic"),
    ("Nobody likes you", "Toxic"),
    ("This movie is a masterpiece", "Not toxic"),
    ("Go away and never come back", "Toxic"),
    ("Thank you for your help", "Not toxic"),
    ("This is so dumb", "Toxic"),
    ("I appreciate your efforts", "Not toxic"),
    ("This is a waste of time", "Toxic"),
    ("This movie blows", "Toxic"),
    ("This is unacceptable. I want to speak to the manager.", "Toxic"),
]

# Create the dataset, then upload every labelled pair in one call.
toxic_dataset_name = "Toxic Queries"
toxic_dataset = client.create_dataset(dataset_name=toxic_dataset_name)
# Inputs are keyed by "text" and reference outputs by "label"; the two tuples
# stay aligned because both are derived from the same ordered list.
inputs = tuple({"text": text} for text, _ in toxic_examples)
outputs = tuple({"label": label} for _, label in toxic_examples)
client.create_examples(
    dataset_id=toxic_dataset.id,
    inputs=inputs,
    outputs=outputs,
)

In [5]:
from openai import OpenAI
from langsmith import wrappers, traceable

# Wrap the OpenAI client with LangSmith's wrapper. The functions below look up
# the module-level name `openai` each time they are called.
openai = wrappers.wrap_openai(OpenAI())

In [6]:
@traceable
def label_query(text):
    """Ask the chat model whether ``text`` is toxic (prompt variant 1).

    Sends a fixed system instruction followed by ``text`` as the user message
    to ``gpt-3.5-turbo`` at temperature 0, and returns the content of the
    first choice exactly as the model produced it.
    """
    instruction = "Given the following user query, assess whether it contains toxic content. Please provide a simple 'Toxic' or 'Not toxic' response based on your assessment."
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": instruction},
            {"role": "user", "content": text},
        ],
    )
    return completion.choices[0].message.content


@traceable
def label_query_alternate_prompt(text):
    """Ask the chat model whether ``text`` is toxic (prompt variant 2).

    Same call shape as ``label_query`` but with a more explicit system
    instruction that enumerates what counts as toxic. Returns the content of
    the first choice exactly as the model produced it.
    """
    instruction = "Please review the user query below and determine if it contains any form of toxic behavior, such as insults, threats, or highly negative comments. Respond with 'Toxic' if it does, and 'Not toxic' if it doesn't."
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": instruction},
            {"role": "user", "content": text},
        ],
    )
    return completion.choices[0].message.content

In [7]:
from langchain.smith import RunEvalConfig
from langsmith.evaluation import EvaluationResult

def correct_label(run, example) -> EvaluationResult:
    """Per-run evaluator: 1 when the run's output equals the reference label, else 0.

    The comparison is an exact equality check between ``run.outputs["output"]``
    and ``example.outputs["label"]``; a missing key on either side reads as ``None``.
    """
    predicted = run.outputs.get("output")
    expected = example.outputs.get("label")
    return EvaluationResult(
        key="correct_label",
        score=1 if predicted == expected else 0,
    )


def batch_eval(runs, examples):
    """Dataset-level evaluator: pass when more than half of the runs are correct.

    Args:
        runs: Sequence of runs; each must expose ``outputs["output"]``.
        examples: Sequence of examples aligned index-by-index with ``runs``;
            each must expose ``outputs["label"]``.

    Returns:
        ``{"key": "pass", "score": bool}``. The score is ``True`` only when
        strictly more than 50% of the runs match their example's label, and
        ``False`` when ``runs`` is empty.
    """
    total = len(runs)
    if total == 0:
        # Nothing to grade: report a failure rather than dividing by zero.
        return {"key": "pass", "score": False}
    # Booleans sum as 0/1, so this counts the exact matches.
    correct = sum(
        run.outputs["output"] == example.outputs["label"]
        for run, example in zip(runs, examples)
    )
    return {"key": "pass", "score": correct / total > 0.5}


# Bundle the per-run evaluator (correct_label) and the whole-dataset evaluator
# (batch_eval) into a single config passed to the runs below.
eval_config = RunEvalConfig(
    custom_evaluators=[correct_label], batch_evaluators=[batch_eval]
)

In [8]:
# Evaluate prompt variant 1 (label_query) over the toxic dataset. Results are
# stored under the project "toxic queries prompt 1", tagged prompt_version "1".
results_1 = client.run_on_dataset(
    dataset_name=toxic_dataset_name,
    llm_or_chain_factory=label_query,
    evaluation=eval_config,
    project_name="toxic queries prompt 1",
    project_metadata={
        "prompt_version": "1",
    },
)

View the evaluation results for project 'toxic queries prompt 1' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a/compare?selectedSessions=f33774fe-a0ca-4db5-81e4-b63902b4b4ff

View all tests for Dataset Toxic Queries at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a
[------------------------------------------------->] 13/13

In [9]:
# Evaluate prompt variant 2 (label_query_alternate_prompt) over the same
# dataset with the same evaluators, under a separate project for comparison.
results_2 = client.run_on_dataset(
    dataset_name=toxic_dataset_name,
    llm_or_chain_factory=label_query_alternate_prompt,
    evaluation=eval_config,
    project_name="toxic queries prompt 2",
    project_metadata={
        "prompt_version": "2",
    },
)

View the evaluation results for project 'toxic queries prompt 2' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a/compare?selectedSessions=70ca4d89-020f-42d9-9943-87318e5dc81c

View all tests for Dataset Toxic Queries at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a
[------------------------------------------------->] 13/13

### Aside: Using the LangSmith Hub for Prompt Management

In [10]:
from langchain import hub
from langchain_openai.chat_models.base import _convert_message_to_dict

# Pull the prompt at a specific commit, so the evaluated version is explicit.
HUB_COMMIT_HASH = "f356b09d"
obj = hub.pull(f"ankush-movie-demo0306/toxic-query:{HUB_COMMIT_HASH}")
# Only the first message of the pulled prompt is used. It is formatted with no
# variables and converted with a private langchain_openai helper -- presumably
# into the dict shape the OpenAI client expects; TODO confirm.
hub_messages = [_convert_message_to_dict(message.format()) for message in obj.messages[:1]]


@traceable
def label_query_hub(text):
    """Classify ``text`` using the prompt messages pulled from the hub.

    The pre-converted ``hub_messages`` are sent first, followed by ``text`` as
    the user message, to ``gpt-3.5-turbo`` at temperature 0. Returns the
    content of the first choice.
    """
    user_message = {"role": "user", "content": text}
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[*hub_messages, user_message],
    )
    return completion.choices[0].message.content


# Evaluate the hub-managed prompt. The commit hash is embedded in both the
# project name and the metadata so the run can be traced to a prompt version.
results = client.run_on_dataset(
    dataset_name=toxic_dataset_name,
    llm_or_chain_factory=label_query_hub,
    evaluation=eval_config,
    project_name=f"toxic queries prompt @{HUB_COMMIT_HASH}",
    project_metadata={
        "prompt_version": HUB_COMMIT_HASH,
    },
)

View the evaluation results for project 'toxic queries prompt @f356b09d' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a/compare?selectedSessions=159396d7-ca20-49b1-aa9b-c3d23af958ea

View all tests for Dataset Toxic Queries at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a
[------------------------------------------------->] 13/13

## Pt. 2 -- Multi-Turn Queries

In [11]:
# Multi-turn examples. Each entry is a (queries, answers) pair of equal-length
# lists: queries[i] is the i-th user turn and answers[i] is the reference
# reply for that turn.
multi_turn_examples = [
    (
        [
            "Recommend some family-friendly movies for tonight",
            "Do any of these have an educational theme?",
            "Which one has the highest ratings?",
        ],
        [
            "Some family-friendly movies available are 'The Lion King', 'Finding Nemo', and 'The Incredibles'",
            "'The Lion King' and 'Finding Nemo' have educational themes about the circle of life and the importance of family",
            "'The Incredibles' has the highest ratings among them with a 94% on Rotten Tomatoes",
        ],
    ),
    (
        [
            "What are the top sci-fi movies on your service?",
            "Any recent ones?",
            "Can you suggest one that involves time travel?",
        ],
        [
            "Top sci-fi movies include 'Blade Runner 2049', 'Interstellar', and 'The Martian'",
            "A recent hit is 'Tenet', released in 2020",
            "'Interstellar' involves complex time travel themes and is highly recommended",
        ],
    ),
    (
        [
            "I'm looking for movies directed by Christopher Nolan",
            "Which one would you recommend for a movie night?",
            "What's the plot of 'Inception'?",
        ],
        [
            "Christopher Nolan movies available include 'Inception', 'Dunkirk', and 'Interstellar'",
            "'Inception' is a great pick for a movie night, offering a mix of action, drama, and mind-bending storytelling",
            "'Inception' is about a thief who steals corporate secrets through dream-sharing technology and is given the inverse task of planting an idea into the mind of a CEO",
        ],
    ),
    (
        [
            "Show me some popular romantic comedies",
            "Any classics in the list?",
            "Tell me more about 'When Harry Met Sally'",
        ],
        [
            "Popular romantic comedies include 'Crazy Rich Asians', 'The Big Sick', and 'When Harry Met Sally'",
            "'When Harry Met Sally' is considered a classic in the romantic comedy genre",
            "'When Harry Met Sally' explores the question of whether men and women can just be friends, through the story of its titular characters over the years",
        ],
    ),
    (
        [
            "Do you have documentaries on nature?",
            "Which one focuses on marine life?",
            "How long is 'Blue Planet II'?",
        ],
        [
            "Yes, we have 'Planet Earth II', 'Blue Planet II', and 'Our Planet'",
            "'Blue Planet II' focuses extensively on marine life, exploring the deep ocean, coral reefs, and the open sea",
            "'Blue Planet II' is approximately 7 hours long, spread across 7 episodes",
        ],
    ),
]

# Create the multi-turn dataset and upload all conversations in one call.
multi_turn_dataset_name = "Multi-Turn Queries"
multi_turn_dataset = client.create_dataset(dataset_name=multi_turn_dataset_name)
# Each example's input is the full list of user turns ("queries") and its
# reference output is the matching list of replies ("answers").
multi_turn_inputs = tuple({"queries": queries} for queries, _ in multi_turn_examples)
multi_turn_outputs = tuple({"answers": answers} for _, answers in multi_turn_examples)
client.create_examples(
    dataset_id=multi_turn_dataset.id,
    inputs=multi_turn_inputs,
    outputs=multi_turn_outputs,
)

In [12]:
import json

# Tool schema offered to the model: a single function, `retrieve_movies`,
# taking one required string argument `query`.
tools = [
    {
        "type": "function",
        "function": {
            "name": "retrieve_movies",
            "description": "Retrieve a list of relevant movies and their metadata from a movie database.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query used to retrieve movies from the movie database, for example 'Christopher Nolan films'",
                    },
                },
                "required": ["query"],
            },
        },
    },
]

# System prompt for the search step. `generate_movie_search` reads this
# module-level name each time it is called.
system_prompt = """
Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.
Note that if the question does not require additional search and can be answered using the chat history, simply respond with the answer.
Don't make up content that's not supplied in chat history.
"""


@traceable
def generate_movie_search(chat_history, query):
    """Ask the model to either answer directly or request a movie search.

    The conversation sent is: the module-level ``system_prompt``, then the
    prior ``chat_history`` messages, then ``query`` as the newest user turn.
    The ``tools`` schema is attached so the model may emit a tool call.

    Returns:
        The first choice's message object (not just its text), so the caller
        can inspect ``tool_calls``.
    """
    # NOTE(review): this call pins the "-0613" snapshot while the other calls
    # in this file use the unpinned "gpt-3.5-turbo" alias -- confirm intent.
    conversation = [
        {"role": "system", "content": system_prompt},
        *chat_history,
        {"role": "user", "content": query},
    ]
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-0613",
        tools=tools,
        messages=conversation,
    )
    return response.choices[0].message


def _convert_docs(results):
    return [
        {
            "page_content": r,
            "type": "Document",
        }
        for r in results
    ]


@traceable(run_type="retriever")
def retrieve_movies(query):
    """Stub retriever: map a keyword found in ``query`` to a canned title list.

    Matching is case-insensitive substring search, tried in the order listed
    below; the first keyword found wins. When no keyword matches, the
    romantic-comedy list is returned. A production version would query a real
    database instead.
    """
    lowered = query.lower()
    # Order matters: it mirrors the precedence of the keyword checks.
    canned_results = (
        ("family-friendly", ["Lion King", "Finding Nemo", "The Incredibles"]),
        ("sci-fi", ["Blade Runner 2049", "Interstellar", "The Martian"]),
        ("nature", ["Planet Earth II", "Blue Planet II", "Our Planet"]),
        ("christopher nolan", ["Inception", "Dunkirk", "Interstellar"]),
    )
    for keyword, titles in canned_results:
        if keyword in lowered:
            return _convert_docs(titles)
    return _convert_docs(
        ["Crazy Rich Asians", "The Big Sick", "When Harry Met Sally"]
    )


@traceable
def execute_function_call(message):
    """Execute the first tool call on ``message`` and return documents.

    Only ``message.tool_calls[0]`` is handled. When it names
    ``retrieve_movies``, its JSON arguments are decoded and the ``"query"``
    value is passed to the retriever.

    Returns:
        A list of document dicts (``"page_content"`` / ``"type"`` keys). For
        an unknown function name the list holds one document whose content is
        the error text. The error used to be returned as a bare string, which
        made ``rag_pipeline`` fail when it indexed ``doc["page_content"]``.
    """
    call = message.tool_calls[0].function
    if call.name == "retrieve_movies":
        query = json.loads(call.arguments)["query"]
        return retrieve_movies(query)
    # Keep the return shape uniform so callers can always read "page_content".
    return _convert_docs([f"Error: function {call.name} does not exist"])


@traceable
def generate_answer(question, context):
    """Answer ``question`` using only the supplied ``context`` text.

    ``context`` is embedded in the system message and ``question`` is sent as
    the user message to ``gpt-3.5-turbo`` at temperature 0. Returns the
    content of the first choice.
    """
    grounding = f"Answer the user's question based only on the content below:\n\n{context}"
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": grounding},
            {"role": "user", "content": question},
        ],
    )
    return completion.choices[0].message.content


@traceable
def rag_pipeline(chat_history, question):
    """Answer one turn, retrieving movies only if the model asks for it.

    The model first sees ``chat_history`` plus ``question``. When its reply
    carries no tool calls, the reply text is returned as-is. Otherwise the
    tool call is executed, the retrieved documents' ``page_content`` values
    are joined with newlines, and a grounded answer is generated from them.
    """
    reply = generate_movie_search(chat_history, question)
    if reply.tool_calls is None:
        # The model answered directly from the conversation so far.
        return reply.content
    retrieved = execute_function_call(reply)
    context = "\n".join(item["page_content"] for item in retrieved)
    return generate_answer(question, context)

In [13]:
@traceable
def run_multi_turn(queries):
    """Run a whole conversation through the pipeline, one user turn at a time.

    Each turn is answered with the history accumulated so far; the user turn
    and the assistant reply are then appended to that history before the next
    turn is processed.

    Returns:
        The list of assistant replies, one per entry of ``queries``, in order.
    """
    history = []
    answers = []
    for user_turn in queries:
        reply = rag_pipeline(history, user_turn)
        history.extend(
            (
                {"role": "user", "content": user_turn},
                {"role": "assistant", "content": reply},
            )
        )
        answers.append(reply)
    return answers

In [14]:
from langchain.smith import RunEvalConfig
from langsmith.evaluation import EvaluationResult

def brief_response(run, example) -> EvaluationResult:
    """Per-run evaluator: 1 when every reply is at most 200 characters, else 0.

    Reads the list of replies from ``run.outputs["output"]``; ``example`` is
    accepted but not used.
    """
    replies = run.outputs.get("output")
    has_long_reply = any(len(reply) > 200 for reply in replies)
    return EvaluationResult(key="brevity", score=0 if has_long_reply else 1)


# Rebinds `eval_config`: the multi-turn run is scored only on brevity.
eval_config = RunEvalConfig(custom_evaluators=[brief_response])

In [15]:
# Run the multi-turn pipeline over the dataset. The dataset name is restated
# here so this cell does not depend on the creation cell having been run.
multi_turn_dataset_name = "Multi-Turn Queries"
results = client.run_on_dataset(
    dataset_name=multi_turn_dataset_name,
    llm_or_chain_factory=run_multi_turn,
    evaluation=eval_config,
    project_name="multi turn eval",
    project_metadata={
        "model": "gpt-3.5-turbo",
        "prompt_version": "003",
    }
)

View the evaluation results for project 'multi turn eval' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/df70c082-ceed-45d9-93d4-eaf87752a6bf/compare?selectedSessions=cad8585e-7198-41b6-8827-599190df6692

View all tests for Dataset Multi-Turn Queries at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/df70c082-ceed-45d9-93d4-eaf87752a6bf
[------------------------------------------------->] 5/5

## Pt. 3 -- Structured Inputs

In [16]:
# Structured-input examples. Each entry is a tuple of
#   (inputs dict, reference answer string[, metadata dict]).
# The metadata element is optional; only the first two entries carry one.
structured_input_examples = [
    (
        {
            "user_preferences": ["Sci-Fi", "Action"],
            "watch_history": ["The Matrix", "Inception"],
            "search_query": "What to watch next?",
        },
        "Based on your love for Sci-Fi and Action movies, and considering you've recently watched 'The Matrix' and 'Inception', you might enjoy 'Blade Runner 2049' for its deep narrative and stunning visuals.",
        # Example adding notes + metadata
        {
            "note": "This is a free-form note"
        }
    ),
    (
        {
            "user_preferences": ["Drama", "Historical"],
            "watch_history": ["The Crown", "Downton Abbey"],
            "search_query": "Looking for a movie with a strong storyline",
        },
        "Given your interest in Drama and Historical themes, and your watch history, 'The King's Speech' offers a compelling storyline with remarkable performances.",
        {
            "note": "This is another free_form note.",
            "cohort_number": 3
        },
    ),
    (
        {
            "user_preferences": ["Comedy", "Romance"],
            "watch_history": ["Friends", "The Big Bang Theory"],
            "search_query": "Need a light-hearted movie",
        },
        "Considering your preference for Comedy and Romance, along with enjoying shows like 'Friends', you'd likely enjoy 'Crazy Rich Asians' for its humor and heartwarming romance.",
    ),
    (
        {
            "user_preferences": ["Thriller", "Mystery"],
            "watch_history": ["Sherlock", "Mindhunter"],
            "search_query": "Suggest a suspenseful movie",
        },
        "With your taste leaning towards Thriller and Mystery, and considering you've watched 'Sherlock' and 'Mindhunter', 'Gone Girl' would be an excellent choice for its suspense and plot twists.",
        
    ),
    (
        {
            "user_preferences": ["Documentary", "Nature"],
            "watch_history": ["Planet Earth", "Blue Planet II"],
            "search_query": "Want to watch something about wildlife",
        },
        "Your interest in Documentaries and Nature, along with watching 'Planet Earth' and 'Blue Planet II', suggests you would enjoy 'The Serengeti Rules', which beautifully captures wildlife and ecosystems.",
    ),
    (
        {
            "user_preferences": ["Fantasy", "Adventure"],
            "watch_history": ["Harry Potter series", "The Hobbit"],
            "search_query": "Fantasy movies for the weekend?",
        },
        "Given your love for Fantasy and Adventure, having watched the 'Harry Potter series' and 'The Hobbit', 'The Witcher' series would be a fantastic choice for your weekend binge.",
    ),
    (
        {
            "user_preferences": ["Animation", "Family"],
            "watch_history": ["Finding Nemo", "Toy Story"],
            "search_query": "Animated movies that are fun for all ages?",
        },
        "With a preference for Animation and Family-friendly content, and given your history with 'Finding Nemo' and 'Toy Story', 'Coco' is highly recommended for its fun story and universal appeal.",
    ),
    (
        {
            "user_preferences": ["Horror", "Supernatural"],
            "watch_history": ["The Haunting of Hill House", "Stranger Things"],
            "search_query": "Scary movies that aren’t too gory?",
        },
        "As a fan of Horror and Supernatural genres, and having enjoyed 'The Haunting of Hill House' and 'Stranger Things', 'A Quiet Place' offers suspense without relying on gore.",
    ),
    (
        {
            "user_preferences": ["Musical", "Drama"],
            "watch_history": ["La La Land", "The Greatest Showman"],
            "search_query": "Musicals with a strong emotional core?",
        },
        "Your enjoyment of Musicals and Drama, seen in 'La La Land' and 'The Greatest Showman', means you might find 'Les Misérables' to be a powerful experience with its deep emotional resonance.",
    ),
    (
        {
            "user_preferences": ["Crime", "Legal Drama"],
            "watch_history": ["Breaking Bad", "Better Call Saul"],
            "search_query": "Engaging legal dramas?",
        },
        "Considering your interest in Crime and Legal Drama, with 'Breaking Bad' and 'Better Call Saul' in your watch history, 'The Trial of the Chicago 7' is recommended for its engaging narrative and historical significance.",
    ),
]

# Create the dataset, then add examples one at a time so that each can carry
# its own optional metadata.
structured_input_dataset_name = "Structured Inputs"
structured_input_dataset = client.create_dataset(
    dataset_name=structured_input_dataset_name
)
for input_tuple in structured_input_examples:
    if len(input_tuple) == 3:
        inputs, answer, metadata = input_tuple
    else:
        # Two-element entries have no metadata.
        (inputs, answer), metadata = input_tuple, None
    client.create_example(
        dataset_id=structured_input_dataset.id,
        inputs=inputs,
        outputs={"answer": answer},
        metadata=metadata,
    )

In [17]:
# System prompt template filled in per request via str.format with
# `watch_history` and `user_preferences`.
# NOTE(review): "explicited stated" below looks like a typo for "explicitly
# stated". It is left unchanged because this text is sent to the model.
system_prompt_template = """Respond to the user's search query given what you know about them.

You know they just watched: {watch_history}

You know they have explicited stated preferences for: {user_preferences}"""


@traceable
def generate_recommendation(search_query, watch_history, user_preferences):
    """Recommend something to watch for a user with a known profile.

    ``watch_history`` and ``user_preferences`` are interpolated into
    ``system_prompt_template`` to form the system message; ``search_query`` is
    sent as the user message to ``gpt-3.5-turbo``. No temperature is set on
    this call. Returns the content of the first choice.
    """
    personalised_prompt = system_prompt_template.format(
        user_preferences=user_preferences,
        watch_history=watch_history,
    )
    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": personalised_prompt},
            {"role": "user", "content": search_query},
        ],
    )
    return completion.choices[0].message.content

In [18]:
# Use the built-in "cot_qa" evaluator. The examples have several input fields,
# so `input_key` names "search_query" -- presumably the field the evaluator
# should treat as the question; TODO confirm against the RunEvalConfig docs.
eval_config = RunEvalConfig(evaluators=["cot_qa"], input_key="search_query")
structured_input_dataset_name = "Structured Inputs"
result = client.run_on_dataset(
    dataset_name=structured_input_dataset_name,
    llm_or_chain_factory=generate_recommendation,
    evaluation=eval_config,
    project_name="recommendations",
)

View the evaluation results for project 'recommendations' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/36289877-3abb-474c-8e44-b083ba02113e/compare?selectedSessions=fe9e5f65-5191-4380-9da4-2afe1734b1b0

View all tests for Dataset Structured Inputs at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/36289877-3abb-474c-8e44-b083ba02113e
[------------------------------------------------->] 10/10

## Pt. 4 -- Dataset Versioning & Metadata

Every time an example is created, updated, or deleted, a new dataset version is saved and can be
retrieved by querying the examples `as_of` that modified time.

You can save "semantic" versions of the dataset by tagging specific times with names.

A tag can be assigned to at most 1 version at a time.

In [19]:
import datetime

# Record the dataset's state before it is modified: the newest `modified_at`
# among the current examples is reused below as the `as_of` timestamp.
examples = list(client.list_examples(dataset_name=toxic_dataset_name))
initial_time = max([e.modified_at for e in examples])
# Displayed as the cell output: the number of examples before the change.
len(examples)

13

In [20]:
# Add one more example (with metadata) to the existing dataset by name.
# As described above, this creates a new dataset version.
example = client.create_example(
    inputs={"text": "hi there"},
    outputs={"label": "Not toxic"},
    metadata={"recent": True},
    dataset_name=toxic_dataset_name,
)

In [21]:
# Count the examples as of the current (timezone-aware, UTC) time; this
# includes the example that was just added.
len(
    list(
        client.list_examples(
            dataset_name=toxic_dataset_name,
            as_of=datetime.datetime.now(tz=datetime.timezone.utc),
        )
    )
)

14

In [22]:
# Count the examples as of `initial_time`, the timestamp captured before the
# new example was added, to see the earlier version of the dataset.
len(
    list(
        client.list_examples(
            dataset_name=toxic_dataset_name, as_of=initial_time,
        )
    )
)

13

In [23]:
# Tag the dataset version at `initial_time` with the semantic name "prod",
# so it can be referenced by name instead of by timestamp.
client.update_dataset_tag(dataset_name=toxic_dataset_name, as_of=initial_time, tag="prod")

In [24]:
# Query the dataset by tag: `as_of` is given the tag name "prod" here rather
# than a timestamp.
len(
    list(
        client.list_examples(
            dataset_name=toxic_dataset_name, as_of="prod",
        )
    )
)

13

In [25]:
# Compare the tagged "prod" version with "latest". The printed result lists
# the example IDs that were modified, added, and removed between the two.
from_version = "prod"
to_version = "latest"
diff = client.diff_dataset_versions(
    dataset_name=toxic_dataset_name,
    from_version=from_version,
    to_version=to_version,
)
print(diff)

examples_modified=[] examples_added=[UUID('eb752095-10f4-4a18-82b0-58de1593bf51')] examples_removed=[]


In [27]:
# Tags let you keep evaluating against the same dataset version, and update
# the testing flow only once you are ready to commit to a new version.
# `eval_config` is rebuilt because earlier cells rebound it to other evaluators.
eval_config = RunEvalConfig(
    custom_evaluators=[correct_label], batch_evaluators=[batch_eval]
)
# `dataset_version="prod"` selects the tagged version for this run.
result = client.run_on_dataset(
    dataset_name=toxic_dataset_name,
    llm_or_chain_factory=label_query,
    evaluation=eval_config,
    project_name="dataset versioning example",
    dataset_version="prod",
    project_metadata={
        "prompt_version": "001",
    },
)

View the evaluation results for project 'dataset versioning example' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a/compare?selectedSessions=5aca56c3-2c36-4b04-8db1-019fd4d48411

View all tests for Dataset Toxic Queries at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/1d69b70b-6017-426d-8de8-b6ef8280519a
[------------------------------------------------->] 13/13

## Pt. 5 -- Proxy

In [28]:
from openai import OpenAI
from langsmith import wrappers, traceable

# Rebind `openai` to a wrapped client whose base URL is a local proxy endpoint.
# Functions that reference `openai` resolve the name when they are called, so
# calls made after this cell go through the proxy.
openai = wrappers.wrap_openai(
    OpenAI(
        base_url="http://localhost:8080/proxy/openai",
    )
)

In [29]:
# Template for the movie-description request, sent as the *user* message.
# It has its own name so that it does not overwrite the module-level
# `system_prompt`, which `generate_movie_search` reads each time it is called.
movie_description_prompt = (
    """Generate a three paragraph description of a movie about this topic: {topic}. Do not specify a title."""
)


@traceable
def generate_movie(topic):
    """Generate an untitled, three-paragraph movie description about ``topic``.

    ``topic`` is interpolated into ``movie_description_prompt`` and sent as a
    single user message to ``gpt-4``. Returns the content of the first choice.
    """
    messages = [
        {"role": "user", "content": movie_description_prompt.format(topic=topic)},
    ]
    result = openai.chat.completions.create(
        messages=messages, model="gpt-4"
    )
    return result.choices[0].message.content

In [30]:
@traceable
def generate_title(description):
    """Ask ``gpt-4`` for a title that fits the given movie ``description``.

    The description is embedded in a single user message. Returns the content
    of the first choice.
    """
    prompt = f"Generate a title for the following movie description:\n\n{description}."
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [31]:
@traceable
def pipeline(topic):
    """Generate a movie description for ``topic``, then a title for it.

    Returns:
        A dict with the generated text under ``"description"`` and ``"title"``.
    """
    movie_description = generate_movie(topic)
    return {
        "description": movie_description,
        "title": generate_title(movie_description),
    }

In [32]:
# Topics used as inputs. These examples are created with inputs only; no
# reference outputs are supplied.
movie_creation_examples = ["soccer", "a pop star", "action movie in venice"]

movie_creation_dataset_name = "Movie Creation"
movie_dataset = client.create_dataset(dataset_name=movie_creation_dataset_name)
for topic in movie_creation_examples:
    client.create_example(inputs={"topic": topic}, dataset_id=movie_dataset.id)

In [33]:
# First pass over the dataset, recorded under the project "cold cache".
# No `evaluation` config is passed, so no evaluators are configured here.
result = client.run_on_dataset(
    dataset_name=movie_creation_dataset_name,
    llm_or_chain_factory=pipeline,
    project_name="cold cache",
    project_metadata={
        "prompt_version": "1",
    }
)

View the evaluation results for project 'cold cache' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/26db3352-e870-48b3-910b-e3cd003b0ab4/compare?selectedSessions=d8292996-db53-4ea8-92fd-6b0a3b26ec30

View all tests for Dataset Movie Creation at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/26db3352-e870-48b3-910b-e3cd003b0ab4
[------------------------------------------------->] 3/3

In [34]:
@traceable
def generate_title(description):
    """Ask ``gpt-4`` for a SPANISH title that fits the movie ``description``.

    Replaces the earlier ``generate_title``; only the wording of the request
    differs. Returns the content of the first choice.
    """
    prompt = f"Generate a title in SPANISH for the following movie description:\n\n{description}."
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [35]:
@traceable
def pipeline(topic):
    """Generate a movie description for ``topic``, then a title for it.

    Redefined after ``generate_title`` was replaced; the body is the same as
    the earlier definition.

    Returns:
        A dict with the generated text under ``"description"`` and ``"title"``.
    """
    movie_description = generate_movie(topic)
    return {
        "description": movie_description,
        "title": generate_title(movie_description),
    }

In [36]:
# Second pass over the same dataset with the updated title prompt, recorded
# under the project "warm cache" with prompt_version "2".
result = client.run_on_dataset(
    dataset_name=movie_creation_dataset_name,
    llm_or_chain_factory=pipeline,
    project_name="warm cache",
    project_metadata={
        "prompt_version": "2",
    }
)

View the evaluation results for project 'warm cache' at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/26db3352-e870-48b3-910b-e3cd003b0ab4/compare?selectedSessions=e5b69871-cd7f-433b-a29f-4a75bf7b4094

View all tests for Dataset Movie Creation at:
https://smith.langchain.com/o/8d28a774-8361-496d-a5d4-dd582a8d1b10/datasets/26db3352-e870-48b3-910b-e3cd003b0ab4
[------------------------------------------------->] 3/3