## Running chains on Datasets

This section uploads an example dataset to LangChainPlus so that there is something to run a chain against.

In [None]:
!pip install datasets > /dev/null

In practice you will already have a dataset in LangChainPlus; here we upload one purely as an example.

In [1]:
import os
from io import BytesIO
from typing import Optional, Sequence, Tuple, Union

import pandas as pd
import requests

def upload_dataframe(
    base_url: str,
    df: pd.DataFrame,
    input_keys: Sequence[str],
    output_keys: Sequence[str],
    name: str,
    description: str,
    auth: Optional[dict] = None,
) -> Tuple[int, dict]:
    """Upload a DataFrame as a CSV file to ``{base_url}/datasets/upload``.

    The frame is serialized in memory (without its index) and sent as a
    multipart form upload named ``{name}.csv``.

    Args:
        base_url: Root URL of the server; ``/datasets/upload`` is appended.
        df: The data to upload.
        input_keys: Column names sent as the comma-joined ``input_keys`` field.
            A single bare string is treated as one key.
        output_keys: Column names sent as the comma-joined ``output_keys`` field.
            A single bare string is treated as one key.
        name: Dataset name; used to build the uploaded file name.
        description: Free-text description sent with the upload.
        auth: Forwarded unchanged as the ``auth`` argument of ``requests.post``.
            NOTE(review): requests expects a tuple or auth callable here rather
            than a dict -- confirm the intended type.

    Returns:
        ``(status_code, body)`` where ``body`` is the decoded JSON response, or
        ``{"error": <response text>}`` when the response body is not JSON.
    """
    # A bare string is itself a sequence of characters; joining it would produce
    # "i,n,p,u,t" instead of "input", so wrap it as a single key.
    if isinstance(input_keys, str):
        input_keys = [input_keys]
    if isinstance(output_keys, str):
        output_keys = [output_keys]

    buffer = BytesIO()
    df.to_csv(buffer, index=False)
    buffer.seek(0)  # rewind so the upload reads from the start of the CSV
    files = {"file": (f"{name}.csv", buffer)}
    data = {
        "input_keys": ','.join(input_keys),
        "output_keys": ','.join(output_keys),
        "description": description,
    }
    response = requests.post(base_url + "/datasets/upload", auth=auth, data=data, files=files)
    try:
        body = response.json()
    except ValueError:
        # Error responses are not guaranteed to be JSON; keep the status code
        # available to the caller instead of raising a decode error.
        body = {"error": response.text}
    return response.status_code, body

In [2]:
# Server base URL: taken from LANGCHAIN_ENDPOINT when set, otherwise the local default.
endpoint = os.environ.get("LANGCHAIN_ENDPOINT", "http://localhost:8000")

In [6]:
from langchain.evaluation.loading import load_dataset

dataset = load_dataset("agent-search-calculator")
# Drop the 'steps' column; the assignment below then renames the remaining columns
# positionally, so it requires exactly two of them to be left.
df = pd.DataFrame(dataset).drop(columns=['steps'])
df.columns = ['input', 'output']
dataset_name = "agent-search-calculator-dataset-test"
response_code, response_body = upload_dataframe(
    endpoint, df, ["input"], ["output"], dataset_name, "A math dataset"
)
# Report a failed upload here instead of letting it surface later as an
# unrelated lookup error. Deliberately non-fatal so the notebook keeps running.
if response_code >= 400:
    print(f"Dataset upload returned HTTP {response_code}: {response_body}")

Found cached dataset json (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-search-calculator-8a025c0ce5fb99d2/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/1 [00:00<?, ?it/s]

## Running a Chain on a Dataset
The actual logic for running a chain on a dataset starts here.

In [8]:
# Look the uploaded dataset up by file name and fail with a clear message when the
# request fails or nothing matches, rather than with a bare IndexError/KeyError.
datasets_response = requests.get(endpoint + "/datasets", params={"name": f"{dataset_name}.csv"})
datasets_response.raise_for_status()
matching_datasets = datasets_response.json()
if not matching_datasets:
    raise ValueError(f"No dataset named {dataset_name}.csv found at {endpoint}/datasets")
dataset = matching_datasets[0]

In [9]:
import asyncio
import logging

from langchain.callbacks.tracers.langchain import LangChainTracer
from langchain.chains.base import Chain

# Module-level logger used by the helpers below to report chain errors and progress.
logger = logging.getLogger(__name__)


async def arun_chain(
    example: dict, langchain_tracer: LangChainTracer, chain: Chain
) -> Union[str, dict]:
    """Run ``chain`` asynchronously on a single dataset example.

    The tracer's ``example_id`` is set to the example's id while the chain runs
    and is always reset to ``None`` afterwards.

    Args:
        example: Mapping that provides the keys ``"id"`` and ``"inputs"``.
        langchain_tracer: Tracer passed to the chain as its only callback.
        chain: The chain to execute through ``chain.arun``.

    Returns:
        Whatever ``chain.arun`` returns on success, or ``{"Error": str(exc)}``
        when the chain raises.

    Raises:
        KeyError: If ``example`` lacks ``"id"`` or ``"inputs"``.
    """
    # Read both keys before touching the tracer, so a malformed example cannot
    # leave a stale example_id on a tracer that is reused for later examples.
    example_id = example["id"]
    inputs = example["inputs"]
    langchain_tracer.example_id = example_id
    try:
        return await chain.arun(inputs, callbacks=[langchain_tracer])
    except Exception as e:
        # Best effort: one failing example is logged and reported in the results
        # rather than aborting the whole dataset run.
        logger.error(e)
        return {"Error": str(e)}
    finally:
        langchain_tracer.example_id = None


async def run_dataset(
    dataset: dict, chain: Chain, batch_size: int = 5, session_name: str = "default"
) -> list:
    """Run ``chain`` over every example in ``dataset``, ``batch_size`` at a time.

    Examples are processed in consecutive batches; the examples within a batch
    run concurrently, each with its own tracer.

    Args:
        dataset: Mapping whose ``"examples"`` entry is the list of examples.
        chain: The chain to run on each example.
        batch_size: Number of examples run concurrently; must be at least 1.
        session_name: Tracing session loaded on every tracer.

    Returns:
        One result per example, in dataset order, as produced by ``arun_chain``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    examples = dataset["examples"]
    logger.info("Running chain on %d examples ...", len(examples))
    # One tracer per concurrent slot: arun_chain stores the current example id on
    # the tracer, so two examples running at once must not share one.
    tracers = [LangChainTracer() for _ in range(batch_size)]
    for tracer in tracers:
        tracer.load_session(session_name)
    outputs = []
    for start in range(0, len(examples), batch_size):
        batch = examples[start:start + batch_size]
        outputs.extend(
            await asyncio.gather(
                *(
                    arun_chain(example, tracer, chain)
                    for example, tracer in zip(batch, tracers)
                )
            )
        )
    return outputs


In [12]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, load_tools
from langchain.agents import AgentType
from langchain import OpenAI

# The same chat model (temperature 0) drives both the agent and the llm-math tool.
llm = ChatOpenAI(temperature=0)
tools = load_tools(
    ['serpapi', 'llm-math'],
    llm=llm,
)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
)

In [14]:
# Top-level await: this relies on the notebook kernel running cells inside an event loop.
# Uses run_dataset's default batch size.
results = await run_dataset(dataset, agent)
# Bare last expression so the notebook displays one entry per example.
results

'age'. Please try again with a valid numerical expression
unknown format from LLM: Assuming we don't have any information about the actual number of points scored in the 2023 super bowl, we cannot provide a mathematical expression to solve this problem.
invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression
Could not parse LLM output: `The final answer is that there were no more points scored in the 2023 Super Bowl than in the 2022 Super Bowl.`


['The current population of Canada as of May 3, 2023 is 38,677,281.',
 "Anwar Hadid's age raised to the .43 power is approximately 2.68.",
 {'Error': "'age'. Please try again with a valid numerical expression"},
 'The distance between Paris and Boston is 3448 miles.',
 {'Error': "unknown format from LLM: Assuming we don't have any information about the actual number of points scored in the 2023 super bowl, we cannot provide a mathematical expression to solve this problem."},
 {'Error': 'invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression'},
 {'Error': 'Could not parse LLM output: `The final answer is that there were no more points scored in the 2023 Super Bowl than in the 2022 Super Bowl.`'},
 '1.9347796717823205',
 "Bad Bunny's height (in inches) raised to the .13 power is approximately 1.740 inches.",
 '0.2791714614499425']

scratch
----------

ignore below

In [None]:
# from langchain import PromptTemplate
# template = """You are assessing a submitted student answer to a question relative to the true answer based on the provided criteria: 
    
#     ***
#     QUESTION: {query}
#     ***
#     STUDENT ANSWER: {result}
#     ***
#     TRUE ANSWER: {answer}
#     ***
#     Criteria: 
#       relevance:  Is the submission referring to a real quote from the text?"
#       conciseness:  Is the answer concise and to the point?"
#       correct: Is the answer correct?"
#     ***
#     Does the submission meet the criterion? First, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print "Correct" or "Incorrect" (without quotes or punctuation) on its own line corresponding to the correct answer.
#     Reasoning:
# """

# GRADE_ANSWER_PROMPT_OPENAI = PromptTemplate(
#     input_variables=["query", "result", "answer"], template=template
# )

# eval_chain = QAEvalChain.from_llm(
#     llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
#     prompt=GRADE_ANSWER_PROMPT_OPENAI,
# )



In [None]:
# # TODO: Update the runs syntax
# chain_runs = requests.get(endpoint + "/chain-runs").json()
# chain_run_ids = [chain_run['id'] for chain_run in chain_runs]
# from typing import List, Optional
# import pandas as pd
# from io import BytesIO
# import json

# # Could do runs or whatever
# def create_dataset(chain_run_ids: List[str], name: str, description: str, auth: Optional[dict] = None) -> str:
#     examples = []
#     for run_id in chain_run_ids:
#         run_url = endpoint + f"/chain-runs/{run_id}"
#         run_response = requests.get(run_url).json()
#         examples.append({"chain_runs": run_response})
#         values["inputs"].append(run_response["inputs"])
#         values["outputs"].append((run_response["outputs"])
#     dataset_request_body = {"name": "foo", 
#                         "description": "bar", 
#                         "examples": [],
#                        }
#     response = requests.post(endpoint + "/datasets", json=dataset_request_body)
#     return response.json()
