Skip to content

Commit

Permalink
Merge pull request #5403 from langchain-ai/dqbd/run-on-dataset-criter…
Browse files Browse the repository at this point in the history
…ia-fix

fix[runOnDataset]: accept custom criteria in config factory
  • Loading branch information
dqbd committed May 20, 2024
2 parents a8236e4 + b4b1f05 commit 05c4c76
Showing 1 changed file with 57 additions and 7 deletions.
64 changes: 57 additions & 7 deletions langchain/src/smith/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,24 @@ const getSingleStringifiedValue = (value: unknown) => {
* @example
* ```ts
* const evalConfig = {
* evaluators: [Criteria("helpfulness")],
* };
* ```
* @example
* ```ts
* const evalConfig = {
* evaluators: [
* Criteria({
* "isCompliant": "Does the submission comply with the requirements of XYZ"
* })
* ],
* };
* ```
* @example
* ```ts
* const evalConfig = {
* evaluators: [{
* evaluatorType: "criteria",
* criteria: "helpfulness",
* formatEvaluatorInputs: ...
* }]
* };
* ```
Expand All @@ -224,7 +239,8 @@ const getSingleStringifiedValue = (value: unknown) => {
* const evalConfig = {
* evaluators: [{
* evaluatorType: "criteria",
* criteria: { "isCompliant": "Does the submission comply with the requirements of XYZ"
* criteria: { "isCompliant": "Does the submission comply with the requirements of XYZ" },
* formatEvaluatorInputs: ...
* }]
* };
*/
Expand All @@ -249,7 +265,7 @@ export type Criteria = EvalConfig & {
export type CriteriaEvalChainConfig = Criteria;

export function Criteria(
criteria: CriteriaType,
criteria: CriteriaType | Record<string, string>,
config?: Pick<
Partial<LabeledCriteria>,
"formatEvaluatorInputs" | "llm" | "feedbackKey"
Expand All @@ -262,10 +278,19 @@ export function Criteria(
input: getSingleStringifiedValue(payload.rawInput),
}));

if (typeof criteria !== "string" && Object.keys(criteria).length !== 1) {
throw new Error(
"Only one criteria key is allowed when specifying custom criteria."
);
}

const criteriaKey =
typeof criteria === "string" ? criteria : Object.keys(criteria)[0];

return {
evaluatorType: "criteria",
criteria,
feedbackKey: config?.feedbackKey ?? criteria,
feedbackKey: config?.feedbackKey ?? criteriaKey,
llm: config?.llm,
formatEvaluatorInputs,
};
Expand All @@ -283,9 +308,24 @@ export function Criteria(
* @example
* ```ts
* const evalConfig = {
* evaluators: [LabeledCriteria("correctness")],
* };
* ```
* @example
* ```ts
* const evalConfig = {
* evaluators: [
* LabeledCriteria({
* "mentionsAllFacts": "Does the submission include all facts provided in the reference?"
* })
* ],
* };
* ```
* @example
* ```ts
* const evalConfig = {
* evaluators: [{
* evaluatorType: "labeled_criteria",
* criteria: "correctness"
* criteria: "correctness",
* formatEvaluatorInputs: ...
* }],
* };
* ```
Expand All @@ -294,7 +334,8 @@ export function Criteria(
* const evalConfig = {
* evaluators: [{
* evaluatorType: "labeled_criteria",
* criteria: { "mentionsAllFacts": "Does the include all facts provided in the reference?" }
* criteria: { "mentionsAllFacts": "Does the submission include all facts provided in the reference?" },
* formatEvaluatorInputs: ...
* }],
* };
*/
Expand All @@ -316,7 +357,7 @@ export type LabeledCriteria = EvalConfig & {
};

export function LabeledCriteria(
criteria: CriteriaType,
criteria: CriteriaType | Record<string, string>,
config?: Pick<
Partial<LabeledCriteria>,
"formatEvaluatorInputs" | "llm" | "feedbackKey"
Expand All @@ -330,10 +371,19 @@ export function LabeledCriteria(
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
}));

if (typeof criteria !== "string" && Object.keys(criteria).length !== 1) {
throw new Error(
"Only one labeled criteria key is allowed when specifying custom criteria."
);
}

const criteriaKey =
typeof criteria === "string" ? criteria : Object.keys(criteria)[0];

return {
evaluatorType: "labeled_criteria",
criteria,
feedbackKey: config?.feedbackKey ?? criteria,
feedbackKey: config?.feedbackKey ?? criteriaKey,
llm: config?.llm,
formatEvaluatorInputs,
};
Expand Down

0 comments on commit 05c4c76

Please sign in to comment.