From 789026ffa93f8e1a46245ff7cb305c483b3b7292 Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Wed, 26 Nov 2025 14:31:02 +0200 Subject: [PATCH 1/9] added --- .../src/sample_github_experiment.ts | 127 +++++++++++ .../src/lib/client/experiment/experiment.ts | 200 ++++++++++++++++++ .../lib/interfaces/experiment.interface.ts | 29 +++ .../traceloop-sdk/src/lib/node-server-sdk.ts | 4 + 4 files changed, 360 insertions(+) create mode 100644 packages/sample-app/src/sample_github_experiment.ts diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts new file mode 100644 index 00000000..2ffc3279 --- /dev/null +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -0,0 +1,127 @@ +import * as traceloop from "@traceloop/node-server-sdk"; +import { OpenAI } from "openai"; +import type { + ExperimentTaskFunction, + TaskInput, + TaskOutput, +} from "@traceloop/node-server-sdk"; + +import "dotenv/config"; + +const main = async () => { + console.log("Starting GitHub experiment sample"); + traceloop.initialize({ + appName: "sample_github_experiment", + apiKey: process.env.TRACELOOP_API_KEY, + disableBatch: true, + traceloopSyncEnabled: true, + }); + + try { + await traceloop.waitForInitialization(); + } catch (error) { + console.error( + "Failed to initialize Traceloop SDK:", + error instanceof Error ? error.message : String(error), + ); + console.error("Initialization error details:", error); + process.exit(1); + } + + const client = traceloop.getClient(); + if (!client) { + console.error("Failed to initialize Traceloop client"); + return; + } + + console.log("๐Ÿš€ GitHub Experiment Sample Application"); + console.log("=======================================\n"); + + // Initialize OpenAI client + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + }); + + /** + * Example task function that generates a response using OpenAI + * This will be executed locally in GitHub Actions + */ + const researchTask: ExperimentTaskFunction = async ( + row: TaskInput, + ): Promise => { + const question = row.question as string; + + console.log(`Processing question: ${question}`); + + const response = await openai.chat.completions.create({ + model: "gpt-4", + messages: [ + { + role: "system", + content: "You are a helpful research assistant. Provide concise, accurate answers.", + }, + { role: "user", content: question }, + ], + temperature: 0.7, + max_tokens: 500, + }); + + const answer = response.choices?.[0]?.message?.content || ""; + + return { + question, + answer, + model: "gpt-4", + timestamp: Date.now(), + }; + }; + + try { + console.log("\n๐Ÿงช Running experiment..."); + console.log(" If in GitHub Actions, will run in GitHub context automatically\n"); + + const results = await client.experiment.run(researchTask, { + datasetSlug: "research-questions", + datasetVersion: "v1", + evaluators: ["Answer Quality", "Relevance"], + experimentSlug: "github-research-experiment", + aux: { + purpose: "Test GitHub integration", + framework: "openllmetry-js", + node_version: process.version, + platform: process.platform, + }, + }); + + console.log("\nโœ… Experiment completed successfully!"); + console.log("Results:", JSON.stringify(results, null, 2)); + + // Check if this is a GitHub response (has experimentSlug but not taskResults) + if ("taskResults" in results) { + // Local execution result + console.log(`\n - Task Results: ${results.taskResults.length}`); + console.log(` - Errors: ${results.errors.length}`); + if (results.experimentId) { + console.log(` - Experiment ID: ${results.experimentId}`); + } + } else { + // GitHub execution result + console.log("\n๐Ÿ’ก Results will be posted as a comment on the pull request by the backend"); + } + } catch (error) { + console.error( + "\nโŒ Error in GitHub experiment:", + error instanceof Error ? error.message : String(error), + ); + if (error instanceof Error && error.stack) { + console.error("Stack trace:", error.stack); + } + process.exit(1); + } +}; + +// Error handling for the main function +main().catch((error) => { + console.error("๐Ÿ’ฅ Application failed:", error.message); + process.exit(1); +}); diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index f9abe46a..58226292 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -12,6 +12,10 @@ import type { ExecutionResponse, CreateTaskRequest, CreateTaskResponse, + GithubContext, + TaskResult, + RunInGithubOptions, + RunInGithubResponse, } from "../../interfaces/experiment.interface"; export class Experiment { @@ -66,10 +70,37 @@ export class Experiment { /** * Run an experiment with the given task function and options + * If running in GitHub Actions, will automatically run in GitHub context. + * Otherwise, will run the experiment locally. */ async run( task: ExperimentTaskFunction, options: ExperimentRunOptions = {}, + ): Promise { + // Check if running in GitHub Actions + if (process.env.GITHUB_ACTIONS === "true") { + return await this.runInGithub(task, { + datasetSlug: options.datasetSlug || "", + datasetVersion: options.datasetVersion, + evaluators: options.evaluators, + experimentSlug: options.experimentSlug, + experimentMetadata: options.relatedRef ? { ...options.aux, created_from: "github" } : { created_from: "github" }, + experimentRunMetadata: { + ...(options.relatedRef && { related_ref: options.relatedRef }), + ...(options.aux && { aux: options.aux }), + }, + }); + } + + return await this.runLocally(task, options); + } + + /** + * Run an experiment locally (not in GitHub Actions) + */ + private async runLocally( + task: ExperimentTaskFunction, + options: ExperimentRunOptions = {}, ): Promise { const { datasetSlug, @@ -323,4 +354,173 @@ export class Experiment { }); } } + + /** + * Extract GitHub Actions context from environment variables + */ + private getGithubContext(): GithubContext { + const isGithubActions = process.env.CI === "true" && process.env.GITHUB_ACTIONS === "true"; + + if (!isGithubActions) { + throw new Error( + "This method can only be run in GitHub Actions. Please ensure CI=true and GITHUB_ACTIONS=true environment variables are set.", + ); + } + + const repository = process.env.GITHUB_REPOSITORY; + const ref = process.env.GITHUB_REF; + const sha = process.env.GITHUB_SHA; + const actor = process.env.GITHUB_ACTOR; + + if (!repository || !ref || !sha || !actor) { + throw new Error( + "Missing required GitHub environment variables: GITHUB_REPOSITORY, GITHUB_REF, GITHUB_SHA, or GITHUB_ACTOR", + ); + } + + // Extract PR number from ref (e.g., refs/pull/123/merge -> 123) + const prMatch = ref.match(/refs\/pull\/(\d+)\//); + const prNumber = prMatch ? prMatch[1] : null; + + if (!prNumber) { + throw new Error( + `This method can only be run on pull request events. Current ref: ${ref}`, + ); + } + + const prUrl = `https://github.com/${repository}/pull/${prNumber}`; + + return { + repository, + prUrl, + commitHash: sha, + actor, + }; + } + + /** + * Execute tasks locally and capture results + */ + private async executeTasksLocally( + task: ExperimentTaskFunction, + rows: Record[], + ): Promise { + const results: TaskResult[] = []; + + for (const row of rows) { + try { + const output = await task(row as TInput); + results.push({ + input: row, + output: output as Record, + metadata: { + rowId: row.id, + timestamp: Date.now(), + }, + }); + } catch (error) { + results.push({ + input: row, + error: error instanceof Error ? error.message : String(error), + metadata: { + rowId: row.id, + timestamp: Date.now(), + }, + }); + } + } + + return results; + } + + /** + * Run an experiment in GitHub Actions environment + * This method executes tasks locally and submits results to the backend for evaluation + */ + async runInGithub( + task: ExperimentTaskFunction, + options: RunInGithubOptions, + ): Promise { + const { + datasetSlug, + datasetVersion, + evaluators = [], + experimentMetadata, + experimentRunMetadata, + } = options; + + // Generate or use provided experiment slug + let { experimentSlug } = options; + if (!experimentSlug) { + experimentSlug = + this.client.experimentSlug || this.generateExperimentSlug(); + } + + // Validate task function + if (!task || typeof task !== "function") { + throw new Error("Task function is required and must be a function"); + } + + try { + // Get GitHub context + const githubContext = this.getGithubContext(); + + // Get dataset rows + const rows = await this.getDatasetRows(datasetSlug, datasetVersion); + + // Execute tasks locally + const taskResults = await this.executeTasksLocally(task, rows); + + // Prepare evaluator slugs + const evaluatorSlugs = evaluators.map((evaluator) => + typeof evaluator === "string" ? evaluator : evaluator.name, + ); + + // Add created_from to experiment metadata + const mergedExperimentMetadata = { + ...experimentMetadata, + created_from: "github", + }; + + // Submit to backend + const payload = { + experiment_slug: experimentSlug, + dataset_slug: datasetSlug, + dataset_version: datasetVersion, + evaluator_slugs: evaluatorSlugs, + task_results: taskResults, + github_context: { + repository: githubContext.repository, + pr_url: githubContext.prUrl, + commit_hash: githubContext.commitHash, + actor: githubContext.actor, + }, + experiment_metadata: mergedExperimentMetadata, + experiment_run_metadata: experimentRunMetadata, + }; + + const response = await this.client.post( + "/experiments/run-in-github", + payload, + ); + + if (!response.ok) { + throw new Error( + `Failed to submit GitHub experiment: ${response.status} ${response.statusText}`, + ); + } + + const data = await this.handleResponse(response); + + return { + experimentId: data.experimentId || data.experiment_id, + experimentSlug: data.experimentSlug || data.experiment_slug || experimentSlug, + runId: data.runId || data.run_id, + }; + } catch (error) { + throw new Error( + `GitHub experiment execution failed: ${error instanceof Error ? error.message : "Unknown error"}`, + ); + } + } } diff --git a/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts b/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts index 81eacefe..768a9fdb 100644 --- a/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts +++ b/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts @@ -94,3 +94,32 @@ export interface CreateTaskRequest { export interface CreateTaskResponse { id: string; } + +export interface GithubContext { + repository: string; + prUrl: string; + commitHash: string; + actor: string; +} + +export interface TaskResult { + input: Record; + output?: Record; + error?: string; + metadata?: Record; +} + +export interface RunInGithubOptions { + datasetSlug: string; + datasetVersion?: string; + evaluators?: EvaluatorDetails[]; + experimentSlug?: string; + experimentMetadata?: Record; + experimentRunMetadata?: Record; +} + +export interface RunInGithubResponse { + experimentId: string; + experimentSlug: string; + runId: string; +} diff --git a/packages/traceloop-sdk/src/lib/node-server-sdk.ts b/packages/traceloop-sdk/src/lib/node-server-sdk.ts index 1e986cde..cde04f0a 100644 --- a/packages/traceloop-sdk/src/lib/node-server-sdk.ts +++ b/packages/traceloop-sdk/src/lib/node-server-sdk.ts @@ -25,6 +25,10 @@ export { TaskResponse, ExecutionResponse, EvaluatorDetails, + GithubContext, + TaskResult, + RunInGithubOptions, + RunInGithubResponse, StreamEvent, SSEStreamEvent, } from "./interfaces"; From 6f9a2e432d1f493292e676bf12d9d04c44df2fff Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 12:15:16 +0200 Subject: [PATCH 2/9] real setting --- packages/sample-app/package.json | 1 + .../sample-app/src/sample_github_experiment.ts | 15 +++++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/sample-app/package.json b/packages/sample-app/package.json index 7abff2fe..0824f125 100644 --- a/packages/sample-app/package.json +++ b/packages/sample-app/package.json @@ -38,6 +38,7 @@ "run:sample_edit": "npm run build && node dist/src/test_edit_only.js", "run:sample_generate": "npm run build && node dist/src/test_generate_only.js", "run:sample_experiment": "npm run build && node dist/src/sample_experiment.js", + "run:github_experiment": "npx tsx src/sample_github_experiment.ts", "run:mcp": "npm run build && node dist/src/sample_mcp.js", "run:mcp:real": "npm run build && node dist/src/sample_mcp_real.js", "run:mcp:working": "npm run build && node dist/src/sample_mcp_working.js", diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index 2ffc3279..8c7f4cd2 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -80,17 +80,12 @@ const main = async () => { console.log("\n๐Ÿงช Running experiment..."); console.log(" If in GitHub Actions, will run in GitHub context automatically\n"); - const results = await client.experiment.run(researchTask, { - datasetSlug: "research-questions", - datasetVersion: "v1", - evaluators: ["Answer Quality", "Relevance"], + const results = await client.experiment.run( + researchTask, { + datasetSlug: "research-queries", + datasetVersion: "v2", + evaluators: ["research-relevancy", "categories", "research-facts-counter"], experimentSlug: "github-research-experiment", - aux: { - purpose: "Test GitHub integration", - framework: "openllmetry-js", - node_version: process.version, - platform: process.platform, - }, }); console.log("\nโœ… Experiment completed successfully!"); From 8b5c4bea44702ffe09f3ee5d6777e7a5960a1479 Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 12:28:12 +0200 Subject: [PATCH 3/9] setting --- .../sample-app/src/sample_github_experiment.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index 8c7f4cd2..a3edc781 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -49,9 +49,8 @@ const main = async () => { const researchTask: ExperimentTaskFunction = async ( row: TaskInput, ): Promise => { - const question = row.question as string; - - console.log(`Processing question: ${question}`); + console.log(`Processing question: ${row.query}`); + const question = row.query as string; const response = await openai.chat.completions.create({ model: "gpt-4", @@ -70,9 +69,8 @@ const main = async () => { return { question, - answer, - model: "gpt-4", - timestamp: Date.now(), + sentence: answer, + completion: answer, }; }; @@ -80,12 +78,11 @@ const main = async () => { console.log("\n๐Ÿงช Running experiment..."); console.log(" If in GitHub Actions, will run in GitHub context automatically\n"); - const results = await client.experiment.run( - researchTask, { + const results = await client.experiment.run(researchTask, { datasetSlug: "research-queries", datasetVersion: "v2", evaluators: ["research-relevancy", "categories", "research-facts-counter"], - experimentSlug: "github-research-experiment", + experimentSlug: "research-ts", }); console.log("\nโœ… Experiment completed successfully!"); From 11092cd2cc50c2cd442944dc97f5e16049d00cbc Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:12:34 +0200 Subject: [PATCH 4/9] works --- packages/sample-app/package.json | 2 +- packages/sample-app/src/sample_experiment.ts | 24 +++++++++++-------- .../src/sample_github_experiment.ts | 2 +- .../src/lib/client/experiment/experiment.ts | 10 +------- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/packages/sample-app/package.json b/packages/sample-app/package.json index 0824f125..37ab8dd9 100644 --- a/packages/sample-app/package.json +++ b/packages/sample-app/package.json @@ -38,7 +38,7 @@ "run:sample_edit": "npm run build && node dist/src/test_edit_only.js", "run:sample_generate": "npm run build && node dist/src/test_generate_only.js", "run:sample_experiment": "npm run build && node dist/src/sample_experiment.js", - "run:github_experiment": "npx tsx src/sample_github_experiment.ts", + "run:github_experiment": "npm run build && node dist/src/sample_github_experiment.js", "run:mcp": "npm run build && node dist/src/sample_mcp.js", "run:mcp:real": "npm run build && node dist/src/sample_mcp_real.js", "run:mcp:working": "npm run build && node dist/src/sample_mcp_working.js", diff --git a/packages/sample-app/src/sample_experiment.ts b/packages/sample-app/src/sample_experiment.ts index ce33c375..34da075d 100644 --- a/packages/sample-app/src/sample_experiment.ts +++ b/packages/sample-app/src/sample_experiment.ts @@ -129,11 +129,13 @@ const main = async () => { stopLoader(loader1, " โœ… Experiment completed"); - console.log(`โœ… Completed refuse advice experiment:`); - console.log(` - Results: ${results1.taskResults.length}`); - console.log(` - Errors: ${results1.errors.length}`); - console.log(` - Experiment ID: ${results1.experimentId}`); - console.log("Evaluation Results:", results1.evaluations); + if ("taskResults" in results1) { + console.log(`โœ… Completed refuse advice experiment:`); + console.log(` - Results: ${results1.taskResults.length}`); + console.log(` - Errors: ${results1.errors.length}`); + console.log(` - Experiment ID: ${results1.experimentId}`); + console.log("Evaluation Results:", results1.evaluations); + } console.log( "\n๐Ÿงช Running experiment with comprehensive medical info prompt...", @@ -151,11 +153,13 @@ const main = async () => { }); stopLoader(loader2, " โœ… Experiment completed"); - - console.log(`โœ… Completed provide info experiment:`); - console.log(` - Results: ${results2.taskResults.length}`); - console.log(` - Errors: ${results2.errors.length}`); - console.log(` - Experiment ID: ${results2.experimentId}`); + if ("taskResults" in results2) { + console.log(`โœ… Completed provide info experiment:`); + console.log(` - Results: ${results2.taskResults.length}`); + console.log(` - Errors: ${results2.errors.length}`); + console.log(` - Experiment ID: ${results2.experimentId}`); + console.log("Evaluation Results:", results2.evaluations); + } } catch (error) { console.error( "โŒ Error in experiment operations:", diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index a3edc781..d10e784b 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -53,7 +53,7 @@ const main = async () => { const question = row.query as string; const response = await openai.chat.completions.create({ - model: "gpt-4", + model: "gpt-4o", messages: [ { role: "system", diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index 58226292..b77af088 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -359,14 +359,6 @@ export class Experiment { * Extract GitHub Actions context from environment variables */ private getGithubContext(): GithubContext { - const isGithubActions = process.env.CI === "true" && process.env.GITHUB_ACTIONS === "true"; - - if (!isGithubActions) { - throw new Error( - "This method can only be run in GitHub Actions. Please ensure CI=true and GITHUB_ACTIONS=true environment variables are set.", - ); - } - const repository = process.env.GITHUB_REPOSITORY; const ref = process.env.GITHUB_REF; const sha = process.env.GITHUB_SHA; @@ -500,7 +492,7 @@ export class Experiment { }; const response = await this.client.post( - "/experiments/run-in-github", + "/v2/experiments/run-in-github", payload, ); From 0619959019b1010716ac63922a9d4ecac3c09dae Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:16:14 +0200 Subject: [PATCH 5/9] pretty --- .../sample-app/src/sample_github_experiment.ts | 17 +++++++++++++---- .../src/lib/client/experiment/experiment.ts | 9 +++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index d10e784b..72e3a18a 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -57,7 +57,8 @@ const main = async () => { messages: [ { role: "system", - content: "You are a helpful research assistant. Provide concise, accurate answers.", + content: + "You are a helpful research assistant. Provide concise, accurate answers.", }, { role: "user", content: question }, ], @@ -76,12 +77,18 @@ const main = async () => { try { console.log("\n๐Ÿงช Running experiment..."); - console.log(" If in GitHub Actions, will run in GitHub context automatically\n"); + console.log( + " If in GitHub Actions, will run in GitHub context automatically\n", + ); const results = await client.experiment.run(researchTask, { datasetSlug: "research-queries", datasetVersion: "v2", - evaluators: ["research-relevancy", "categories", "research-facts-counter"], + evaluators: [ + "research-relevancy", + "categories", + "research-facts-counter", + ], experimentSlug: "research-ts", }); @@ -98,7 +105,9 @@ const main = async () => { } } else { // GitHub execution result - console.log("\n๐Ÿ’ก Results will be posted as a comment on the pull request by the backend"); + console.log( + "\n๐Ÿ’ก Results will be posted as a comment on the pull request by the backend", + ); } } catch (error) { console.error( diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index b77af088..1b7b8f2a 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -70,8 +70,6 @@ export class Experiment { /** * Run an experiment with the given task function and options - * If running in GitHub Actions, will automatically run in GitHub context. - * Otherwise, will run the experiment locally. */ async run( task: ExperimentTaskFunction, @@ -84,7 +82,9 @@ export class Experiment { datasetVersion: options.datasetVersion, evaluators: options.evaluators, experimentSlug: options.experimentSlug, - experimentMetadata: options.relatedRef ? { ...options.aux, created_from: "github" } : { created_from: "github" }, + experimentMetadata: options.relatedRef + ? { ...options.aux, created_from: "github" } + : { created_from: "github" }, experimentRunMetadata: { ...(options.relatedRef && { related_ref: options.relatedRef }), ...(options.aux && { aux: options.aux }), @@ -506,7 +506,8 @@ export class Experiment { return { experimentId: data.experimentId || data.experiment_id, - experimentSlug: data.experimentSlug || data.experiment_slug || experimentSlug, + experimentSlug: + data.experimentSlug || data.experiment_slug || experimentSlug, runId: data.runId || data.run_id, }; } catch (error) { From 6a1258603b656bd49b52ed41cfe3219fa82618c9 Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:39:47 +0200 Subject: [PATCH 6/9] pr comments --- .../src/sample_github_experiment.ts | 2 +- .../src/lib/client/experiment/experiment.ts | 40 ++++++------------- .../lib/interfaces/experiment.interface.ts | 2 + 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index 72e3a18a..62a70f03 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -50,7 +50,7 @@ const main = async () => { row: TaskInput, ): Promise => { console.log(`Processing question: ${row.query}`); - const question = row.query as string; + const question = String(row.query ?? "") const response = await openai.chat.completions.create({ model: "gpt-4o", diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index 1b7b8f2a..d1342cdf 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -73,23 +73,11 @@ export class Experiment { */ async run( task: ExperimentTaskFunction, - options: ExperimentRunOptions = {}, + options: ExperimentRunOptions | RunInGithubOptions = {}, ): Promise { // Check if running in GitHub Actions if (process.env.GITHUB_ACTIONS === "true") { - return await this.runInGithub(task, { - datasetSlug: options.datasetSlug || "", - datasetVersion: options.datasetVersion, - evaluators: options.evaluators, - experimentSlug: options.experimentSlug, - experimentMetadata: options.relatedRef - ? { ...options.aux, created_from: "github" } - : { created_from: "github" }, - experimentRunMetadata: { - ...(options.relatedRef && { related_ref: options.relatedRef }), - ...(options.aux && { aux: options.aux }), - }, - }); + return await this.runInGithub(task, options as RunInGithubOptions); } return await this.runLocally(task, options); @@ -439,6 +427,8 @@ export class Experiment { evaluators = [], experimentMetadata, experimentRunMetadata, + relatedRef, + aux, } = options; // Generate or use provided experiment slug @@ -448,19 +438,15 @@ export class Experiment { this.client.experimentSlug || this.generateExperimentSlug(); } - // Validate task function if (!task || typeof task !== "function") { throw new Error("Task function is required and must be a function"); } try { - // Get GitHub context const githubContext = this.getGithubContext(); - // Get dataset rows const rows = await this.getDatasetRows(datasetSlug, datasetVersion); - // Execute tasks locally const taskResults = await this.executeTasksLocally(task, rows); // Prepare evaluator slugs @@ -468,12 +454,17 @@ export class Experiment { typeof evaluator === "string" ? evaluator : evaluator.name, ); - // Add created_from to experiment metadata const mergedExperimentMetadata = { - ...experimentMetadata, + ...(experimentMetadata || {}), created_from: "github", }; + const mergedExperimentRunMetadata = { + ...(experimentRunMetadata || {}), + ...(relatedRef && { related_ref: relatedRef }), + ...(aux && { aux: aux }), + }; + // Submit to backend const payload = { experiment_slug: experimentSlug, @@ -488,20 +479,13 @@ export class Experiment { actor: githubContext.actor, }, experiment_metadata: mergedExperimentMetadata, - experiment_run_metadata: experimentRunMetadata, + experiment_run_metadata: mergedExperimentRunMetadata, }; const response = await this.client.post( "/v2/experiments/run-in-github", payload, ); - - if (!response.ok) { - throw new Error( - `Failed to submit GitHub experiment: ${response.status} ${response.statusText}`, - ); - } - const data = await this.handleResponse(response); return { diff --git a/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts b/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts index 768a9fdb..cd1a65e2 100644 --- a/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts +++ b/packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts @@ -116,6 +116,8 @@ export interface RunInGithubOptions { experimentSlug?: string; experimentMetadata?: Record; experimentRunMetadata?: Record; + relatedRef?: Record; + aux?: Record; } export interface RunInGithubResponse { From d3573c7b12c65becbd9416e4f79c8a2ad74b2ffb Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 14:41:28 +0200 Subject: [PATCH 7/9] pretty --- packages/sample-app/src/sample_github_experiment.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sample-app/src/sample_github_experiment.ts b/packages/sample-app/src/sample_github_experiment.ts index 62a70f03..b5525a2c 100644 --- a/packages/sample-app/src/sample_github_experiment.ts +++ b/packages/sample-app/src/sample_github_experiment.ts @@ -50,7 +50,7 @@ const main = async () => { row: TaskInput, ): Promise => { console.log(`Processing question: ${row.query}`); - const question = String(row.query ?? "") + const question = String(row.query ?? ""); const response = await openai.chat.completions.create({ model: "gpt-4o", From 9db388d227e8768aa201cc405102f36443ebc4c0 Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:51:41 +0200 Subject: [PATCH 8/9] pr comment --- .../src/lib/client/experiment/experiment.ts | 57 ++++++++----------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index d1342cdf..ff586722 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -385,32 +385,30 @@ export class Experiment { task: ExperimentTaskFunction, rows: Record[], ): Promise { - const results: TaskResult[] = []; - - for (const row of rows) { - try { - const output = await task(row as TInput); - results.push({ - input: row, - output: output as Record, - metadata: { - rowId: row.id, - timestamp: Date.now(), - }, - }); - } catch (error) { - results.push({ - input: row, - error: error instanceof Error ? error.message : String(error), - metadata: { - rowId: row.id, - timestamp: Date.now(), - }, - }); - } - } - - return results; + return await Promise.all( + rows.map(async (row) => { + try { + const output = await task(row as TInput); + return { + input: row, + output: output as Record, + metadata: { + rowId: row.id, + timestamp: Date.now(), + }, + }; + } catch (error) { + return { + input: row, + error: error instanceof Error ? error.message : String(error), + metadata: { + rowId: row.id, + timestamp: Date.now(), + }, + }; + } + }) + ); } /** @@ -488,12 +486,7 @@ export class Experiment { ); const data = await this.handleResponse(response); - return { - experimentId: data.experimentId || data.experiment_id, - experimentSlug: - data.experimentSlug || data.experiment_slug || experimentSlug, - runId: data.runId || data.run_id, - }; + return data; } catch (error) { throw new Error( `GitHub experiment execution failed: ${error instanceof Error ? error.message : "Unknown error"}`, From b4a55728b7e23b238d87d9d3206a5ebe09ba3567 Mon Sep 17 00:00:00 2001 From: nina-kollman <59646487+nina-kollman@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:55:32 +0200 Subject: [PATCH 9/9] pretty --- packages/traceloop-sdk/src/lib/client/experiment/experiment.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts index ff586722..85f2cc2d 100644 --- a/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts +++ b/packages/traceloop-sdk/src/lib/client/experiment/experiment.ts @@ -407,7 +407,7 @@ export class Experiment { }, }; } - }) + }), ); }