Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/sample-app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"run:sample_edit": "npm run build && node dist/src/test_edit_only.js",
"run:sample_generate": "npm run build && node dist/src/test_generate_only.js",
"run:sample_experiment": "npm run build && node dist/src/sample_experiment.js",
"run:github_experiment": "npm run build && node dist/src/sample_github_experiment.js",
"run:mcp": "npm run build && node dist/src/sample_mcp.js",
"run:mcp:real": "npm run build && node dist/src/sample_mcp_real.js",
"run:mcp:working": "npm run build && node dist/src/sample_mcp_working.js",
Expand Down
24 changes: 14 additions & 10 deletions packages/sample-app/src/sample_experiment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,13 @@ const main = async () => {

stopLoader(loader1, " ✅ Experiment completed");

console.log(`✅ Completed refuse advice experiment:`);
console.log(` - Results: ${results1.taskResults.length}`);
console.log(` - Errors: ${results1.errors.length}`);
console.log(` - Experiment ID: ${results1.experimentId}`);
console.log("Evaluation Results:", results1.evaluations);
if ("taskResults" in results1) {
console.log(`✅ Completed refuse advice experiment:`);
console.log(` - Results: ${results1.taskResults.length}`);
console.log(` - Errors: ${results1.errors.length}`);
console.log(` - Experiment ID: ${results1.experimentId}`);
console.log("Evaluation Results:", results1.evaluations);
}

console.log(
"\n🧪 Running experiment with comprehensive medical info prompt...",
Expand All @@ -151,11 +153,13 @@ const main = async () => {
});

stopLoader(loader2, " ✅ Experiment completed");

console.log(`✅ Completed provide info experiment:`);
console.log(` - Results: ${results2.taskResults.length}`);
console.log(` - Errors: ${results2.errors.length}`);
console.log(` - Experiment ID: ${results2.experimentId}`);
if ("taskResults" in results2) {
console.log(`✅ Completed provide info experiment:`);
console.log(` - Results: ${results2.taskResults.length}`);
console.log(` - Errors: ${results2.errors.length}`);
console.log(` - Experiment ID: ${results2.experimentId}`);
console.log("Evaluation Results:", results2.evaluations);
}
} catch (error) {
console.error(
"❌ Error in experiment operations:",
Expand Down
128 changes: 128 additions & 0 deletions packages/sample-app/src/sample_github_experiment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import * as traceloop from "@traceloop/node-server-sdk";
import { OpenAI } from "openai";
import type {
ExperimentTaskFunction,
TaskInput,
TaskOutput,
} from "@traceloop/node-server-sdk";

import "dotenv/config";

/**
 * Entry point of the GitHub experiment sample.
 *
 * Initializes the Traceloop SDK, defines a task that answers each dataset
 * row's `query` with OpenAI, runs the "research-ts" experiment against the
 * "research-queries" dataset and prints a summary of the outcome.
 *
 * Every failure path terminates the process with exit code 1 so that a CI job
 * running this script is reported as failed.
 */
const main = async () => {
  console.log("Starting GitHub experiment sample");
  traceloop.initialize({
    appName: "sample_github_experiment",
    apiKey: process.env.TRACELOOP_API_KEY,
    disableBatch: true,
    traceloopSyncEnabled: true,
  });

  try {
    await traceloop.waitForInitialization();
  } catch (error) {
    console.error(
      "Failed to initialize Traceloop SDK:",
      error instanceof Error ? error.message : String(error),
    );
    console.error("Initialization error details:", error);
    process.exit(1);
  }

  const client = traceloop.getClient();
  if (!client) {
    console.error("Failed to initialize Traceloop client");
    // Exit non-zero like every other failure path: a bare `return` would let
    // the process finish with code 0 and a CI job would report success.
    process.exit(1);
  }

  console.log("🚀 GitHub Experiment Sample Application");
  console.log("=======================================\n");

  // Initialize OpenAI client
  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
  });

  /**
   * Example task function that generates a response using OpenAI
   * This will be executed locally in GitHub Actions
   */
  const researchTask: ExperimentTaskFunction = async (
    row: TaskInput,
  ): Promise<TaskOutput> => {
    // Coerce first so the log line and the prompt always show the same text,
    // even when the row has no `query` field.
    const question = String(row.query ?? "");
    console.log(`Processing question: ${question}`);

    const response = await openai.chat.completions.create({
      model: "gpt-4o",
      messages: [
        {
          role: "system",
          content:
            "You are a helpful research assistant. Provide concise, accurate answers.",
        },
        { role: "user", content: question },
      ],
      temperature: 0.7,
      max_tokens: 500,
    });

    // Fall back to an empty answer when the completion carries no content.
    const answer = response.choices?.[0]?.message?.content ?? "";

    return {
      question,
      sentence: answer,
      completion: answer,
    };
  };

  try {
    console.log("\n🧪 Running experiment...");
    console.log(
      "   If in GitHub Actions, will run in GitHub context automatically\n",
    );

    const results = await client.experiment.run(researchTask, {
      datasetSlug: "research-queries",
      datasetVersion: "v2",
      evaluators: [
        "research-relevancy",
        "categories",
        "research-facts-counter",
      ],
      experimentSlug: "research-ts",
    });

    console.log("\n✅ Experiment completed successfully!");
    console.log("Results:", JSON.stringify(results, null, 2));

    // Only the local execution result carries `taskResults`; its absence
    // means the run was submitted through the GitHub path.
    if ("taskResults" in results) {
      // Local execution result
      console.log(`\n   - Task Results: ${results.taskResults.length}`);
      console.log(`   - Errors: ${results.errors.length}`);
      if (results.experimentId) {
        console.log(`   - Experiment ID: ${results.experimentId}`);
      }
    } else {
      // GitHub execution result
      console.log(
        "\n💡 Results will be posted as a comment on the pull request by the backend",
      );
    }
  } catch (error) {
    console.error(
      "\n❌ Error in GitHub experiment:",
      error instanceof Error ? error.message : String(error),
    );
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    process.exit(1);
  }
};

// Last-resort handler: report any rejection that escapes main() and exit non-zero.
main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error instance, so narrow before
  // reading `.message` instead of printing `undefined`.
  console.error(
    "💥 Application failed:",
    error instanceof Error ? error.message : String(error),
  );
  process.exit(1);
});
170 changes: 170 additions & 0 deletions packages/traceloop-sdk/src/lib/client/experiment/experiment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ import type {
ExecutionResponse,
CreateTaskRequest,
CreateTaskResponse,
GithubContext,
TaskResult,
RunInGithubOptions,
RunInGithubResponse,
} from "../../interfaces/experiment.interface";

export class Experiment {
Expand Down Expand Up @@ -68,6 +72,21 @@ export class Experiment {
* Run an experiment with the given task function and options
*/
async run<TInput, TOutput>(
  task: ExperimentTaskFunction<TInput, TOutput>,
  options: ExperimentRunOptions | RunInGithubOptions = {},
): Promise<ExperimentRunResult | RunInGithubResponse> {
  // The environment decides the execution path: GITHUB_ACTIONS set to the
  // literal string "true" routes to the GitHub flow, anything else runs locally.
  const insideGithubActions = process.env.GITHUB_ACTIONS === "true";

  return insideGithubActions
    ? await this.runInGithub(task, options as RunInGithubOptions)
    : await this.runLocally(task, options);
}

/**
* Run an experiment locally (not in GitHub Actions)
*/
private async runLocally<TInput, TOutput>(
task: ExperimentTaskFunction<TInput, TOutput>,
options: ExperimentRunOptions = {},
): Promise<ExperimentRunResult> {
Expand Down Expand Up @@ -323,4 +342,155 @@ export class Experiment {
});
}
}

/**
 * Extract GitHub Actions context from environment variables.
 *
 * Reads `GITHUB_REPOSITORY`, `GITHUB_REF`, `GITHUB_SHA` and `GITHUB_ACTOR`,
 * and derives the pull request URL from the PR number embedded in the ref
 * (`refs/pull/<number>/...`). The URL host comes from `GITHUB_SERVER_URL`
 * when it is set, otherwise `https://github.com`.
 *
 * @returns Repository, pull request URL, commit hash and actor of the current run.
 * @throws Error if any required variable is unset or empty, or if the ref is
 *   not a pull request ref.
 */
private getGithubContext(): GithubContext {
  const repository = process.env.GITHUB_REPOSITORY;
  const ref = process.env.GITHUB_REF;
  const sha = process.env.GITHUB_SHA;
  const actor = process.env.GITHUB_ACTOR;

  if (!repository || !ref || !sha || !actor) {
    throw new Error(
      "Missing required GitHub environment variables: GITHUB_REPOSITORY, GITHUB_REF, GITHUB_SHA, or GITHUB_ACTOR",
    );
  }

  // Extract PR number from ref (e.g., refs/pull/123/merge -> 123).
  // Anchored at the start so that a branch whose name merely contains
  // "refs/pull/<n>/" (ref "refs/heads/refs/pull/1/x") is not mistaken for a PR.
  const prMatch = /^refs\/pull\/(\d+)\//.exec(ref);
  const prNumber = prMatch ? prMatch[1] : null;

  if (!prNumber) {
    throw new Error(
      `This method can only be run on pull request events. Current ref: ${ref}`,
    );
  }

  // Honor the server the workflow runs against (e.g. GitHub Enterprise Server).
  // `||` rather than `??` on purpose: an empty value must also fall back.
  const serverUrl = (
    process.env.GITHUB_SERVER_URL || "https://github.com"
  ).replace(/\/+$/, "");
  const prUrl = `${serverUrl}/${repository}/pull/${prNumber}`;

  return {
    repository,
    prUrl,
    commitHash: sha,
    actor,
  };
}

/**
 * Runs `task` once per dataset row and collects one result record per row.
 *
 * All rows are started together through `Promise.all`. A task that throws or
 * rejects does not abort the batch: the failure is recorded as an `error`
 * string on that row's record instead of an `output`.
 *
 * @param task - Function invoked with each row.
 * @param rows - Rows fed to the task; `row.id` is copied into each record's metadata.
 * @returns One record per row, in the same order as `rows`.
 */
private async executeTasksLocally<TInput, TOutput>(
  task: ExperimentTaskFunction<TInput, TOutput>,
  rows: Record<string, any>[],
): Promise<TaskResult[]> {
  const runRow = async (row: Record<string, any>): Promise<TaskResult> => {
    // Built lazily so the timestamp reflects when the outcome became known.
    const stamp = () => ({ rowId: row.id, timestamp: Date.now() });

    try {
      const produced = await task(row as TInput);
      return {
        input: row,
        output: produced as Record<string, any>,
        metadata: stamp(),
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { input: row, error: message, metadata: stamp() };
    }
  };

  return await Promise.all(rows.map((row) => runRow(row)));
}

/**
 * Run an experiment in GitHub Actions environment.
 *
 * Executes `task` against every dataset row in this process, then posts the
 * collected task results together with the GitHub context to
 * `/v2/experiments/run-in-github` so the backend can evaluate them.
 *
 * @param task - Function invoked once per dataset row.
 * @param options - Dataset, evaluators and optional metadata for the run. When
 *   `experimentSlug` is omitted, the client's configured slug is used, or a
 *   generated one if the client has none.
 * @returns The backend's response to the submission.
 * @throws Error if `task` is not a function.
 * @throws Error prefixed with "GitHub experiment execution failed:" when
 *   reading the GitHub context, loading the dataset rows or submitting the
 *   results fails; the original failure is attached as `cause`.
 */
async runInGithub<TInput, TOutput>(
  task: ExperimentTaskFunction<TInput, TOutput>,
  options: RunInGithubOptions,
): Promise<RunInGithubResponse> {
  const {
    datasetSlug,
    datasetVersion,
    evaluators = [],
    experimentMetadata,
    experimentRunMetadata,
    relatedRef,
    aux,
  } = options;

  // Generate or use provided experiment slug
  let { experimentSlug } = options;
  if (!experimentSlug) {
    experimentSlug =
      this.client.experimentSlug || this.generateExperimentSlug();
  }

  if (!task || typeof task !== "function") {
    throw new Error("Task function is required and must be a function");
  }

  try {
    // Resolved before any task runs, so a non-pull-request run fails without
    // spending time on task execution.
    const githubContext = this.getGithubContext();

    const rows = await this.getDatasetRows(datasetSlug, datasetVersion);

    const taskResults = await this.executeTasksLocally(task, rows);

    // Evaluators may be given as plain slugs or as objects carrying a `name`.
    const evaluatorSlugs = evaluators.map((evaluator) =>
      typeof evaluator === "string" ? evaluator : evaluator.name,
    );

    // `created_from` comes after the spread so caller metadata cannot override it.
    const mergedExperimentMetadata = {
      ...(experimentMetadata || {}),
      created_from: "github",
    };

    // Optional fields are only added when the caller supplied them.
    const mergedExperimentRunMetadata = {
      ...(experimentRunMetadata || {}),
      ...(relatedRef && { related_ref: relatedRef }),
      ...(aux && { aux: aux }),
    };

    // Submit to backend
    const payload = {
      experiment_slug: experimentSlug,
      dataset_slug: datasetSlug,
      dataset_version: datasetVersion,
      evaluator_slugs: evaluatorSlugs,
      task_results: taskResults,
      github_context: {
        repository: githubContext.repository,
        pr_url: githubContext.prUrl,
        commit_hash: githubContext.commitHash,
        actor: githubContext.actor,
      },
      experiment_metadata: mergedExperimentMetadata,
      experiment_run_metadata: mergedExperimentRunMetadata,
    };

    const response = await this.client.post(
      "/v2/experiments/run-in-github",
      payload,
    );
    const data = await this.handleResponse(response);

    return data;
  } catch (error) {
    // Describe non-Error throwables with String(), matching executeTasksLocally,
    // rather than collapsing them into an uninformative "Unknown error".
    const reason = error instanceof Error ? error.message : String(error);
    // Attach the original failure so its stack trace remains reachable.
    // Object.assign is used instead of the two-argument Error constructor so
    // this compiles regardless of whether ES2022 lib typings are enabled.
    throw Object.assign(
      new Error(`GitHub experiment execution failed: ${reason}`),
      { cause: error },
    );
  }
}
}
31 changes: 31 additions & 0 deletions packages/traceloop-sdk/src/lib/interfaces/experiment.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,34 @@ export interface CreateTaskRequest {
export interface CreateTaskResponse {
id: string;
}

/**
 * GitHub Actions context attached to an experiment run submitted from a
 * pull request workflow. Built by `Experiment.getGithubContext` from the
 * runner's environment variables.
 */
export interface GithubContext {
/** Value of the `GITHUB_REPOSITORY` environment variable. */
repository: string;
/** URL of the pull request, derived from the repository and the PR number found in `GITHUB_REF`. */
prUrl: string;
/** Value of the `GITHUB_SHA` environment variable. */
commitHash: string;
/** Value of the `GITHUB_ACTOR` environment variable. */
actor: string;
}

/**
 * Outcome of running the experiment task on a single dataset row.
 * As produced by `Experiment.executeTasksLocally`, a record carries either
 * `output` (task succeeded) or `error` (task threw), never both.
 */
export interface TaskResult {
/** The dataset row that was passed to the task. */
input: Record<string, any>;
/** Value returned by the task; absent when the task failed. */
output?: Record<string, any>;
/** Message of the error thrown by the task; absent when the task succeeded. */
error?: string;
/** Extra information about the execution; `executeTasksLocally` stores `rowId` and `timestamp` here. */
metadata?: Record<string, any>;
}

/**
 * Options accepted by `Experiment.runInGithub`.
 */
export interface RunInGithubOptions {
/** Slug of the dataset whose rows are fed to the task; sent to the backend as `dataset_slug`. */
datasetSlug: string;
/** Dataset version to load; sent to the backend as `dataset_version`. */
datasetVersion?: string;
/** Evaluators to apply; each entry is reduced to a slug (the string itself, or the entry's `name`). Defaults to none. */
evaluators?: EvaluatorDetails[];
/** Experiment slug; when omitted, the client's configured slug or a generated one is used. */
experimentSlug?: string;
/** Extra experiment metadata; `created_from: "github"` is always added on top of it. */
experimentMetadata?: Record<string, any>;
/** Extra metadata for this run; `relatedRef` and `aux` are merged into it when provided. */
experimentRunMetadata?: Record<string, any>;
/** When provided, added to the run metadata under the `related_ref` key. */
relatedRef?: Record<string, string>;
/** When provided, added to the run metadata under the `aux` key. */
aux?: Record<string, string>;
}

/**
 * Result returned by `Experiment.runInGithub` after the task results have
 * been posted to `/v2/experiments/run-in-github`.
 * NOTE(review): field meanings below are inferred from their names — confirm
 * against the backend API contract.
 */
export interface RunInGithubResponse {
/** Presumably the backend identifier of the experiment — TODO confirm. */
experimentId: string;
/** Presumably the slug of the experiment the run was recorded under — TODO confirm. */
experimentSlug: string;
/** Presumably the backend identifier of this particular run — TODO confirm. */
runId: string;
}
Loading