Skip to content

Commit

Permalink
Modularize detection checks
Browse files Browse the repository at this point in the history
  • Loading branch information
Risto McGehee committed Dec 23, 2023
1 parent 1e7c393 commit dadc392
Show file tree
Hide file tree
Showing 11 changed files with 550 additions and 456 deletions.
13 changes: 12 additions & 1 deletion javascript-sdk/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
export { default as RebuffApi } from "./api";
export { default as RebuffSdk } from "./sdk";
export {
export type {
ApiConfig,
SdkConfig,
RebuffConfig,
VectorDbConfig,
} from "./config";
export {
RebuffError
} from "./interface";
export type {
DetectRequest,
DetectResponse,
Rebuff,
TacticName,
TacticOverride,
TacticResult
} from "./interface";
59 changes: 44 additions & 15 deletions javascript-sdk/src/interface.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,54 @@
// Names of the detection tactics. This is a string enum: each member's runtime value is the snake_case
// string on its right-hand side. It is used as the `name` field of both TacticOverride and TacticResult.
export enum TacticName {
// A series of heuristics is used to determine whether the input is prompt injection.
Heuristic = "heuristic",
// A language model is asked whether the input appears to be prompt injection.
LanguageModel = "language_model",
// A vector database of known prompt injection attacks is queried for similarity.
VectorDB = "vector_db",
}

// Per-tactic settings a caller can supply through DetectRequest.tacticOverrides. Only `name` is required;
// the other fields are optional.
export interface TacticOverride {
// The name of the tactic to override.
name: TacticName;
// The threshold to use for this tactic. If the score is above this threshold, the tactic will be considered
// detected. If not specified, the default threshold for the tactic will be used.
threshold?: number;
// Whether to run this tactic. Defaults to true if not specified.
run?: boolean;
}

// Input to a detection call: the text to check, plus optional per-tactic overrides.
export interface DetectRequest {
// The user input to check for prompt injection.
userInput: string;
// The base64-encoded user input. If this is specified, the user input will be ignored.
userInputBase64?: string;
// NOTE(review): the six required fields below are undocumented here and overlap with `tacticOverrides`
// (a per-tactic `run` flag and `threshold`). Presumably the run* flags toggle a check and the max* values
// are score thresholds - confirm. They may be pre-refactor fields left over from a diff view; verify
// against the actual interface.ts before relying on them.
runHeuristicCheck: boolean;
runVectorCheck: boolean;
runLanguageModelCheck: boolean;
maxHeuristicScore: number;
maxModelScore: number;
maxVectorScore: number;
// Any tactics to change behavior for. If any tactic is not specified, the default threshold for that tactic will be
// used.
tacticOverrides?: TacticOverride[];
}

// The outcome of one tactic. DetectResponse.tacticResults holds an array of these.
export interface TacticResult {
// The name of the tactic.
name: TacticName;
// The score for the tactic. This is a number between 0 and 1. The closer to 1, the more likely that this is a
// prompt injection attempt.
score: number;
// Whether this tactic evaluated the input as a prompt injection attempt.
detected: boolean;
// The threshold used for this tactic. If the score is above this threshold, the tactic will be considered detected.
threshold: number;
// Some tactics return additional fields:
// * "vector_db":
// - "countOverMaxVectorScore" (number): The number of different vectors whose similarity score is above the
// threshold.
// Values are typed `any`, so the compiler does not check them; narrow each value before use.
additionalFields: Record<string, any>;
}

// Result of a detection call: an overall verdict plus the per-tactic results.
export interface DetectResponse {
// NOTE(review): the nine required fields below are undocumented here and overlap with `tacticResults`
// (which carries a per-tactic `score` and `threshold`). Presumably they are the per-check scores, run flags
// and thresholds of an earlier response shape - confirm. They may be pre-refactor fields left over from a
// diff view; verify against the actual interface.ts before relying on them.
heuristicScore: number;
modelScore: number;
vectorScore: Record<string, number>;
runHeuristicCheck: boolean;
runVectorCheck: boolean;
runLanguageModelCheck: boolean;
maxHeuristicScore: number;
maxVectorScore: number;
maxModelScore: number;
// Whether prompt injection was detected.
injectionDetected: boolean;
// The result for each tactic that was executed.
tacticResults: TacticResult[];
}

export class RebuffError extends Error {
Expand Down
197 changes: 0 additions & 197 deletions javascript-sdk/src/lib/detect.ts

This file was deleted.

Loading

0 comments on commit dadc392

Please sign in to comment.