Modularize detection checks

protectai · Dec 23, 2023 · dadc392 · dadc392
1 parent 1e7c393
commit dadc392
Show file tree

Hide file tree

Showing 11 changed files with 550 additions and 456 deletions.
diff --git a/javascript-sdk/src/index.ts b/javascript-sdk/src/index.ts
@@ -1,8 +1,19 @@
 export { default as RebuffApi } from "./api";
 export { default as RebuffSdk } from "./sdk";
-export {
+export type {
   ApiConfig,
   SdkConfig,
   RebuffConfig,
   VectorDbConfig,
 } from "./config";
+export {
+  RebuffError
+} from "./interface";
+export type {
+  DetectRequest,
+  DetectResponse,
+  Rebuff,
+  TacticName,
+  TacticOverride,
+  TacticResult
+} from "./interface";
diff --git a/javascript-sdk/src/interface.ts b/javascript-sdk/src/interface.ts
@@ -1,25 +1,54 @@
+export enum TacticName {
+  // A series of heuristics is used to determine whether the input is a prompt injection.
+  Heuristic = "heuristic",
+  // A language model is asked whether the input appears to be a prompt injection.
+  LanguageModel = "language_model",
+  // A vector database of known prompt injection attacks is queried for similarity.
+  VectorDB = "vector_db",
+}
+
+export interface TacticOverride {
+  // The name of the tactic to override.
+  name: TacticName;
+  // The threshold to use for this tactic. A score above this threshold counts as a detection for the tactic.
+  // If not specified, the default threshold for the tactic will be used.
+  threshold?: number;
+  // Whether to run this tactic. Defaults to true if not specified.
+  run?: boolean;
+}
+
 export interface DetectRequest {
+  // The user input to check for prompt injection.
   userInput: string;
+  // The base64-encoded user input. If this is specified, `userInput` will be ignored.
   userInputBase64?: string;
-  runHeuristicCheck: boolean;
-  runVectorCheck: boolean;
-  runLanguageModelCheck: boolean;
-  maxHeuristicScore: number;
-  maxModelScore: number;
-  maxVectorScore: number;
+  // Overrides that change the behavior of individual tactics. Any tactic that is not listed here uses its
+  // default threshold.
+  tacticOverrides?: TacticOverride[];
+}
+
+export interface TacticResult {
+  // The name of the tactic.
+  name: TacticName;
+  // The score for the tactic. This is a number between 0 and 1. The closer to 1, the more likely that the input
+  // is a prompt injection attempt.
+  score: number;
+  // Whether this tactic evaluated the input as a prompt injection attempt.
+  detected: boolean;
+  // The threshold used for this tactic. A score above this threshold counts as a detection for the tactic.
+  threshold: number;
+  // Some tactics return additional fields:
+  // * "vector_db":
+  //   - "countOverMaxVectorScore" (number): The number of different vectors whose similarity score is above the
+  //       threshold.
+  additionalFields: Record<string, any>;
 }
 
 export interface DetectResponse {
-  heuristicScore: number;
-  modelScore: number;
-  vectorScore: Record<string, number>;
-  runHeuristicCheck: boolean;
-  runVectorCheck: boolean;
-  runLanguageModelCheck: boolean;
-  maxHeuristicScore: number;
-  maxVectorScore: number;
-  maxModelScore: number;
+  // Whether a prompt injection was detected.
   injectionDetected: boolean;
+  // One result for each tactic that was executed.
+  tacticResults: TacticResult[];
 }
 
 export class RebuffError extends Error {

diff --git a/javascript-sdk/src/lib/detect.ts b/javascript-sdk/src/lib/detect.ts