Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 66 additions & 5 deletions evals/cli/oaieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,40 @@ def get_parser() -> argparse.ArgumentParser:
help="Path to the registry",
)
parser.add_argument("--debug", action=argparse.BooleanOptionalAction, default=False)
parser.add_argument("--local-run", action=argparse.BooleanOptionalAction, default=True)
parser.add_argument(
"--local-run",
action=argparse.BooleanOptionalAction,
default=True,
help="Enable local mode for running evaluations. In this mode, the evaluation results are stored locally in a JSON file. This mode is enabled by default.",
)

parser.add_argument(
"--http-run",
action=argparse.BooleanOptionalAction,
default=False,
help="Enable HTTP mode for running evaluations. In this mode, the evaluation results are sent to a specified URL rather than being stored locally or in Snowflake. This mode should be used in conjunction with the '--http-run-url' and '--http-batch-size' arguments.",
)

parser.add_argument(
"--http-run-url",
type=str,
default=None,
help="URL to send the evaluation results when in HTTP mode. This option should be used in conjunction with the '--http-run' flag.",
)

parser.add_argument(
"--http-batch-size",
type=int,
default=100,
help="Number of events to send in each HTTP request when in HTTP mode. Default is 1, i.e., send events individually. Set to a larger number to send events in batches. This option should be used in conjunction with the '--http-run' flag.",
)
parser.add_argument(
"--http-fail-percent-threshold",
type=int,
default=5,
help="The acceptable percentage threshold of HTTP requests that can fail. Default is 5, meaning 5% of total HTTP requests can fail without causing any issues. If the failure rate goes beyond this threshold, suitable action should be taken or the process will be deemed as failing, but still stored locally.",
)

parser.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=False)
parser.add_argument("--dry-run-logging", action=argparse.BooleanOptionalAction, default=True)
return parser
Expand All @@ -69,6 +102,10 @@ class OaiEvalArguments(argparse.Namespace):
registry_path: Optional[str]
debug: bool
local_run: bool
http_run: bool
http_run_url: Optional[str]
http_batch_size: int
http_fail_percent_threshold: int
dry_run: bool
dry_run_logging: bool

Expand Down Expand Up @@ -122,13 +159,38 @@ def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> str:
else:
record_path = args.record_path

if args.http_run:
args.local_run = False
elif args.local_run:
args.http_run = False

recorder: evals.record.RecorderBase
recorder_kwargs = []
if args.dry_run:
recorder = evals.record.DummyRecorder(run_spec=run_spec, log=args.dry_run_logging)
recorder_class = evals.record.DummyRecorder
recorder_args = {"run_spec": run_spec, "log": args.dry_run_logging}
elif args.local_run:
recorder = evals.record.LocalRecorder(record_path, run_spec=run_spec)
recorder_class = evals.record.LocalRecorder
recorder_args = {"run_spec": run_spec}
recorder_kwargs = [record_path]
elif args.http_run:
if args.http_run_url is None:
raise ValueError("URL must be specified when using http-run mode")
recorder_class = evals.record.HttpRecorder
recorder_args = {
"url": args.http_run_url,
"run_spec": run_spec,
"batch_size": args.http_batch_size,
"fail_percent_threshold": args.http_fail_percent_threshold,
"local_fallback_path": record_path,
}

else:
recorder = evals.record.Recorder(record_path, run_spec=run_spec)
recorder_class = evals.record.Recorder
recorder_args = {"run_spec": run_spec}
recorder_kwargs = [record_path]

recorder = recorder_class(*recorder_kwargs, **recorder_args)

api_extra_options: dict[str, Any] = {}
if not args.cache:
Expand Down Expand Up @@ -190,7 +252,6 @@ def main() -> None:
)
logging.getLogger("openai").setLevel(logging.WARN)

# TODO)) why do we need this?
if hasattr(openai.error, "set_display_cause"): # type: ignore
openai.error.set_display_cause() # type: ignore
run(args)
Expand Down
95 changes: 95 additions & 0 deletions evals/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Any, List, Optional, Sequence

import blobfile as bf
import requests

import evals
from evals.base import RunSpec
Expand Down Expand Up @@ -341,6 +342,100 @@ def record_final_report(self, final_report: Any):
logging.info(f"Final report: {final_report}. Logged to {self.event_file_path}")


class HttpRecorder(RecorderBase):
    """Recorder that POSTs evaluation events to an HTTP endpoint.

    Events are delivered in batches of ``batch_size``. Failed deliveries are
    tallied; once the fraction of failed events exceeds
    ``fail_percent_threshold`` percent, delivery is abandoned and events are
    persisted locally via a ``LocalRecorder`` writing to
    ``local_fallback_path``.
    """

    def __init__(
        self,
        url: str,
        run_spec: RunSpec,
        local_fallback_path: str,
        fail_percent_threshold: int = 5,
        batch_size: int = 100,
    ):
        super().__init__(run_spec)
        self.url = url
        self.batch_size = batch_size
        # Stored as a fraction in [0, 1] so it can be compared directly
        # against failed_requests / total_events below.
        self.fail_percent_threshold = fail_percent_threshold / 100
        # Running count of events (not requests) that failed to send.
        self.failed_requests = 0
        self.local_fallback_path = local_fallback_path
        # Fallback recorder used whenever HTTP delivery is deemed failed.
        self.local_fallback_recorder = LocalRecorder(local_fallback_path, run_spec)
        logger.info(f"HttpRecorder initialized with URL {self.url}")

    def _flush_events_internal(self, events_to_write: Sequence[Event]):
        """Send pending events in batches; on failure, persist the failing
        batch locally and re-raise so the caller knows delivery degraded."""
        batch_size = self.batch_size
        for i in range(0, len(events_to_write), batch_size):
            batch = list(events_to_write[i : i + batch_size])
            try:
                self._send_event(batch)
            except RuntimeError as e:
                logger.error(f"Falling back to LocalRecorder due to error: {str(e)}")
                self.local_fallback_recorder._flush_events_internal(batch)
                raise RuntimeError(
                    "An error occurred when sending events. Your events have been saved locally using the Local recorder."
                )

    def _send_event(self, events: List[Event]):
        """POST a batch of events to ``self.url`` as a JSON array.

        Failures (non-2xx responses or request exceptions) increment the
        failed-event tally. Raises ``RuntimeError`` once the failure fraction
        exceeds the configured threshold.
        """
        # Convert the events to dictionaries for JSON serialization.
        events_dict = [dataclasses.asdict(event) for event in events]

        logger.debug(f"Sending events: {events_dict}")

        try:
            # Bound the request so a hung endpoint cannot stall the run forever.
            response = requests.post(self.url, json=events_dict, timeout=30)

            if response.ok:
                logger.debug("Events sent successfully")
            else:
                logger.warning(f"Failed to send events: {response.text}")
                # Count every event in the failed request, not just the request.
                self.failed_requests += len(events)

        except Exception as e:
            logger.warning(f"Failed to send events: {str(e)}")
            # Count every event in the failed request, not just the request.
            self.failed_requests += len(events)

        # Check whether the proportion of failed events exceeds the threshold.
        fail_threshold = self.fail_percent_threshold
        # Human-readable form for the error message.
        fail_threshold_str = str(fail_threshold * 100) + "%"

        # Guard against ZeroDivisionError: the final-report event is sent
        # without being appended to self._events, so _events may be empty.
        total_events = len(self._events)
        if total_events > 0 and self.failed_requests / total_events > fail_threshold:
            raise RuntimeError(
                "The proportion of failed events has exceeded the threshold of: "
                + fail_threshold_str
                + "."
                + " Falling back to LocalRecorder. "
                "You can modify this via the cli flag --http-fail-percent-threshold"
            )

    def record_final_report(self, final_report: Any):
        """Wrap the final report in a ``final_report`` event and send it,
        falling back to the local recorder on delivery failure."""
        report_event = Event(
            run_id=self.run_spec.run_id,
            event_id=len(self._events),
            sample_id=None,  # final reports are not tied to a specific sample
            type="final_report",
            data=final_report,
            created_by=self.run_spec.created_by,
            created_at=str(datetime.now(timezone.utc)),
        )

        # Send the final report event; use the module logger for consistency
        # with the rest of this class.
        try:
            self._send_event([report_event])
            logger.info(f"Final report: {final_report}.")
            logger.info(f"Data logged to: {self.url}")
        except RuntimeError as e:
            logger.error(f"Falling back to LocalRecorder due to error: {str(e)}")
            self.local_fallback_recorder.record_final_report(final_report)


class Recorder(RecorderBase):
"""
A recorder which logs events to Snowflake.
Expand Down