diff --git a/docs/api/retrieval_model_clients/FaissRM.md b/docs/api/retrieval_model_clients/FaissRM.md
index 2ef9dadc1f..6349045216 100644
--- a/docs/api/retrieval_model_clients/FaissRM.md
+++ b/docs/api/retrieval_model_clients/FaissRM.md
@@ -40,7 +40,7 @@ The **FaissRM** module provides a retriever that uses an in-memory Faiss vector
 
 ```python
 import dspy
-from dspy.retrieve import FaissRM
+from dspy.retrieve.faiss_rm import FaissRM
 
 document_chunks = [
     "The superbowl this year was played between the San Francisco 49ers and the Kanasas City Chiefs",
@@ -59,4 +59,4 @@ frm = FaissRM(document_chunks)
 turbo = dspy.OpenAI(model="gpt-3.5-turbo")
 dspy.settings.configure(lm=turbo, rm=frm)
 print(frm(["I am in the mood for Chinese food"]))
-```
\ No newline at end of file
+```
diff --git a/dsp/modules/aws_providers.py b/dsp/modules/aws_providers.py
index 646cc0f431..5a6138ead6 100644
--- a/dsp/modules/aws_providers.py
+++ b/dsp/modules/aws_providers.py
@@ -3,6 +3,31 @@
 from abc import ABC, abstractmethod
 from typing import Any, Optional
 
+import backoff
+
+try:
+    import boto3
+    from botocore.exceptions import ClientError
+    ERRORS = (ClientError,)
+
+except ImportError:
+    ERRORS = (Exception,)
+
+
+def backoff_hdlr(details):
+    """Print a retry notice; adapted from https://pypi.org/project/backoff/."""
+    template = (
+        "Backing off {wait:0.1f} seconds after {tries} tries "
+        "calling function {target} with kwargs {kwargs}"
+    )
+    print(template.format(**details))
+
+
+def giveup_hdlr(details):
+    """Return False (keep retrying) only when the error mentions "max retries"."""
+    # `details` is the raised exception; with the ERRORS fallback it may be any
+    # Exception, so tolerate empty args and a non-string first argument.
+    return not any("max retries" in str(arg) for arg in details.args[:1])
 
 class AWSProvider(ABC):
     """This abstract class adds support for AWS model providers such as Bedrock and SageMaker.
@@ -52,6 +77,14 @@ def get_provider_name(self) -> str:
         return self.__class__.__name__
 
     @abstractmethod
+    @backoff.on_exception(
+        backoff.expo,
+        ERRORS,
+        max_time=1000,
+        max_tries=8,
+        on_backoff=backoff_hdlr,
+        giveup=giveup_hdlr,
+    )
     def call_model(self, model_id: str, body: str) -> str:
         """Call the model and return the response."""
 
@@ -119,6 +152,14 @@ def __init__(
         """
         super().__init__(region_name, "runtime.sagemaker", profile_name)
 
+    @backoff.on_exception(
+        backoff.expo,
+        ERRORS,
+        max_time=1000,
+        max_tries=8,
+        on_backoff=backoff_hdlr,
+        giveup=giveup_hdlr,
+    )
     def call_model(self, model_id: str, body: str) -> str:
         return self.predictor.invoke_endpoint(
             EndpointName=model_id,