# Azure OpenAI Helper
We hear a lot about rate limits on subscriptions and on different models. While you can handle that with appliances in front of your resources in Azure, you can also build it into a simple yet reliable class that retries on a "429" error. This notebook uses `@retry` from the `tenacity` library to retry on a 429 error and automatically switches to your other deployments for you.

## Setup for one to many endpoints
1. At minimum, create the environment variables
    - Set both the `OPENAI_API_BASE` and the `OPENAI_API_KEY` environment variables
1. OR: Create a `json.env` file with content like the example below
    ```json
    [
        {"endpoint":"https://openairesource1.openai.azure.com/","key":"999aaa9999"},
        {"endpoint":"https://openairesource2.openai.azure.com/","key":"999aaa9999"}
    ]
    ```

## Use the AOAIHelper class as defined in this document

In [3]:
import os
import json
import requests
from dotenv import load_dotenv
from tenacity import retry, wait_random_exponential, stop_after_attempt, retry_if_exception_type,stop_after_delay

# Get Configuration Settings from a .env file
# (AOAIHelper falls back to the OPENAI_API_BASE / OPENAI_API_KEY environment
# variables when it is constructed without an explicit endpoint list.)
load_dotenv()

True

In [4]:
class AOAIHelper:
    """Client that spreads requests over one or more Azure OpenAI resources.

    On construction every configured endpoint is queried for its deployments.
    Requests are then sent to the current deployment of the requested model;
    when a resource answers with HTTP 429 the helper rotates to the next
    deployment of the same model and the request is retried.
    """

    class OpenAI429Exception(Exception):
        """Raised when the status code is 429"""
        pass

    def __init__(self,endpoints=None):
        """Discover the deployments available on each endpoint.

        Args:
            endpoints: Optional list of ``{"endpoint": <base url>, "key": <api key>}``
                dicts. When omitted or empty, a single endpoint is built from
                the ``OPENAI_API_BASE`` and ``OPENAI_API_KEY`` environment
                variables.

        Raises:
            ValueError: If no endpoints were given and one of the two
                environment variables is not set.
        """
        if not endpoints:
            base=os.getenv('OPENAI_API_BASE')
            api_key=os.getenv('OPENAI_API_KEY')
            if base is None or api_key is None:
                raise ValueError("No endpoints supplied and OPENAI_API_BASE / OPENAI_API_KEY are not both set.")
            endpoints=[{"endpoint":base,"key":api_key}]

        # State is per instance. Keeping these lists on the class would make
        # every new instance append to (and duplicate) the same shared lists.
        self.__endpoints=list(endpoints)
        self.__models=[]
        self.__pointers={}

        for endpoint in self.__endpoints:
            # Normalise to exactly one trailing slash so the URL templates
            # below never produce "//openai/..." or "...comopenai/...".
            base=endpoint['endpoint'].rstrip('/')+'/'
            headers={"Content-Type":"application/json","api-key":endpoint['key']}
            uri = f"{base}openai/deployments?api-version=2022-12-01"

            response = requests.get(uri,headers=headers).json()
            for dep in response['data']:
                self.__models.append({"endpoint":base,"key":endpoint['key'],"model":dep['model'],"deployment":dep['id']})

        # One rotation index per distinct model name, starting at the first
        # deployment discovered for that model.
        for entry in self.__models:
            self.__pointers.setdefault(entry['model'],0)

    def __deploymentsFor(self,model):
        """Return all discovered deployments of ``model``, in discovery order.

        Raises:
            ValueError: If no configured endpoint exposes ``model``.
        """
        mods=[m for m in self.__models if m['model'] == model]
        if not mods:
            raise ValueError(f"No deployment found for model '{model}' on the configured endpoints.")
        return mods

    def __getModel(self,model="text-davinci-003"):
        """Return the deployment record currently selected for ``model``."""
        mods=self.__deploymentsFor(model)
        return mods[self.__pointers[model]]

    def __incrementModel(self,model="text-davinci-003"):
        """Advance ``model`` to its next deployment, wrapping to the first."""
        count=len(self.__deploymentsFor(model))
        self.__pointers[model]=(self.__pointers[model]+1) % count

    @retry(retry=retry_if_exception_type(OpenAI429Exception),wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6) | stop_after_delay(5))
    def __request(self,uri,body,model):
        """POST ``body`` to the current deployment of ``model``.

        Args:
            uri: URL template containing ``{end}`` and ``{deployment}``
                placeholders.
            body: JSON-serialisable request payload.
            model: Model name used to pick the deployment.

        Returns:
            The decoded JSON response.

        Raises:
            OpenAI429Exception: When the service answers 429. The ``@retry``
                decorator is configured to retry on this exception, so each
                retry targets the deployment selected by the rotation below.
        """
        point=self.__getModel(model)
        key=point["key"]
        end=point["endpoint"]
        deployment=point["deployment"]

        headers={"Content-Type":"application/json","api-key":key}
        uri = uri.format(end=end,deployment=deployment)

        request = requests.post(uri, headers=headers, json=body)
        print(f"end={end}")
        print(f"request.status_code={request.status_code}")

        if request.status_code == 429:
            # Rotate to the next deployment of this model before the retry.
            self.__incrementModel(model)
            raise self.OpenAI429Exception("OpenAI API rate limit exceeded. Retrying...")

        response = request.json()
        return response

    def Embedding(self,text,model="text-embedding-ada-002"):
        """Return the embedding vector for ``text``.

        Unlike ``Completion`` and ``Chat`` this method does not inspect the
        response for an ``error`` entry; a response without ``data`` raises
        ``KeyError``.
        """
        uri = "{end}openai/deployments/{deployment}/embeddings?api-version=2022-12-01"
        response = self.__request(uri,{"input":text},model)
        embeddings = response['data'][0]['embedding']
        return embeddings

    def Completion(self,prompt, model="text-davinci-002", max_tokens=150, temperature=0.9):
        """Return the text of the first completion choice for ``prompt``.

        If the response contains an ``error`` entry, its ``message`` string is
        returned instead of a completion.
        """
        uri = "{end}openai/deployments/{deployment}/completions?api-version=2022-12-01"

        body={
            "prompt": prompt,
            "max_tokens":max_tokens,
            "temperature":temperature,
            "stop":["#"]
        }

        response = self.__request(uri,body,model)
        if "error" in response:
            # Error payloads look like {"error":{"message":"blah"}}
            return response['error']['message']
        return response['choices'][0]['text']

    def Chat(self,messages, model="gpt-35-turbo", max_tokens=150, temperature=0.9):
        """Return the first choice's ``message`` dict for a chat conversation.

        If the response contains an ``error`` entry, its ``message`` string is
        returned instead.
        """
        uri="{end}openai/deployments/{deployment}/chat/completions?api-version=2023-05-15"

        body={
            "messages": messages,
            "max_tokens":max_tokens,
            "temperature":temperature
        }

        response = self.__request(uri,body,model)
        if "error" in response:
            # Error payloads look like {"error":{"message":"blah"}}
            return response['error']['message']
        return response['choices'][0]['message']

    def Models(self):
        """Returns a list of models and their endpoints"""
        # The API key is deliberately left out of the returned records.
        return [
            {"endpoint":model["endpoint"],"model":model["model"],"deployment":model["deployment"]}
            for model in self.__models
        ]
# Build the helper from json.env when that file is present; otherwise let it
# fall back to the environment variables.
path="json.env"
if not os.path.exists(path):
    aoaiCls = AOAIHelper()
else:
    with open(path) as json_file:
        data = json.load(json_file)
    aoaiCls = AOAIHelper(data)

# Show every discovered deployment.
print(aoaiCls.Models())

# Smoke-test the embeddings call and report the vector length.
emb = aoaiCls.Embedding("This is a tester message")
print(len(emb))



[{'endpoint': 'https://aoai-dai-dev-use.openai.azure.com/', 'model': 'gpt-35-turbo', 'deployment': 'gpt-35-turbo'}, {'endpoint': 'https://aoai-dai-dev-use.openai.azure.com/', 'model': 'code-davinci-002', 'deployment': 'code-davinci-002'}, {'endpoint': 'https://aoai-dai-dev-use.openai.azure.com/', 'model': 'text-davinci-003', 'deployment': 'text-davinci-003'}, {'endpoint': 'https://aoai-dai-dev-use.openai.azure.com/', 'model': 'text-embedding-ada-002', 'deployment': 'text-embedding-ada-002'}, {'endpoint': 'https://aoai-dai-dev-scus.openai.azure.com/', 'model': 'gpt-35-turbo', 'deployment': 'gpt-35-turbo'}, {'endpoint': 'https://aoai-dai-dev-scus.openai.azure.com/', 'model': 'text-embedding-ada-002', 'deployment': 'text-embedding-ada-002'}, {'endpoint': 'https://aoai-dai-dev-scus.openai.azure.com/', 'model': 'text-davinci-003', 'deployment': 'davinci-003'}]
end=https://aoai-dai-dev-use.openai.azure.com/
request.status_code=200
1536


In [6]:
# Chat with the AI service and run this over and over to get a 429
question="Tell me a funny joke"
messages=[{"role":"user","content":question}]
answer=aoaiCls.Chat(messages)
print(answer)

# If one resource gets a 429, it will move to the next resource

end=https://aoai-dai-dev-use.openai.azure.com/
request.status_code=429
end=https://aoai-dai-dev-scus.openai.azure.com/
request.status_code=200
{'role': 'assistant', 'content': "Why don't scientists trust atoms? \nBecause they make up everything."}
