## <center><a><span style="color:red">`OpenAI LLM` - Inference Experiments</span></a></center>

### Load the needed libraries

In [1]:
import requests
import yaml

### Load OpenAI API Key

In [2]:
# Parse the local credentials file (kept outside the notebook so the key is never hardcoded here).
with open('../secrets/credentials.yml') as credentials_file:
    config = yaml.safe_load(credentials_file)

# The OpenAI API key is stored under the 'OPENAI_CREDENTIALS' entry of that file.
OPENAI_CREDENTIALS = config['OPENAI_CREDENTIALS']

## <a><span style="color:green">Query `OpenAI API`</span></a>
### Functions to query the OpenAI API and price a call

In [3]:
def query_openai(api_key, model, prompt_text, max_completion_tokens=100, temperature=0.7, timeout=60):
    """
    Queries an OpenAI chat model with customizable parameters and returns the response or a structured error message.

    :param api_key: OpenAI API key.
    :param model: Model to query (e.g., "gpt-3.5-turbo").
    :param prompt_text: Text prompt to send to the model (sent as a single "user" message).
    :param max_completion_tokens: Maximum number of tokens to generate in the completion
        (sent as the "max_tokens" field of the request).
    :param temperature: Controls randomness in the generation. Lower values make the model more deterministic.
    :param timeout: Seconds to wait for the HTTP request before giving up. Pass None to wait indefinitely.
    :return: On success, a tuple ``(content, usage)`` holding the text of the first choice and the
        "usage" object of the response. On any failure (connection problem, non-200 status,
        unexpected response body), a dict ``{"error": "<description>"}``.
    """
    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt_text}],
        "max_tokens": max_completion_tokens,
        "temperature": temperature,
    }

    try:
        # Without a timeout, requests waits forever on a stalled connection and blocks the kernel.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return {"error": f"Connection error: {e}"}

    if response.status_code != 200:
        try:
            error_details = response.json().get('error', {})
            message = error_details.get('message', 'An unspecified error occurred')
        except (ValueError, AttributeError):
            # Body is not JSON, or the JSON does not have the expected {"error": {...}} shape.
            message = "Error details unavailable"
        return {"error": f"HTTP Error {response.status_code}: {message}"}

    try:
        data = response.json()
        # Extracting information from the response
        content = data['choices'][0]['message']['content']
        usage = data['usage']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 whose body is not the expected chat-completion shape is reported, not raised,
        # so that every failure mode reaches the caller as the same {"error": ...} structure.
        return {"error": f"Unexpected response format: {e!r}"}

    return (content, usage)


def calculate_inference_price(query_results, input_token_price, output_token_price):
    """
    Computes the cost of one inference call from its token usage.

    :param query_results: Mapping with "prompt_tokens" and "completion_tokens" counts
        (e.g., the usage object returned by query_openai).
    :param input_token_price: Price charged per prompt (input) token.
    :param output_token_price: Price charged per completion (output) token.
    :return: Prompt cost plus completion cost.
    """
    prompt_cost = query_results["prompt_tokens"] * input_token_price
    completion_cost = query_results["completion_tokens"] * output_token_price
    return prompt_cost + completion_cost

### Example usage
##### Define the parameters

In [4]:
# Model to query
model = "gpt-3.5-turbo-0125"

# Per-token prices, written as "price per 1M tokens / 1M"
input_token_price = 0.50 / 1_000_000
output_token_price = 1.50 / 1_000_000

# Generation settings passed to query_openai
max_completion_tokens = 1_000
temperature = 0.5

##### Define the prompt 

In [5]:
# Translation prompt: an instruction line followed by the text to translate.
# The leading/trailing newlines and the space after the colon are part of the prompt.
prompt = (
    "\n"
    "Translate the following French text to Arabic: \n"
    "Saad LABRIJI\n"
)

##### Query OpenAI

In [6]:
response = query_openai(OPENAI_CREDENTIALS, model, prompt, max_completion_tokens, temperature)

# query_openai returns a (content, usage) tuple on success and an {"error": ...} dict on failure;
# indexing the error dict with [1] would raise KeyError, so report the failure explicitly instead.
if isinstance(response, dict) and "error" in response:
    print(f"Request failed: {response['error']}")
else:
    content, usage = response
    print(f"Price: ${calculate_inference_price(usage, input_token_price, output_token_price)}")
    print("-------------")
    print(content)

Price: $2.3500000000000002e-05
-------------
سعد لبريجي


## <a><span style="color:green">`Prompt Preparation` - expertise area detection</span></a>

In [7]:
def determine_expertise_area(api_key, user_question, model="gpt-3.5-turbo"):
    """
    Queries OpenAI to determine the expertise area(s) relevant to a user's question. The function formats the request to
    ask for a bare comma-separated list and returns the model's answer with surrounding whitespace removed.

    :param api_key: OpenAI API key.
    :param user_question: The user's question whose expertise area(s) should be identified.
    :param model: Chat model to query.
    :return: The model's answer, stripped of leading/trailing whitespace (requested format: 'expertise1, expertise2, ...').
    :raises ValueError: If query_openai reports a failure; the message carries the reported error.
    """
    # Prepare the prompt
    prompt_text = f"""Based on the question provided, identify the relevant expertise area(s). Return your answer in the format: 
    'expertise1, expertise2, ...'. Provide only the expertise areas as a comma-separated list, no additional explanations are needed.
    Here is the user Question:
    {user_question}
    """
    result = query_openai(api_key, model, prompt_text, max_completion_tokens=100, temperature=0.3)

    # query_openai signals failure with an {"error": ...} dict instead of a (content, usage) tuple.
    # Unpacking that dict would fail with an unrelated "not enough values to unpack" message,
    # so surface the actual error text instead.
    if isinstance(result, dict) and "error" in result:
        raise ValueError(f"OpenAI query failed: {result['error']}")

    content, _usage = result
    return content.strip()

#### Example usage

In [8]:
# Ask the model which expertise area(s) this sample question belongs to, then show the answer.
user_question = "help me understand the Airflow concept in my files."
expertise_area = determine_expertise_area(api_key=OPENAI_CREDENTIALS, user_question=user_question)
print(expertise_area)

Data Engineering, Apache Airflow


## <a><span style="color:green">`Prompt Preparation` - contextual prompt preparation</span></a>

In [9]:
def prepare_prompt_for_llm(expertise_area, user_question, context_documents):
    """
    Builds the full prompt sent to the LLM for a question that comes with context documents.

    The prompt is assembled from four parts, in order: an introduction naming the expertise
    area and quoting the question, the numbered documents (each wrapped in triple quotes),
    a request to synthesize an answer, and a final cue to start responding.

    :param expertise_area: Expertise area(s) the model is told it is an expert in.
    :param user_question: The user's question, quoted inside the introduction.
    :param context_documents: Iterable of documents; they are numbered starting at 1.
    :return: The assembled prompt string.
    """
    # Role assignment and the question itself
    introduction = (
        f"You are an expert in '{expertise_area}'. A user has asked for help with the following question: "
        f"'{user_question}'. Please provide insights using only the information from the provided documents. "
        "If certain aspects are ambiguous or the documents do not fully address the question, please make educated inferences based on your expertise.\n\n"
    )

    # Context: one triple-quoted section per document
    documents_header = "Here are the documents provided:\n\n"
    document_sections = [
        f'Document {number}:\n"""\n{text}\n"""\n\n'
        for number, text in enumerate(context_documents, start=1)
    ]

    # Closing instructions
    synthesis_request = (
        "Given your expertise and the information provided in these documents, synthesize the key insights to craft a detailed and relevant response to the above question.\n\n"
    )
    response_cue = "Start your response below:\n\n"

    return "".join(
        [introduction, documents_header, *document_sections, synthesis_request, response_cue]
    )

In [10]:
# Example usage
user_question = "What is retrieval-augmented generation?"

# Simulate docs
context_documents = [
    "Document 1 content about RAG...",
    "Document 2 content about LLMs & RAGs...",
]

# Detremine the expertise area
expertise_area = determine_expertise_area(OPENAI_CREDENTIALS, user_question)

# Prepare the prompt
prompt = prepare_prompt_for_llm(expertise_area, user_question, context_documents)
print(prompt)

You are an expert in 'Natural Language Processing, Information Retrieval'. A user has asked for help with the following question: 'What is retrieval-augmented generation?'. Please provide insights using only the information from the provided documents. If certain aspects are ambiguous or the documents do not fully address the question, please make educated inferences based on your expertise.

Here are the documents provided:

Document 1:
"""
Document 1 content about RAG...
"""

Document 2:
"""
Document 2 content about LLMs & RAGs...
"""

Given your expertise and the information provided in these documents, synthesize the key insights to craft a detailed and relevant response to the above question.

Start your response below:




## Connect with me 🌐
<div align="center">
  <a href="https://www.linkedin.com/in/labrijisaad/">
    <img src="https://img.shields.io/badge/LinkedIn-%230077B5.svg?&style=for-the-badge&logo=linkedin&logoColor=white" alt="LinkedIn" style="margin-bottom: 5px;"/>
  </a>
  <a href="https://github.com/labrijisaad">
    <img src="https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white" alt="GitHub" style="margin-bottom: 5px;"/>
  </a>
</div>