In [1]:
from openai import OpenAI
import os

In [2]:
# Since we will store api_keys in .env file
# Use this command in GitBash:
"""
cat > .env << EOL
PINECONE_API_KEY=your-actual-pinecone-api-key
OPENAI_API_KEY=your-openai-api-key
EOL
"""
from dotenv import load_dotenv
load_dotenv()  

# Craete you openai api key here: https://platform.openai.com/settings/organization/api-keys
client = OpenAI(api_key = os.environ.get("OPENAI_API_KEY"))

In [5]:
def test_prompt_openai(prompt, suppress = False, model = "gpt-3.5-turbo", **kwargs):
    """
    Test a prompt using OpenAI API: Takes in a simple prompt and returns the response from OpenAI API.
    Args:  
        prompt (str): The prompt to be tested.
        suppress (bool): If True, suppresses the print statement and returns the response instead.
        model (str): The OpenAI model to use. Default is "gpt-3.5-turbo".
        **kwargs: Additional arguments to be passed to the OpenAI API.
    """
    chat_completion = client.chat.completions.create(
        model = model,
        messages = [
            {"role": "user", "content": prompt}
        ],
        **kwargs 
    )
    answer = chat_completion.choices[0].message.content
    if not suppress:
        print(f"PROMPT:\n---------\n{prompt}\n---------\nRESPONSE:\n---------\n{answer}")
    else:
        return answer

# Just ask questions like you do while using ChatGPT

In [7]:
test_prompt_openai("Translate to Nepali.\n\n Which country Mt. Everest is in?")

PROMPT:
---------
Translate to Nepali.

 Which country Mt. Everest is in?
---------
RESPONSE:
---------
माउन्ट एभरेस्ट कुन देशमा छ?


# Few-shot learning
Using examples to "teach" GPT-3 what to do: [ Language Models are Few Shot Learners](https://arxiv.org/abs/2005.14165)

In [9]:
# few-shot examples for subjective vs objective classification
examples = [
    ('Review: This movie sucks\n Subjective: Yes'),
    ('Review: The cinematography was beautiful with stunning landscape shots\n Subjective: No'),
    ("Review: The lead actor delivered the worst performance I've ever seen\n Subjective: Yes"),
    ("Review: I couldn't stop crying during the ending\n Subjective: Yes"),
    ('Review: The film runs for 2 hours and 15 minutes\n Subjective: No'),
    ('Review: The screenplay was adapted from the 2018 novel of the same name\n Subjective: No'),
    ('Review: This is definitely the best comedy of the year\n Subjective: Yes'),
    ('Review: The plot twists were predictable and boring\n Subjective: Yes'),
    ('Review: The director used several long tracking shots throughout the movie\n Subjective: No'),
    ('Review: The original score was composed by Hans Zimmer\n Subjective: No'),
    ('Review: Anyone who enjoys this movie has terrible taste\n Subjective: Yes'),
    ('Review: The special effects looked cheap and unconvincing\n Subjective: Yes'),
    ('Review: The dialogue was stilted and unnatural\n Subjective: Yes'),
    ('Review: The film was shot on location in New Zealand\n Subjective: No'),
    ('Review: The movie earned $50 million in its opening weekend\n Subjective:')
]

# Lets use ### as a few-shot separator to join the examples above
test_prompt_openai("\n###\n".join(examples))

PROMPT:
---------
Review: This movie sucks
 Subjective: Yes
###
Review: The cinematography was beautiful with stunning landscape shots
 Subjective: No
###
Review: The lead actor delivered the worst performance I've ever seen
 Subjective: Yes
###
Review: I couldn't stop crying during the ending
 Subjective: Yes
###
Review: The film runs for 2 hours and 15 minutes
 Subjective: No
###
Review: The screenplay was adapted from the 2018 novel of the same name
 Subjective: No
###
Review: This is definitely the best comedy of the year
 Subjective: Yes
###
Review: The plot twists were predictable and boring
 Subjective: Yes
###
Review: The director used several long tracking shots throughout the movie
 Subjective: No
###
Review: The original score was composed by Hans Zimmer
 Subjective: No
###
Review: Anyone who enjoys this movie has terrible taste
 Subjective: Yes
###
Review: The special effects looked cheap and unconvincing
 Subjective: Yes
###
Review: The dialogue was stilted and unnatural
 

In [13]:
# Lets see what happens without the examples
test_prompt_openai("Review: The movie was a masterpiece\n Subjective:")

# It absolutely hallucinates as we dont know anything about the movie

PROMPT:
---------
Review: The movie was a masterpiece
 Subjective:
---------
RESPONSE:
---------
 I absolutely loved every moment of this film. The acting, direction, and cinematography were all top-notch. The story was captivating and kept me engaged from start to finish. I would highly recommend this movie to anyone looking for a thought-provoking and emotional experience. It truly was a masterpiece in every sense of the word.


In [11]:
# Lets see what happens with a prompt
test_prompt_openai("Tell me the subjectivity of this review:\n\nReview: The movie was a masterpiece\n Subjective:")

PROMPT:
---------
Tell me the subjectivity of this review:

Review: The movie was a masterpiece
 Subjective:
---------
RESPONSE:
---------
The subjectivity of this review is high, as the term "masterpiece" is a subjective opinion that can vary greatly from person to person.


In [12]:
# Lets ask it to be more specific: Yes or No
test_prompt_openai("Tell me the subjectivity of this review with either 'Yes' or 'No':\n\nReview: The movie was a masterpiece\n Subjective:")

PROMPT:
---------
Tell me the subjectivity of this review with either 'Yes' or 'No':

Review: The movie was a masterpiece
 Subjective:
---------
RESPONSE:
---------
Yes


In [17]:
# We can ask the LLM to format answer as well
test_prompt_openai("Tell me the subjectivity of this review with either 'Yes' or 'No'. Also as a JSON with review and answer:" \
"\n\nReview: The movie was a masterpiece\n Subjective:")


PROMPT:
---------
Tell me the subjectivity of this review with either 'Yes' or 'No'. Also as a JSON with review and answer:

Review: The movie was a masterpiece
 Subjective:
---------
RESPONSE:
---------
Yes

{
  "review": "The movie was a masterpiece",
  "Subjective": "Yes"
}


# Persona/Style

In [20]:
style = 'rude'
test_prompt_openai(f"Respond to the customer as a {style} customer service agent.\n\n Customer: Hi there,\
I cannot login to my account. Would you mind helping me please?\n\n Agent:") 

PROMPT:
---------
Respond to the customer as a rude customer service agent.

 Customer: Hi there,I cannot login to my account. Would you mind helping me please?

 Agent:
---------
RESPONSE:
---------
Well, how about you try remembering your password next time before bothering me with your incompetence? But fine, I'll help you this one time. Just give me your account information and I'll see what I can do. Hurry up, I don't have all day.


In [21]:
style = 'yoda'
test_prompt_openai(f"Respond to the customer as a {style} customer service agent.\n\n Customer: Hi there,\
I cannot login to my account. Would you mind helping me please?\n\n Agent:") 

PROMPT:
---------
Respond to the customer as a yoda customer service agent.

 Customer: Hi there,I cannot login to my account. Would you mind helping me please?

 Agent:
---------
RESPONSE:
---------
Trouble logging in, you are having? Help you, I can. Provide me with your account details, you must. Assist you, I will. Patience, you must have.


# Output validation and bias
Our AI system employs a secondary validation framework that scrutinizes all generated content before delivery to end users. Each output must successfully pass through a secondary system—either the same LLM or an alternative model—which conducts thorough testing for accuracy, bias, safety, and relevance. This multi-layered approach ensures that only high-quality, reliable responses reach our users, while simultaneously minimizing potential biases and improving overall system performance. The validation process continuously evolves based on previous assessments, creating a feedback loop that enhances content quality over time and maintains consistent standards across all AI-generated communications.

In [23]:
from transformers import pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cpu


In [25]:
sequence_to_classify = "You are a Nepalese from a poor country. Go tell your government \
to change the regulation. Binance does not allow you to have an account."

candidate_labels = ['racist', 'sexist', 'rude']

classifier(sequence_to_classify, candidate_labels, multi_label = True)

{'sequence': 'You are a Nepalese from a poor country. Go tell your government to change the regulation. Binance does not allow you to have an account.',
 'labels': ['rude', 'racist', 'sexist'],
 'scores': [0.8368093371391296, 0.5701773762702942, 0.16222861409187317]}

In [26]:
classifier("Do you have your Binance login info? If not, there is nothing I can help you with",
           candidate_labels, multi_label = True)

{'sequence': 'Do you have your Binance login info? If not, there is nothing I can help you with',
 'labels': ['rude', 'sexist', 'racist'],
 'scores': [0.09379145503044128, 0.02049974910914898, 0.010429170913994312]}

In [37]:
from tqdm import tqdm

style = 'friendly'
responses = []
for _ in tqdm(range(10)):
    responses.append(test_prompt_openai(
        f"Respond to the customer as a {style} customer service agent.\n\n Customer: Hi there! I am having trouble\
        logging into my account. Can you help?",
        temperature = 0, # mathematically, we cannot set temp as 0 but OpenAI is doing something under the hood
        suppress = True
    ))

100%|██████████| 10/10 [00:09<00:00,  1.08it/s]


In [None]:
# Only 2 unique response, no matter how many time we run it - "deterministic" responses
print(responses, end = "\n\n")
print(f"Unique response: {len(set(responses))}")

["Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'd be happy to help you with that. Can you please provide me with your account informati

In [35]:
# Lets set temperature to OpenAI detault 1.0
from tqdm import tqdm

style = 'friendly'
responses = []
for _ in tqdm(range(10)):
    responses.append(test_prompt_openai(
        f"Respond to the customer as a {style} customer service agent.\n\n Customer: Hi there! I am having trouble\
        logging into my account. Can you help?",
        temperature = 1.0, # OpenAI default
        suppress = True
    ))

100%|██████████| 10/10 [00:08<00:00,  1.16it/s]


In [36]:
# 10 different similar responses but technically unique, no matter how many time we run it - "random/creative" responses
print(responses, end = "\n\n")
print(f"Unique response: {len(set(responses))}")

["Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you out. Could you please provide me with your account information so that I can assist you further?", "Customer Service: Hello! I'd be happy to help you with that. Can you please provide me with your account information so I can look into the issue?", "Customer Service: Hello! I'm sorry to hear you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account details so I can look into this further for you?", "Customer Service Agent: Hi there! I'd be happy to help you with that. Can you please provide me with your account information so I can look into this for you?", "Customer Service Agent: Hello! I'd be happy to help you with that. Could you please provide me with your account information so I can assist you in troubleshooting the issue? Thank you!", "Customer Service Agent: Of course! I'd be happy to 

# Temperature and Top p Parameter in OpenAI LLM
OpenAI generally recommends adjusting either temperature or top_p, but not both, to avoid unintended effects.

<b>Temperature</b>
- Controls randomness/creativity in text generation
- Higher values (e.g., 0.7): more diverse and creative output
- Lower values (e.g., 0.2): more deterministic and focused
- Temperature of 0: always selects the most likely token
- Affects the probability distribution across all possible tokens

<b>Top-p Sampling (Nucleus Sampling)</b>
- Alternative to temperature sampling
- Only considers tokens whose cumulative probability reaches the threshold (top_p)
- Example: top_p = 0.1 means only tokens making up top 10% of probability mass are considered
- Allows for dynamic vocabulary selection based on context
- In top_p, probabilities are sorted in descending order and cumulative probabilities are used

<b>Use Cases</b>
| Use Case | Temperature | Top_p | Description |
|----------|-------------|-------|-------------|
| Code Generation | 0.2 | 0.1 | Deterministic, focused output for correct syntax |
| Creative Writing | 0.7 | 0.8 | Diverse, exploratory text for storytelling |
| Chatbot Responses | 0.5 | 0.5 | Balanced coherence and diversity |
| Code Comments | 0.3 | 0.2 | Concise, relevant documentation |
| Data Analysis Scripts | 0.2 | 0.1 | Correct, efficient analysis code |
| Exploratory Coding | 0.6 | 0.7 | Creative approaches, alternative solutions |

Both parameters can be used independently or together to achieve different levels of creativity and control.

In [41]:
# Lets set temperature to OpenAI detault 1.0
from tqdm import tqdm

style = 'friendly'
responses = []
for _ in tqdm(range(10)):
    responses.append(test_prompt_openai(
        f"Respond to the customer as a {style} customer service agent.\n\n Customer: Hi there! I am having trouble\
        logging into my account. Can you help?",
        temperature = 1.0, # OpenAI default
        top_p = 0.1, # OpenAI default is 1.0 (all all tokens)
        suppress = True
    ))

# restricting top p allows fewer tokens to be considered, making the model more deterministic
print(responses, end = "\n\n")
print(f"Unique response: {len(set(responses))}")

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:09<00:00,  1.03it/s]

["Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'd be happy to help you with that. Can you please provide me with your account information so I can assist you in logging in?", "Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'm sorry to hear that you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account information so I can assist you further?", "Customer Service: Hello! I'm sorry to hear you're having trouble logging into your account. I'd be happy to help you with that. Can you please provide me with your account informat


