Load OpenAI's ChatGPT

In [None]:
from openai import OpenAI

In [None]:
import os

# Read the key from the environment so a real credential never has to be saved
# inside the notebook; the original placeholder is kept as the fallback value.
# NOTE(review): the placeholder mentions a GitHub key - if a non-OpenAI endpoint
# is intended, a matching `base_url` presumably has to be passed as well; confirm.
openai_client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "Your Github API Key"),
)

Demo Message Passing

In [None]:
# Display the client object to confirm it was constructed.
openai_client

In [None]:
# Single-turn conversation: one user message asking a factual question.
message = [
    {
        "role": "user",
        "content": "What is the capital of France?",
    },
]

# Ask the chat-completions endpoint for a reply to that message.
output = openai_client.chat.completions.create(model="gpt-4o-mini", messages=message)

In [None]:
# Show the full response object returned by the chat-completions call.
output

In [None]:
# The reply text lives on the message of the first choice. That message is a
# single object, not a list, so it is read without an index.
output.choices[0].message.content

Adding Temperature

In [None]:
message = [{"role": "user", "content": "Give me 3 names of new AI assistants?"}]

# Same request shape as before, with an explicit sampling temperature forwarded
# to the API.
output = openai_client.chat.completions.create(
    model = "gpt-4o-mini",
    messages = message,
    temperature = 0.7
)

# `message` on a choice is one object, not a list, so it is not indexed.
output.choices[0].message.content

OpenAI's Response Function

In [None]:
def get_response_openai(open_client, prompt, temperature = 0.5):
    """Send one user prompt to the chat model and return the reply text.

    Args:
        open_client: Client object exposing ``chat.completions.create``.
        prompt: Text sent as the content of a single ``user`` message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    message = [{"role": "user", "content": prompt}]
    # Use the client that was passed in; relying on a notebook-level global
    # would silently ignore the argument.
    output = open_client.chat.completions.create(
        model = "gpt-4o-mini",
        messages = message,
        temperature = temperature
    )
    # The choice's message is a single object, so it must not be indexed.
    return output.choices[0].message.content

For handling multiple messages

In [None]:
def get_response_openai(open_client, message, temperature = 0.5):
    """Send a conversation (or a single prompt) to the chat model.

    This definition replaces the earlier helper of the same name when the
    notebook runs top to bottom, and later cells still call it with a plain
    string, so both input shapes are accepted.

    Args:
        open_client: Client object exposing ``chat.completions.create``.
        message: Either a list of ``{"role": ..., "content": ...}`` dicts, or
            a plain string that is wrapped into a single ``user`` message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    if isinstance(message, str):
        message = [{"role": "user", "content": message}]
    # Use the client that was passed in rather than a notebook-level global.
    output = open_client.chat.completions.create(
        model = "gpt-4o-mini",
        messages = message,
        temperature = temperature
    )
    # The choice's message is a single object, so it must not be indexed.
    return output.choices[0].message.content

In [None]:
prompt = "What is the capital of France?"

# Quick check of the helper with a plain string prompt; the trailing
# expression displays the returned reply.
output = get_response_openai(openai_client, prompt)
output

Load Llama from Hugging Face

In [None]:
import os

# Hub identifier of the model to load (placeholder - replace before running).
model_name = "Your HuggingFace Model Name"

# Take the access token from the environment so it is not stored in the
# notebook; the original placeholder stays as the fallback value.
hf_token = os.environ.get("HF_TOKEN", "Your HuggingFace Token")

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import huggingface_hub

In [None]:
# Authenticate against the Hugging Face Hub with the token defined above.
huggingface_hub.login(token=hf_token)

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(42)

# NOTE(review): trust_remote_code=True allows the model repository to run its
# own Python code at load time - confirm the chosen model actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
    torch_dtype="auto",
    trust_remote_code=True,
)

In [None]:
# Load the tokenizer that belongs to the same model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token id for padding (presumably because the
# tokenizer defines no pad token of its own - confirm for the chosen model).
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
# Wrap the loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
def get_response_hf(pipe, prompt, temperature=0.5):
    """Ask the pipeline to answer a single user prompt.

    Args:
        pipe: Callable invoked with a list of chat messages plus generation
            keyword arguments.
        prompt: Text placed in one ``user`` message.
        temperature: Forwarded to the pipeline; a value of 0 turns sampling off.

    Returns:
        The ``generated_text`` entry of the first result.
    """
    chat = [{"role": "user", "content": prompt}]
    # Sampling is only enabled for a non-zero temperature.
    use_sampling = not (temperature == 0)

    results = pipe(
        chat,
        max_new_tokens=2000,
        return_full_text=False,
        temperature=temperature,
        do_sample=use_sampling,
    )
    first_result = results[0]
    return first_result["generated_text"]

For handling multiple messages

In [None]:
def get_response_from_messages_hf(pipe, messages, temperature=0.5):
    """Ask the pipeline to continue an existing list of chat messages.

    Args:
        pipe: Callable invoked with the messages plus generation keyword
            arguments.
        messages: List of ``{"role": ..., "content": ...}`` dicts, passed
            through unchanged.
        temperature: Forwarded to the pipeline; a value of 0 turns sampling off.

    Returns:
        The ``generated_text`` entry of the first result.
    """
    # Sampling is only enabled for a non-zero temperature.
    if temperature == 0:
        do_sample = False
    else:
        do_sample = True

    generation_args = dict(
        max_new_tokens=2000,
        return_full_text=False,
        temperature=temperature,
        do_sample=do_sample,
    )
    return pipe(messages, **generation_args)[0]["generated_text"]

Prompt Engineering

Principle 1 - Write clear and specific instructions

Use Delimiters to clearly indicate parts of the prompt (xml tag, ticks, dashes, backticks, etc.)

In [None]:
review = "This product is Awesome and I like it so much"

# The review is wrapped in triple backticks so the instruction and the text
# being classified are clearly separated inside the prompt.
prompt = f"""
Determine the sentiment (Positive, Negative, Neutral) of the following review.
The review is between three backticks

```{review}```
"""

In [None]:
# Send the delimiter prompt through the OpenAI helper and print the reply.
output = get_response_openai(openai_client, prompt)
print(output)

In [None]:
# Same prompt, answered through the Hugging Face pipeline helper.
output = get_response_hf(pipe, prompt)
print(output)

Ask for a structured output (json, xml, etc.)

In [None]:
review = "This product is Awesome and I like it so much"

# Extends the delimiter prompt with an explicit JSON output contract so the
# reply can be parsed programmatically.
prompt = f"""
Determine the sentiment (Positive, Negative, Neutral) of the following review.
The review is between three backticks

```{review}```

Generate the answer in a JSON format that has the following fields:
-"sentiment" - string that is one of the following values: "Positive", "Negative", "Neutral"

Always respond with a valid JSON. Do Not include any extra characters, symbols or text outside the JSON itself
"""

In [None]:
# Ask the OpenAI helper for the JSON-formatted sentiment and print it.
output = get_response_openai(openai_client, prompt)
print(output)

In [None]:
# Ask the Hugging Face pipeline helper for the JSON-formatted sentiment.
output = get_response_hf(pipe, prompt)
print(output)

In [None]:
import json
# Parse the most recent reply; this raises if the text is not valid JSON.
json.loads(output)

Few Shot Learning

In [None]:
review = "The lighting in the shop is so warm, and it makes the place feel inviting"

# Few-shot prompt: four labelled example reviews show the model what each
# category looks like. The JSON field is named "category" to match the task,
# and every option in the list is fully quoted.
prompt = f"""
Determine the category of the review
It's one of those 4 options: "Service", "Quality", "Ambience", or "Pricing"
The user review is between three backticks

```{review}```

Here are some examples of reviews and their categories:

Review: "The barista was incredibly friendly and made my drink quickly."
Category: "Service"

Review: "The cappuccino was perfect, and the beans tasted fresh."
Category: "Quality"

Review: "I love the cozy seating and relaxing music in the shop"
Category: "Ambience"

Review: "the prices are a bit too high compared to other cafes nearby"
Category: "Pricing"


Generate the answer in a JSON format that has the following fields:
-"category" - string that is one of the following values: "Service", "Quality", "Ambience", "Pricing"

Always respond with a valid JSON. Do Not include any extra characters, symbols or text outside the JSON itself
"""

In [None]:
# Run the few-shot classification prompt through the Hugging Face helper.
output = get_response_hf(pipe, prompt)
print(output)

Principle 2 - Give the model time to think

Make the model think step by step

In [None]:
# Baseline version of the puzzle: the model is asked for the number only,
# with no room to reason. Plain string - nothing is interpolated here.
prompt = """
I bought two balls with 10$. One ball is more expensive than the other by 1$.
How much is the expensive ball?

Please provide the answer in one number
"""

In [None]:
# Get the direct, number-only answer to the puzzle.
output = get_response_hf(pipe, prompt)
print(output)

In [None]:
# Same puzzle, but the model is asked to reason step by step before giving
# the answer. Plain string - nothing is interpolated here.
prompt = """
I bought two balls with 10$. One ball is more expensive than the other by 1$.
How much is the expensive ball?

Please think about this step by step and at the end please provide the answer
"""

In [None]:
# Get the step-by-step answer to the puzzle.
output = get_response_hf(pipe, prompt)
print(output)

Specify the steps required to complete the task

In [None]:
# Bare question with no guidance on how to count the letters.
prompt = "\nHow many r's are in Strawberry?\n"

In [None]:
# Ask the unguided letter-counting question.
output = get_response_hf(pipe, prompt)
print(output)

In [None]:
# Same question, now with an explicit procedure the model should follow.
# Plain string - nothing is interpolated here.
prompt = """
How many r's are in Strawberry?

Follow the steps below to count the number of r's in Strawberry
1. Break down the word into letters
2. For each letter write 1 if it's an r and 0 if it isn't
3. Now have a counter that counts the number of 1's
4. Now write down the final answer
"""

In [None]:
# Ask the letter-counting question with the explicit steps.
output = get_response_hf(pipe, prompt)
print(output)

Text Summarization

In [None]:
# Sample review reused by the summarization, translation, entity, topic and
# extraction prompts. The trailing backslashes join the sentences into one
# paragraph. The closing sentence now contrasts the ambience with the coffee;
# previously it named the coffee on both sides of the "but".
customer_review = """
I recently visited Coffe Haven and ordered a caramel latte with almond milk. \
The staff was friendly, but the service was slow. It took almost 20 minutes to get my drink. \
The latte itself was too sweet for my liking, and I could barely taste the coffee. \
 However, the ambience was cozy, and I loved the music they played. \
 I might come back for the ambience, but not for the coffee.
"""

In [None]:
# Wrap the review in <customer_review> tags and ask for a one-sentence summary.
prompt = f"""
The user review is between xml tag called customer_review

<customer_review>
{customer_review}
</customer_review>

Please summarize this review in one sentence
""" 

In [None]:
# Generate and print the one-sentence summary.
output = get_response_hf(pipe, prompt)
print(output)

Translation

In [None]:
# Same tagged review, this time with a request for a French translation only.
prompt = f"""
The user review is between xml tag called customer_review

<customer_review>
{customer_review}
</customer_review>

Please translate the customer review into French and provide only the translated text with no xml tags
""" 

In [None]:
# Generate and print the translation.
output = get_response_hf(pipe, prompt)
print(output)

Extract Named Entities

In [None]:
# Named-entity prompt: the allowed entity types are listed explicitly and the
# output format ("named entity: type") is fixed.
prompt = f"""
The user review is between xml tag called customer_review

<customer_review>
{customer_review}
</customer_review>

Use only the list of named entities below:
Person Names
Organizations
Locations
Cities
Countries
Continents
Regions
Dates
Times
Monetary Values
Percentages
Quantities
Products
Events

Please extract all named entities and their types. 
Use the following format.
named entity: type
"""

In [None]:
# Generate and print the extracted entities.
output = get_response_hf(pipe, prompt)
print(output)

Topic Modeling

In [None]:
# Topic-modeling prompt: asks for a title, five keywords and grouped phrases
# for every topic found in the tagged review.
prompt = f"""
The user review is between xml tag called customer_review

<customer_review>
{customer_review}
</customer_review>

Analyze the text corpus above and extract the key topics discussed:

For each topic:

Provide a clear and concise topic title.
List the top 5 most representative keywords for that topic.
Group related sentiments or phrases from the corpus under the identified topic
"""

In [None]:
# Generate and print the topics.
output = get_response_hf(pipe, prompt)
print(output)

Information Extraction

In [None]:
# Information-extraction prompt: the review is delimited with backticks and
# the reply must be JSON with four named fields.
prompt = f"""
The user review is between three backticks

```{customer_review}```

Generate the answer in a JSON format that has the following fields:
- "products" - string name of product
- "sentiment" - string that is one of those values "Positive", "Negative", "Neutral"
- "main likes" - string with the user's main likes with the product
- "main dislikes" - string with the user's main dislikes with the product

Always respond with valid JSON. Do not include any extra characters, symbols or text outside the JSON itself
"""

In [None]:
# Generate and print the extracted fields.
output = get_response_hf(pipe, prompt)
print(output)

Sentiment Analysis

In [None]:
review = "This product is Awesome and I like it so much"

# Sentiment prompt with a backtick-delimited review and a JSON output contract.
prompt = f"""
Determine the sentiment (Positive, Negative, Neutral) of the following review.
The review is between three backticks

```{review}```

Generate the answer in a JSON format that has the following fields:
-"sentiment" - string that is one of the following values: "Positive", "Negative", "Neutral"

Always respond with a valid JSON. Do Not include any extra characters, symbols or text outside the JSON itself
"""

In [None]:
# Generate and print the sentiment JSON.
output = get_response_hf(pipe, prompt)
print(output)

Text Classification

In [3]:
# Sample message used as input for the spam-classification prompt.
email = """ 
Dear Beloved,
I am Prince Okoro of Nigeria, and I need your urgent elp to transfer $15 million USD into your account. \
In return you will receive 30% of the total amount. Please reply with your banking details so we can proceed immediately. This is a time sensitive matter.
Best Regards,
Prince Okoro
"""

In [None]:
# Spam-detection prompt: the message is wrapped in <email> tags and the reply
# must be JSON with a single "spam" field.
prompt = f"""
The email is between xml tag called email. Your aim is to detect whether it's spam or not.

<email>
{email}
</email>

Generate the answer in a JSON format that has the following field:
- "spam" - string that is one of those values "Spam", "Not Spam"

Always respond with a valid JSON. Do not include any extra characters, symbols or text outside the JSON itself
"""

In [None]:
# Generate and print the spam verdict.
output = get_response_hf(pipe, prompt)
print(output)

Making a Chatbot for a specific context

System Prompts

In [None]:
# System message that fixes the assistant's role (sentiment classifier) and
# its JSON-only output format. Assembled line by line; nothing is interpolated.
system_prompt = (
    "\n"
    "You are a sentiment classifier that classifies reviews into three categories Positive, Negative, Neutral.\n"
    "\n"
    "You generate the answer in a JSON format that has the following field:\n"
    '- "sentiment" - string that is one of the following values: "Positive", "Negative", "Neutral"\n'
    "\n"
    "Always respond with a valid JSON. Do not include any extra characters, symbols or text outside the JSON itself\n"
)

In [None]:
# Review text sent as the user turn; the instructions live in the system prompt.
user_comment = "This product is Awesome and I like it so much"

In [None]:
# Two-message conversation: the system prompt first, then the user's review.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_comment}
]

In [None]:
# Send the system + user messages through the pipeline and print the reply.
output = get_response_from_messages_hf(pipe, messages)
print(output)

Conversational Messages

In [None]:
# Start a new conversation containing a single user question.
messages = [
    {"role": "user", "content": "What is the fastest animal in the world?"}
]

In [None]:
# Get and print the answer to the first question.
output = get_response_from_messages_hf(pipe, messages)
print(output)

In [None]:
# Store the reply as an assistant turn so the next request includes it.
# Re-running this cell appends the same turn again.
messages.append({"role": "assistant", "content": output })

In [None]:
# Follow-up question; "its" only makes sense together with the earlier turns.
messages.append({"role": "user", "content": "What is its average size?"})

In [None]:
# Inspect the accumulated conversation history.
print(messages)

In [None]:
# Answer the follow-up using the whole message history.
output = get_response_from_messages_hf(pipe, messages)
print(output)

Customer Service Chatbot

In [None]:
# System prompt for the ordering chatbot: it describes the conversation flow
# the bot should follow and embeds the full menu with prices.
system_prompt = """
You are OrderBot, an automated service to collect orders for a sandwich shop.
You first greet the customer, then collect the order,
and then ask if it's a pickup or delivery.
You wait to collect the entire order, then summarize it and check for a final
time if the customer wants to add anything else.
If it's a delivery, you ask for an address.
Finally, you collect the payment.
Make sure to clarify all options, extras, and sizes to uniquely
identify the item from the menu.
You respond in a short, very conversational friendly style.

The menu includes:

Sandwiches:

Turkey Sandwich: Large $12.50, Medium $9.75, Small $7.00
Ham and Cheese Sandwich: Large $11.95, Medium $9.25, Small $6.50
Veggie Sandwich: Large $10.95, Medium $8.75, Small $6.00
BLT Sandwich: Large $13.50, Medium $10.50, Small $7.50

Sides:

French Fries: Large $5.00, Medium $4.00, Small $3.00
Onion Rings: Large $6.50, Medium $5.25, Small $4.00
Garden Salad: $7.50

Toppings:

Extra Cheese $2.00
Avocado $2.50
Bacon $3.00
Pickles $1.50
Jalapeños $1.25

Drinks:

Coke: Large $3.00, Medium $2.50, Small $1.75
Sprite: Large $3.00, Medium $2.50, Small $1.75
Bottled Water: $5.00
"""

In [None]:
# Conversation state for the bot, seeded with the system prompt only.
context = [{"role": "system", "content": system_prompt}]

In [None]:
# Request the first reply from the system prompt alone (no user turn yet).
output = get_response_from_messages_hf(pipe, context)
print(output)

In [None]:
# Record the bot's reply in the history. Re-running this cell appends it again.
context.append({"role": "assistant", "content": output})

In [None]:
# Customer turn: place the order without specifying a size.
context.append({"role": "user", "content": "I'd like to have a Turkey sandwich"})

In [None]:
# Get the bot's response to the order.
output = get_response_from_messages_hf(pipe, context)
print(output)

In [None]:
# Record the bot's reply in the history. Re-running this cell appends it again.
context.append({"role": "assistant", "content": output})

In [None]:
# Customer turn: supply the size for the sandwich ordered earlier.
context.append({"role": "user", "content": "I'd like it small please"})

In [None]:
# Get the bot's response to the size choice.
output = get_response_from_messages_hf(pipe, context)
print(output)

In [None]:
# Record the bot's reply in the history. Re-running this cell appends it again.
context.append({"role": "assistant", "content": output})

In [None]:
# Dump the whole conversation so far, one message dict per line.
for turn in context:
    print(turn)

In [None]:
def collect_messages(_):
    """Handle one chat turn for the Panel dashboard.

    Reads the text from the notebook-level ``inp`` widget and clears it, then
    appends the user turn to ``context``, asks the Hugging Face pipeline for a
    reply, appends that reply as the assistant turn, and adds one row per
    speaker to ``panels``.

    Args:
        _: Value supplied by the bound button; ignored.

    Returns:
        A ``pn.Column`` containing every row collected so far.
    """
    user_text = inp.value_input
    inp.value = ''

    context.append({'role': 'user', 'content': f"{user_text}"})
    reply = get_response_from_messages_hf(pipe, context)
    context.append({'role': 'assistant', 'content': f"{reply}"})

    user_row = pn.Row('User:', pn.pane.Markdown(user_text, width=600))
    panels.append(user_row)
    assistant_row = pn.Row('Assistant:', pn.pane.Markdown(reply, width=600))
    panels.append(assistant_row)

    return pn.Column(*panels)

In [None]:
import panel as pn
pn.extension()

# Rows rendered so far; collect_messages appends to this list on every turn.
panels = []

# Start a fresh conversation that contains only the system prompt.
context = [{'role': 'system', 'content': system_prompt} ]


# Text box for the user's message and the button that submits it.
inp = pn.widgets.TextInput(value = "Hi", placeholder = 'Enter text here…')
button_conversation = pn.widgets.Button(name = "Chat!")

# Bind the handler to the button so it is re-evaluated with the button's value.
interactive_conversation = pn.bind(collect_messages, button_conversation)

# Layout: input box, button row, then the conversation output area.
dashboard = pn.Column(
    inp,
    pn.Row(button_conversation),
    pn.panel(interactive_conversation, loading_indicator = True, height = 300),
)

dashboard

Text Clustering

In [None]:
# Four short documents for the clustering demo: two describe animals
# (red panda, dolphin) and two describe smartphones (iPhone, Galaxy).
# Each entry starts and ends with a newline because of the triple quotes.
text_corpus = []
text_corpus.append("""
The red panda, often called the "firefox," is a small mammal native to the Himalayan region and parts of China. Despite its name, the red panda is not closely related to the giant panda but instead shares similarities with raccoons. With its vibrant reddish-brown fur, bushy tail marked with rings, and adorable mask-like facial markings, the red panda is a master of camouflage in its forested habitat. It primarily feeds on bamboo but also consumes fruits, berries, and insects. Unfortunately, this fascinating creature is endangered due to habitat loss and poaching, making conservation efforts crucial to its survival.
""")

text_corpus.append("""
The iPhone 15 Pro is Apple's latest flagship, pushing the boundaries of smartphone technology. With its aerospace-grade titanium body, it’s lighter yet more durable than its predecessors. The device features the A17 Pro chip, delivering lightning-fast performance for apps, gaming, and multitasking. The 48-megapixel main camera offers advanced computational photography, making it easier to capture stunning images even in challenging lighting conditions. The iPhone 15 Pro also introduces USB-C connectivity for faster data transfer and universal compatibility. This sleek smartphone combines elegance, power, and innovation in one seamless package.
""")

text_corpus.append("""
The dolphin is a highly intelligent marine mammal known for its playful nature and friendly interactions with humans. Dolphins live in oceans and rivers worldwide, and they are famous for their sleek, streamlined bodies and curved dorsal fins. They communicate using clicks, whistles, and body movements and are often seen leaping out of the water or riding waves. Dolphins mainly eat fish and squid, using their sharp teeth to catch prey. These social animals live in groups called pods, working together to hunt and protect one another. Loved by many, dolphins symbolize joy and freedom.
""")

text_corpus.append("""
The Samsung Galaxy S23 Ultra is a powerhouse designed for those who demand the best in smartphone performance. Featuring a stunning 6.8-inch Dynamic AMOLED 2X display with a 120Hz refresh rate, it delivers a buttery-smooth viewing experience. The device is powered by the Snapdragon 8 Gen 2 chipset, ensuring top-notch performance for gaming and productivity. Its standout feature is the 200-megapixel primary camera, capable of capturing incredible detail and vibrant colors in photos. With its integrated S Pen for note-taking and creative tasks, the Galaxy S23 Ultra is more than just a phone—it’s a versatile tool for work and play.
""")

Embedding

OpenAI Embeddings

In [None]:
# Embed the first corpus document with the OpenAI embeddings endpoint.
response = openai_client.embeddings.create(
    model="text-embedding-3-small",
    input=text_corpus[0],
)

In [None]:
# Show the raw embeddings response object.
response

In [None]:
# Length of the returned embedding vector.
len(response.data[0].embedding)

In [None]:
def embed_text_openai(openai_client, text):
    """Return the embedding vector of ``text`` from the OpenAI embeddings API.

    Args:
        openai_client: Client object exposing ``embeddings.create``.
        text: Input passed through unchanged as ``input``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    result = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    first_item = result.data[0]
    return first_item.embedding

In [None]:
# One OpenAI embedding per corpus document, in corpus order.
embeddings = [embed_text_openai(openai_client, document) for document in text_corpus]

HuggingFace Embeddings

In [None]:
def embed_text_hf(text, tokenizer, model, device = None):
    """Embed text as the mean of the model's last-layer hidden states.

    Hidden states are requested for this call only, so the shared model
    configuration (also used by the generation pipeline) is left untouched.
    Padding positions are excluded from the average when the tokenizer
    supplies an attention mask.

    Args:
        text: Input handed to ``tokenizer``.
        tokenizer: Callable returning a mapping of PyTorch tensors.
        model: Callable invoked with the tokenized inputs plus
            ``output_hidden_states=True``; its result must expose
            ``hidden_states``.
        device: Device the inputs are moved to. Defaults to ``model.device``
            when the model has that attribute, otherwise the CPU.

    Returns:
        The pooled embedding converted to a plain Python list.
    """
    if device is None:
        # Follow the model instead of a notebook-level global captured at
        # definition time.
        device = getattr(model, "device", "cpu")

    inputs = tokenizer(text, return_tensors = "pt", truncation = True, padding = True)
    # Move inputs to the same device as the model.
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        output = model(**inputs, output_hidden_states = True)

    last_hidden_state = output.hidden_states[-1]
    attention_mask = inputs.get("attention_mask")
    if attention_mask is None:
        embeddings = torch.mean(last_hidden_state, dim = 1)
    else:
        # Weight each position by its mask value so padding does not dilute
        # the average; with no padding this equals the plain mean.
        weights = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
        token_counts = weights.sum(dim = 1).clamp(min = 1)
        embeddings = (last_hidden_state * weights).sum(dim = 1) / token_counts

    return embeddings.squeeze().to('cpu').tolist()

In [None]:
# One Hugging Face embedding per corpus document; this replaces any
# embeddings computed earlier under the same name.
embeddings = [embed_text_hf(document, tokenizer, model) for document in text_corpus]

In [None]:
# Length of the first embedding vector.
len(embeddings[0])

Cluster Texts

In [None]:
from sklearn.cluster import KMeans
import numpy as np

In [None]:
# Convert the list of embedding vectors to a NumPy array (rebinds the name).
embeddings = np.array(embeddings)

In [None]:
# Number of clusters to form; the corpus mixes animal and smartphone texts.
n_clusters = 2

In [None]:
# Fit k-means on the embedding matrix with a fixed random_state, then display
# the fitted estimator.
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(embeddings)
kmeans

In [None]:
cluster_labels = kmeans.labels_

# Pair every document with its assigned cluster. Each corpus entry starts with
# a newline, so the text is stripped before slicing; otherwise the preview
# would begin with a line break and lose one visible character.
for text, label in zip(text_corpus, cluster_labels):
    print(f"{text.strip()[:20]} :: belongs to cluster {label}")