In [29]:
from openai import OpenAI
from openai import AzureOpenAI
from openai import embeddings

In [11]:
from dotenv import load_dotenv
import os
load_dotenv()



True

In [12]:

# Load environment variables from the .env file (load_dotenv() was also called
# in the import cell above; it is repeated here before the values are read).
load_dotenv()

# Read the Azure OpenAI credentials from the environment rather than hardcoding
# them. os.getenv returns None when a variable is not set.
api_key = os.getenv('AZURE_OPENAI_API_KEY')
api_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')  # e.g., "https://<your-resource-name>.openai.azure.com/"
# NOTE(review): api_type is not referenced by any cell visible in this part of
# the notebook — confirm whether it is still needed.
api_type = 'azure'
api_version = '2023-05-15'  # Use the appropriate API version

# Deployment names passed as `model=` in the chat and embeddings calls below.
# NOTE(review): presumably these must match deployments in the Azure resource — confirm.
deployment_name_chat = 'gpt-4o-global'
deployment_name_embeddings = 'text-embedding-ada-002'


In [13]:
# Build the Azure OpenAI client from the endpoint, key and API version read in
# the configuration cell; the chat and embedding cells below use this client.
client = AzureOpenAI(
    azure_endpoint=api_endpoint,
    api_key=api_key,
    api_version=api_version,
)

# Chat Completion

In [14]:
# Minimal chat completion: one system message plus one user question, sent to
# the chat deployment. The reply text is read from response.choices[0] in the
# next cell.
response = client.chat.completions.create(
    model=deployment_name_chat,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"}
    ]
)

In [15]:
print(response.choices[0].message.content)

The Los Angeles Dodgers won the World Series in 2020. They defeated the Tampa Bay Rays in six games to clinch their first championship since 1988.


In [111]:
system_prompt = "You are a helpful assistant. Keep your response short and to the point."


def get_response(prompt, system_prompt=system_prompt, **completion_kwargs):
    """Send a single-turn chat request and return the assistant's reply text.

    Args:
        prompt: Text sent as the user message.
        system_prompt: Text sent as the system message. The default is the
            value the notebook-level ``system_prompt`` had when this function
            was defined; reassigning that variable later does not change the
            default, so pass ``system_prompt=...`` explicitly to override it.
        **completion_kwargs: Optional extra keyword arguments forwarded
            unchanged to ``client.chat.completions.create`` (for example
            ``temperature`` or ``max_tokens``). Omitting them sends the same
            request as before.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(
        model=deployment_name_chat,
        messages=messages,
        **completion_kwargs,
    )
    # Only the first choice is used; the request does not ask for more than one.
    return response.choices[0].message.content


In [112]:
# Entity extraction: ask the model to pull the entities out of a short news
# article and return them as JSON.

# The article is wrapped in a closed pair of quotes and the output-format
# instruction is its own sentence, so the end of the article is unambiguous.
prompt = """Please extract entities from
the following news article: 'The
new iPhone model is set to be
released next month. It has
been highly anticipated by
Apple fans and is expected to
feature a larger screen and
improved camera.' Present the results in JSON format."""


print(get_response(prompt))



```json
{
  "Entities": {
    "Product": "iPhone",
    "Company": "Apple",
    "ReleaseDate": "next month",
    "Features": [
      "larger screen",
      "improved camera"
    ]
  }
}
```


In [113]:
# Sentiment analysis: ask the model to classify the sentiment of a short passage.
# NOTE(review): the passage ends mid-sentence ("user-friendly") — it looks
# truncated; confirm whether the rest of the sentence should be restored.

prompt = """Please provide a sentiment for the 
following text: 
The iPhone is a line of smartphones 
designed and marketed by Apple Inc. 
These devices combine a computer, 
camera, and internet connectivity in a 
single device, and are known for their 
sleek design and user-friendly 
"""




print(get_response(prompt))

The sentiment of the text is positive.


In [114]:
# Few-shot examples: five complete puns demonstrate the pattern, and the sixth
# item is left unfinished for the model to complete.

prompt = """write a list of puns.

1. Why did Adele cross the road? To say hello from the other side.
2. What kind of concert only costs 45 cents? A 50 Cent concert featuring 
Nickelback.
3. What did the grape say when it got crushed? Nothing, it just let out a 
little wine.
4. What was Forrest Gump's email password? 1forrest1
5. Can February March? No, but April May.
6. What do you call fancy language model
"""




print(get_response(prompt))

A poet-IQ.


In [116]:
# Few-shot reasoning: the same question is asked twice, changing only the
# worked example that precedes it.

# 1) Answer-only exemplar: the example shows just the final answer.
print(get_response("""
Roger has 5 tennis balls. He buys 2 more cans of 
tennis balls. Each can has 3 tennis balls. How many 
tennis balls does he have now?
Answer: The answer is 11.
The cafeteria has 23 apples. If they used 20 to make 
lunch and bought 6 more, how many do they have?
"""))



# 2) Chain-of-thought exemplar: the example spells out the intermediate steps
# before stating the answer. (The duplicated "Answer:" label and the
# whitespace-only line that were in this exemplar have been removed.)
print(get_response("""
Roger has 5 tennis balls. He buys 2 more cans of 
tennis balls. Each can has 3 tennis balls. How many 
tennis balls does he have now?
Answer: Roger started with 5 balls. 2 cans of 3 
tennis balls each is 6 tennis balls. 5+6 = 11. The 
answer is 11.

The cafeteria has 23 apples. If they used 20 to make 
lunch and bought 6 more, how many do they have?
"""))

They have 9 apples now. (23 - 20 + 6 = 9)
They started with 23 apples. After using 20, they have 3 left. Then, they bought 6 more. 3 + 6 = 9. The answer is 9.


In [117]:
# Break the task down.
# LLMs often perform better if the task is broken down into smaller steps:
# this prompt asks for the factual claims first and the search queries second.

prompt = """
You will read a paragraph, and then issue queries to a search engine in order 
to fact-check it. 
---
PARAGRAPH 
John Smith is married to Lucy Smith. They have five kids, and he works as a 
software engineer at Microsoft. What search queries should I do to fact-check 
this? 
---
Now you will extract factual claims first, and then issue queries to fact-check 
them. When issuing a query, use the function SEARCH("query") 

"""

print(get_response(prompt)) 

Factual claims to fact-check:
1. John Smith is married to Lucy Smith.
2. They have five kids.
3. John Smith works as a software engineer at Microsoft.

Search queries:
1. SEARCH("John Smith married to Lucy Smith")
2. SEARCH("John Smith and Lucy Smith five kids")
3. SEARCH("John Smith software engineer at Microsoft")


In [119]:
# Meta prompts / System messages
# Provide specific instructions, tone, guardrails, task definition and personality.

# NOTE: this rebinds the notebook-level `system_prompt`. get_response's default
# argument was captured when the function was defined, so the default is not
# affected by this assignment.
system_prompt = """You are an AI assistant called Softy 
that helps people find information on Microsoft 
products and services. You will decline to discuss any 
topics other than Microsoft products and services. 
You will end each response with an emoji."""


prompt = "What is Cosmos"

# No system_prompt argument: get_response falls back to its own default.
print(get_response(prompt))

# Same question with the Softy system prompt passed explicitly.
print(get_response(prompt, system_prompt=system_prompt))

Cosmos is a decentralized network of independent blockchains that aims to create an ecosystem of interoperable chains, facilitating communication and data exchange between them. It uses the Tendermint consensus algorithm and the Inter-Blockchain Communication (IBC) protocol.
Microsoft Azure Cosmos DB is a fully managed NoSQL database service for modern app development. It provides high availability and low latency, and it supports multiple data models like key-value, document, graph, and column-family.

Is there something specific you'd like to know about Azure Cosmos DB? 😊


In [121]:
# Chain-of-thought prompting:
# instruct the model to proceed step-by-step and present all the steps involved.

system_prompt = """Take a step-by-step approach in your response, cite sources and give 
reasoning before sharing final answer"""

prompt = """Who was the most decorated (maximum medals) individual athlete in the 
Olympic games that were held at Sydney? """

# Baseline: no system_prompt argument is passed, so get_response uses its own
# default rather than the step-by-step prompt assigned above.
print(get_response(prompt))

The most decorated individual athlete at the 2000 Sydney Olympics was American swimmer Jenny Thompson, who won 5 medals (3 gold and 2 bronze).


In [122]:
# Same question, this time passing the step-by-step system prompt explicitly.
print(get_response(prompt, system_prompt=system_prompt))

To determine the most decorated individual athlete in terms of medals at the Sydney 2000 Olympic Games, we can follow these steps:

1. **Identify Notable Multi-Medalists from Sydney 2000:**
   The Sydney 2000 Olympics saw many remarkable performances by individual athletes from different sports such as swimming, gymnastics, athletics, and more.

2. **Review Official Olympic Records:**
   The official Olympic database and records can be obtained from the International Olympic Committee (IOC) website, which provides comprehensive data on medal counts for all Olympiads.

3. **Focus on Swimming:**
   Historically, swimming has been a sport where individuals have the potential to win multiple medals due to the numerous events available.

4. **Key Athlete Study:**
   One name that stands out from the Sydney 2000 Olympics in terms of multiple medal wins in swimming is Ian Thorpe from Australia. 

Let's focus specifically on Ian Thorpe's performance at the Sydney Olympics:
- Ian Thorpe won 5 m

# Embeddings

Background reading: [Introducing text and code embeddings](https://openai.com/index/introducing-text-and-code-embeddings/) (OpenAI)

In [24]:
# Request an embedding for a single string from the embeddings deployment.
embedding = client.embeddings.create(
    model=deployment_name_embeddings,
    input="Hello, world!"
)

In [41]:
# Iterating the response object yields tuples; list the element types to see that.
[type(e) for e in embedding]

[tuple, tuple, tuple, tuple]

In [42]:
# Print each tuple. In the recorded output each one pairs a field name with its
# value (e.g. 'data' with the list of Embedding objects), so this prints the
# whole vector.
for e in embedding:
    print(e)

('data', [Embedding(embedding=[0.0014701404143124819, 0.0034404152538627386, -0.012805989943444729, -0.03341025486588478, -0.009448399767279625, 0.00469234399497509, -0.015341703779995441, 0.0017138364491984248, -0.002967358101159334, -0.02507680468261242, 0.029867900535464287, 0.007135676220059395, -0.016807066276669502, -0.01797935552895069, 0.010429555550217628, -0.0028017086442559958, 0.025153258815407753, -0.015099599957466125, 0.011296030133962631, 0.010735370218753815, -0.008186914026737213, -0.0018715221667662263, 0.01711288094520569, 0.006055768113583326, -0.01427135244011879, -0.007333181332796812, 0.0034977556206285954, -0.015902364626526833, 0.03723293915390968, -0.025790372863411903, 0.009868894703686237, -0.0066769542172551155, -0.004848436918109655, -0.013927310705184937, 0.011850318871438503, -0.018960511311888695, 0.0050268289633095264, -0.011359741911292076, 0.01902422308921814, -0.011786608025431633, 0.0048898495733737946, 0.005632087122648954, 0.003118672640994191, 

In [52]:
# The vector is at .data[0].embedding; slice to the first 20 values to keep the output short.
embedding.data[0].embedding[:20]

[0.0014701404143124819,
 0.0034404152538627386,
 -0.012805989943444729,
 -0.03341025486588478,
 -0.009448399767279625,
 0.00469234399497509,
 -0.015341703779995441,
 0.0017138364491984248,
 -0.002967358101159334,
 -0.02507680468261242,
 0.029867900535464287,
 0.007135676220059395,
 -0.016807066276669502,
 -0.01797935552895069,
 0.010429555550217628,
 -0.0028017086442559958,
 0.025153258815407753,
 -0.015099599957466125,
 0.011296030133962631,
 0.010735370218753815]

## Cosine similarity

In [55]:
import numpy as np

In [134]:
# Alternative example pair:
# input=["feline friends say", "meow"]
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept here in case other cells refer to it.
input = [
    'I think it was the release of the capacity api is whats enabling this fraudulent activities',
    'Cause now we have given everyone the method to check for availability and made it easier for bad actors to automate their attacks',
]

# One request embeds both sentences; the two vectors are read from
# resp.data[0] and resp.data[1].
resp = client.embeddings.create(
    model=deployment_name_embeddings,
    input=input,
)

embedding_a = resp.data[0].embedding
embedding_b = resp.data[1].embedding

# Cosine similarity = dot product divided by the product of the vector norms.
# A bare dot product gives the same number only when both vectors have unit
# length, so normalise explicitly instead of relying on that property.
similarity_score = np.dot(embedding_a, embedding_b) / (
    np.linalg.norm(embedding_a) * np.linalg.norm(embedding_b)
)
print(similarity_score)


0.8423564522735931


In [136]:
help(client.embeddings.create)

Help on method create in module openai.resources.embeddings:

create(*, input: 'Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]', model: "Union[str, Literal['text-embedding-ada-002', 'text-embedding-3-small', 'text-embedding-3-large']]", dimensions: 'int | NotGiven' = NOT_GIVEN, encoding_format: "Literal['float', 'base64'] | NotGiven" = NOT_GIVEN, user: 'str | NotGiven' = NOT_GIVEN, extra_headers: 'Headers | None' = None, extra_query: 'Query | None' = None, extra_body: 'Body | None' = None, timeout: 'float | httpx.Timeout | None | NotGiven' = NOT_GIVEN) -> 'CreateEmbeddingResponse' method of openai.resources.embeddings.Embeddings instance
    Creates an embedding vector representing the input text.
    
    Args:
      input: Input text to embed, encoded as a string or array of tokens. To embed multiple
          inputs in a single request, pass an array of strings or array of token arrays.
          The input must not exceed the max input tokens for the model (8192 token