In [9]:
"""Run this model in Python

> pip install openai
"""
import os
from openai import OpenAI


In [10]:
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

from azure.identity import DefaultAzureCredential, get_bearer_token_provider, AzureCliCredential



In [11]:
# Friendly model name -> deployment name passed as `model=` to the client.
models = dict(
    [
        ("DeepSeek-R1", "DeepSeek-R1"),
        # NOTE(review): the recorded run of the Phi cell reports `unknown_model`
        # for this deployment name — confirm it matches the deployed endpoint.
        ("Phi-4-mini-instruct", "Phi-4-mini-instruct-hub"),
        ("gpt-4o", "gpt-4o"),
        ("o3-mini", "o3-mini"),
    ]
)

In [12]:
# URL of the Azure AI inference endpoint; None when the variable is not set.
endpoint = os.environ.get("AZURE_INFERENCE_SDK_ENDPOINT")

In [13]:
# Chat-completions client for the inference endpoint, authenticated with the
# API key read from AZURE_INFERENCE_SDK_ENDPOINT_KEY.
client = ChatCompletionsClient(
    credential_scopes=["https://cognitiveservices.azure.com/.default"],
    credential=AzureKeyCredential(
        os.environ.get("AZURE_INFERENCE_SDK_ENDPOINT_KEY")
    ),
    endpoint=endpoint,
)

In [14]:
from azure.ai.inference.models import (
    SystemMessage,
    UserMessage
)

In [15]:
def parse_think_tags(content):
    """
    Split a model response into its <think> reasoning and the remaining text.

    Args:
        content: Response text that may contain one or more
            <think>...</think> sections.

    Returns:
        A tuple ``(think_tags, content_without_think_tags)`` where
        ``think_tags`` is a list with the inner text of every
        <think>...</think> pair (in order of appearance) and
        ``content_without_think_tags`` is ``content`` with every such pair
        removed. Text outside the tags is left untouched (no stripping).
    """
    import re

    # re.DOTALL lets "." match newlines. Reasoning sections normally span
    # several lines; without the flag they would be neither found nor removed.
    think_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)

    think_tags = think_pattern.findall(content)
    content_without_think_tags = think_pattern.sub("", content)

    return think_tags, content_without_think_tags

In [16]:
# Single-turn prompt that is sent unchanged to each model in the cells that follow.
messages = [UserMessage(content="One shirt takes 3 hours to make. How long will it take to make 10 shirts?")]

# DeepSeek-R1

In [17]:
# Send the shared prompt to the DeepSeek-R1 deployment.
# NOTE(review): the recorded output of this cell is a Timeout HttpResponseError.
response = client.complete(model=models["DeepSeek-R1"], messages=messages)



HttpResponseError: (Timeout) The operation was timeout.
Code: Timeout
Message: The operation was timeout.

In [None]:
# Print the text of the first choice held in the current `response`.
print(response.choices[0].message.content)

<think>
Okay, let's see. The problem says that one shirt takes 3 hours to make, and we need to find out how long it will take to make 10 shirts. Hmm, seems straightforward, but let me think through it step by step.

First, if making one shirt requires 3 hours, then for 10 shirts, I probably need to multiply the time per shirt by the number of shirts. That would be 3 hours per shirt times 10 shirts. So, 3 times 10... that equals 30 hours. Wait, but is there anything else I need to consider here?

Let me visualize the scenario. If I have one person working on the shirts, each shirt takes 3 hours, right? So, making them one after another would indeed take 30 hours total. But maybe the question is implying if there's a way to reduce the time by working on multiple shirts at the same time? However, usually, when such problems are presented, especially in basic math, they assume that the work is done sequentially unless stated otherwise. The problem doesn't mention anything about multiple wo

# gpt-4o

In [18]:
# Ask the gpt-4o deployment the same question and print the first choice's text.
response = client.complete(model=models["gpt-4o"], messages=messages)
print(response.choices[0].message.content)

If it takes **3 hours** to make one shirt, then to make **10 shirts**, you simply multiply the time by the number of shirts:

\[
3 \, \text{hours} \times 10 \, \text{shirts} = 30 \, \text{hours}.
\]

So, it will take **30 hours** to make 10 shirts.


# Phi-4-mini-instruct

In [19]:
# Ask the Phi-4-mini-instruct deployment the same question and print the reply.
# NOTE(review): the recorded output of this cell is an `unknown_model` error for
# the deployment name this key maps to — confirm the name in `models`.
response = client.complete(model=models["Phi-4-mini-instruct"], messages=messages)
print(response.choices[0].message.content)

HttpResponseError: (unknown_model) Unknown model: phi-4-mini-instruct-hub
Code: unknown_model
Message: Unknown model: phi-4-mini-instruct-hub

# o3-mini

In [None]:
# The original statement ended after `import` with no names, which is a
# SyntaxError. Completed with the message types this notebook uses.
# NOTE(review): the originally intended names are unknown — adjust if needed.
from azure.ai.inference.models import SystemMessage, UserMessage

In [20]:
# Azure OpenAI deployments are addressed per deployment: azure-ai-inference
# expects ".../openai/deployments/<deployment-name>" as the endpoint. Build
# that URL unless AZURE_OPENAI_ENDPOINT already contains it; an unset variable
# is passed through unchanged (None), as before.
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
if azure_openai_endpoint and "/openai/deployments/" not in azure_openai_endpoint:
    azure_openai_endpoint = (
        f"{azure_openai_endpoint.rstrip('/')}/openai/deployments/{models['o3-mini']}"
    )

# Rebinds `client`: from here on it targets the Azure OpenAI resource instead
# of the inference endpoint configured earlier in the notebook.
client = ChatCompletionsClient(
    endpoint=azure_openai_endpoint,
    credential=AzureKeyCredential(os.getenv("AZURE_OPENAI_API_KEY")),
    # Azure OpenAI needs its own api-version rather than the SDK default.
    # NOTE(review): o3-mini is believed to require 2024-12-01-preview or
    # later — confirm against the resource's supported versions.
    api_version="2024-12-01-preview",
)

# Ask the o3-mini deployment the shared question and print the first choice.
response = client.complete(model=models["o3-mini"], messages=messages)
print(response.choices[0].message.content)

ResourceNotFoundError: (404) Resource not found
Code: 404
Message: Resource not found