In [1]:
# Requires Python >= 3.10

In [8]:
from openai import OpenAI
import os
import httpx
import json
from typing import List, Dict, Generator

In [9]:
# Base URL of the serving endpoint; ChatClient passes it to the OpenAI client as `base_url`.
model_url = (
    "https://caii-prod-validation.eng-ml-l.vnu8-sqze.cloudera.site"
    "/namespaces/serving-default/endpoints/deepseek-r1-distill-8b/openai/v1"
)

In [10]:
# Model identifier sent as the `model` field of every chat completion request.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

In [11]:
class ChatClient:
    """Minimal multi-turn chat wrapper around the OpenAI client.

    The running conversation is kept in ``conversation_history`` as a list of
    ``{"role": ..., "content": ...}`` dicts, and the whole history is sent with
    every request. The endpoint URL and the model id are read from the
    module-level ``model_url`` and ``model_name``.
    """

    def __init__(self):
        """Build the HTTP and OpenAI clients and start with an empty history.

        The API key is the ``access_token`` field of the JSON file ``/tmp/jwt``.
        When the ``CUSTOM_CA_STORE`` environment variable is set, its value is
        passed to ``httpx.Client`` as ``verify``.

        Raises:
            OSError: If ``/tmp/jwt`` cannot be opened.
            json.JSONDecodeError: If the token file is not valid JSON.
            KeyError: If the token file has no ``access_token`` entry.
        """
        # Set up HTTP client
        if "CUSTOM_CA_STORE" not in os.environ:
            http_client = httpx.Client()
        else:
            http_client = httpx.Client(verify=os.environ["CUSTOM_CA_STORE"])

        # Load API key; the context manager closes the token file deterministically
        with open("/tmp/jwt") as token_file:
            api_key = json.load(token_file)["access_token"]

        # Initialize OpenAI client
        self.client = OpenAI(
            base_url=model_url,
            api_key=api_key,
            http_client=http_client,
        )

        self.conversation_history: List[Dict[str, str]] = []

    def chat(self, message: str, stream: bool = True) -> str:
        """
        Send a message to the chat model and get the response.

        The user message and the assistant reply are committed to the history
        together, and only after the request has completed. A failed call
        therefore leaves the history unchanged and can be retried without
        duplicating the user turn.

        Args:
            message: The message to send to the model
            stream: Whether to stream the response (each chunk is printed as it
                arrives) or return it all at once

        Returns:
            The complete response as a string (empty if the model returned no
            text content)

        Raises:
            Any exception raised by the underlying client call is propagated
            unchanged.
        """
        user_turn = {"role": "user", "content": message}
        # Send history + new message without touching the stored history yet.
        outgoing = self.conversation_history + [user_turn]

        response = self.client.chat.completions.create(
            model=model_name,
            messages=outgoing,
            stream=bool(stream),
        )

        if stream:
            pieces = []
            for chunk in response:
                # A chunk may carry an empty `choices` list; indexing [0] on it
                # would raise IndexError, so skip such chunks.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content is not None:
                    pieces.append(content)
                    print(content, end='', flush=True)

            print()  # New line after response is complete
            reply = "".join(pieces)
        else:
            # `content` may be None; normalise so the declared `str` return holds
            # and the stored assistant turn always has string content.
            reply = response.choices[0].message.content or ""

        # Commit both turns only now that the exchange succeeded.
        self.conversation_history.append(user_turn)
        self.conversation_history.append({"role": "assistant", "content": reply})
        return reply

    def get_history(self) -> List[Dict[str, str]]:
        """Get the conversation history.

        Returns the live internal list (not a copy), so mutating the result
        mutates the client's history.
        """
        return self.conversation_history

    def clear_history(self):
        """Clear the conversation history.

        Rebinds the attribute to a new empty list; lists previously obtained
        from ``get_history`` keep their old contents.
        """
        self.conversation_history = []

In [12]:
# Initialize the chat client. Construction reads the access token from /tmp/jwt and,
# if the CUSTOM_CA_STORE environment variable is set, passes its value to httpx as `verify`.
# The new client starts with an empty conversation history.
chat_client = ChatClient()

In [13]:
# First-turn prompt: a counting question that asks the model for step-by-step reasoning.
# The text must not contain a stray, unbalanced double-quote character.
message = """
In a room of 30 people, everyone shakes hands exactly once with everyone else. How many total handshakes occur? Walk me through your reasoning step by step."""
# """You are a JSON extraction system. Extract customer information and return ONLY a JSON object.

# Format must be exactly:
# {
#     "account_id": "four digit account ID or empty string",
#     "name": "full name or empty string"
# }
# DO NOT add any extra text or conversation.

# Example:
# Input: "Hi, my name is John Smith and my account number is 1234"
# Output: {
#     "account_id": "1234",
#     "name": "John Smith"
# }

# Input: "Hello, I'm Mary Jones"
# Output: {
#     "account_id": "",
#     "name": "Mary Jones"
# }

# Here is the conversation to analyze: 'Hello my name is Max Fisher and my account ID is 1004.'"""

In [14]:
# Non-streaming call: nothing is printed; the complete reply is returned and stored in `response`.
response = chat_client.chat(message, stream=False)

In [15]:
# Display the reply string returned by the non-streaming call.
response

"<think>\nFirst, I need to understand that every person in the room will shake hands with every other person exactly once.\n\nThere are 30 people in total.\n\nNext, I'll determine how many handshakes each person will initiate. Since each person shakes hands with 29 others, that means 29 handshakes per person.\n\nTo find the total number of unique handshakes, I need to consider that each handshake is counted twice when considering all individual handshakes. Therefore, I'll use the formula for combinations, specifically the number of ways to choose 2 people out of 30.\n\nCalculating this gives me 30 multiplied by 29 divided by 2, which equals 435.\n\nSo, there are a total of 435 unique handshakes in the room.\n</think>\n\nLet's determine the total number of handshakes that occur when each of the 30 people in the room shakes hands with every other person exactly once. Here's a step-by-step breakdown of the reasoning:\n\n### Step 1: Understand the Problem\n- **Total number of people in the

In [16]:
# Prompt for the second turn of the conversation.
message2 = (
    " what do you mean by : allows computer systems to automatically learn and improve "
    "from experience without being explicitly programmed? can you give me an example"
)

In [17]:
# Streaming call on the same client: each chunk is printed as it arrives, and the full
# reply is also returned, overwriting `response`.
response = chat_client.chat(message2, stream=True)

When I say "allows computer systems to automatically learn and improve from experience without being explicitly programmed," I mean that machine learning algorithms are designed to find patterns and make predictions based on data, rather than being explicitly coded to perform a specific task.

For example, let's say we have a large dataset of emails, and we want to create a system that can automatically classify each email as spam or not spam. Instead of manually programming a set of rules to determine whether an email is spam or not based on features like the presence of certain keywords or the email address of the sender, we can use a machine learning algorithm to learn the patterns that distinguish spam emails from non-spam emails from the data.

The algorithm would analyze the dataset and come up with a mathematical model that can classify new emails as spam or not spam based on the patterns it has learned. As the algorithm is exposed to more data over time, it can continuously imp

In [11]:
# # For streaming responses:
# for response in chat_client.chat(message):
#     print(response, end="\r")
# print()  # New line after response is complete