# In [1]:
import os
import requests
from typing import Dict, Optional

# In [3]:
class LLMClient:
    """Small HTTP client for a chat-completions style endpoint.

    Requests are POSTed to ``<api_base>/chat/completions``. HTTP and
    transport failures are reported through the returned result dict rather
    than raised, and every attempted call is counted in ``call_count``.
    """

    def __init__(
        self,
        api_key: str = "cse476",
        api_base: str = "http://10.4.58.53:41701/v1",
        model: str = "bens_model"
    ):
        """Store the connection settings and start the call counter at zero.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            api_base: Base URL to which ``/chat/completions`` is appended.
            model: Model name placed in the request payload.
        """
        self.api_key = api_key
        self.api_base = api_base
        self.model = model
        self.call_count = 0

    @staticmethod
    def _failure(status: int, error: str) -> Dict:
        """Build the result dict used for every unsuccessful call."""
        return {
            "ok": False,
            "text": None,
            "status": status,
            "error": error,
            "raw": None,
        }

    @staticmethod
    def _extract_text(data):
        """Return the first choice's message content from a decoded response.

        Any missing, empty, null or wrongly-typed level (the body itself,
        ``choices``, the first choice, ``message``, ``content``) yields ``""``
        instead of raising, so a 200 response with an unexpected shape still
        produces a usable result.
        """
        if not isinstance(data, dict):
            return ""
        choices = data.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            return ""
        message = first_choice.get("message")
        if not isinstance(message, dict):
            return ""
        content = message.get("content")
        return "" if content is None else content

    def call(
        self,
        prompt: str,
        system: str = "You are a helpful assistant.",
        temperature: float = 0.0,
        max_tokens: int = 128,
        timeout: int = 60
    ) -> Dict:
        """Send one system + user message pair and return a result dict.

        Args:
            prompt: Content of the ``user`` message.
            system: Content of the ``system`` message.
            temperature: Value sent as ``temperature`` in the payload.
            max_tokens: Value sent as ``max_tokens`` in the payload.
            timeout: Timeout passed to ``requests.post``.

        Returns:
            A dict with keys ``ok``, ``text``, ``status``, ``error`` and
            ``raw``. On HTTP 200 with a JSON body, ``ok`` is True, ``text`` is
            the first choice's message content (``""`` if absent) and ``raw``
            is the decoded body. On any other HTTP status, or a 200 whose body
            is not valid JSON, ``ok`` is False, ``status`` is the HTTP status
            and ``error`` describes the problem. If the request itself fails
            with ``requests.RequestException``, ``ok`` is False and ``status``
            is ``-1``.
        """
        # Counted before the request, so failed attempts are included.
        self.call_count += 1

        # Tolerate a trailing slash on api_base to avoid "//chat/completions".
        url = f"{self.api_base.rstrip('/')}/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException as exc:
            return self._failure(-1, str(exc))

        status = resp.status_code

        if status != 200:
            # Prefer the structured error body; fall back to the raw text.
            try:
                err_body = resp.json()
            except ValueError:
                err_body = resp.text
            return self._failure(status, str(err_body))

        # Every JSON decode error raised by Response.json() is a ValueError,
        # so this covers both old and new versions of requests.
        try:
            data = resp.json()
        except ValueError as exc:
            return self._failure(status, f"Invalid JSON in response body: {exc}")

        return {
            "ok": True,
            "text": self._extract_text(data),
            "status": status,
            "error": None,
            "raw": data,
        }

    def reset_counter(self):
        """Reset the number of recorded calls to zero."""
        self.call_count = 0

    def get_call_count(self) -> int:
        """Return how many times ``call`` has been invoked since the last reset."""
        return self.call_count


def call_model_chat_completions(
    prompt: str,
    system: str = "You are a helpful assistant.",
    model: str = "bens_model",
    temperature: float = 0.0,
    timeout: int = 60,
    max_tokens: int = 128
) -> Dict:
    """Send a single chat request through a freshly created ``LLMClient``.

    A new client (with its default API key and base URL) is built on every
    invocation, so no call count is retained between invocations.

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        model: Model name handed to the ``LLMClient`` constructor.
        temperature: Forwarded to ``LLMClient.call``.
        timeout: Forwarded to ``LLMClient.call``.
        max_tokens: Forwarded to ``LLMClient.call``; the default of 128
            matches that method's own default.

    Returns:
        The result dict produced by ``LLMClient.call``.
    """
    client = LLMClient(model=model)
    # Keyword arguments keep this wrapper correct if the parameter order of
    # LLMClient.call ever changes.
    return client.call(
        prompt,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        timeout=timeout,
    )