In [48]:
import requests

def call_qwen2(prompt: str, max_tokens: int = 100, temperature: float = 0.0,
               timeout: float = 600) -> str:
    """Request a text completion from the local server and return the generated text.

    Posts to the ``/v1/completions`` endpoint on ``localhost:8000`` using the
    ``/models/qwen2`` model and returns the ``text`` field of the first choice.

    Args:
        prompt: Text the model should continue.
        max_tokens: Sent as the ``max_tokens`` field of the request.
        temperature: Sent as the ``temperature`` field of the request.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, ``requests`` waits indefinitely, so a stalled server would
            hang the notebook kernel.

    Returns:
        The ``text`` of the first entry in the response's ``choices`` list.

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "http://localhost:8000/v1/completions"
    headers = {
        "Content-Type": "application/json"
    }
    payload = {
        "model": "/models/qwen2",
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
    resp.raise_for_status()  # raises if the status code is not 2xx

    data = resp.json()
    # The generated text is normally found at data["choices"][0]["text"]
    return data["choices"][0]["text"]

if __name__ == "__main__":
    # Quick smoke test: ask the model to continue a short prompt and show the result.
    demo_prompt = "San Francisco is a"
    output = call_qwen2(demo_prompt)
    print("Model output:\n", output)

Model output:
  city that is known for its unique architecture, vibrant culture, and stunning natural beauty. It is also a city that is home to some of the most iconic landmarks in the world, including the Golden Gate Bridge, the Golden Gate Park, and the Golden Gate Bridge Museum. The city is also home to some of the most famous museums in the world, including the Museum of Modern Art, the San Francisco Museum of Modern Art, and the Museum of the Moving Image.
San Francisco is also home to some


In [49]:
import requests

def chat_with_model(messages, model="/models/qwen2", max_tokens=128,
                    temperature=0.0, timeout=600):
    """Send a chat-completion request to the local server and return the parsed JSON.

    Posts to the ``/v1/chat/completions`` endpoint on ``localhost:8000``. The
    HTTP status code and raw body are printed before the status check so that
    the server's own error message is visible even when the request fails.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts sent as the
            ``messages`` field of the request.
        model: Sent as the ``model`` field of the request.
        max_tokens: Sent as the ``max_tokens`` field of the request.
        temperature: Sent as the ``temperature`` field of the request.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, ``requests`` waits indefinitely on a stalled server.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "http://localhost:8000/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,    # always send max_tokens
        "temperature": temperature   # sending temperature is recommended
    }
    resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
    print("Status:", resp.status_code)
    print("Body:", resp.text)  # show what the server actually said before raising
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    # Demo conversation: a system prompt followed by a single user question.
    msgs = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user",   "content": "Who won the world series in 2020?"}
    ]
    try:
        result = chat_with_model(msgs)
        print("Reply:", result["choices"][0]["message"]["content"])
    except Exception as e:
        # Report what went wrong (HTTP error, connection failure, unexpected
        # response shape, ...) instead of discarding the exception.
        print("调用失败:", repr(e))

Status: 200
Body: {"id":"chatcmpl-68d58d5f802f4761b71717919a984763","object":"chat.completion","created":1745005422,"model":"/models/qwen2","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":null,"content":"The 2020 World Series was won by the Los Angeles Dodgers, led by Clayton Kershaw and Clayton Kershaw.","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":31,"total_tokens":60,"completion_tokens":29,"prompt_tokens_details":null},"prompt_logprobs":null}
Reply: The 2020 World Series was won by the Los Angeles Dodgers, led by Clayton Kershaw and Clayton Kershaw.


In [41]:
import requests, pprint
from datetime import datetime

def generate_completion(prompt, host="localhost", port=8000,
                        max_tokens=64, temperature=0.2):
    """Request a text completion and return the full decoded JSON response.

    Posts to ``http://{host}:{port}/v1/completions`` with the ``/models/qwen2``
    model and a 600-second request timeout.

    Args:
        prompt: Text the model should continue.
        host: Host name of the completion server.
        port: Port of the completion server.
        max_tokens: Sent as the ``max_tokens`` field of the request.
        temperature: Sent as the ``temperature`` field of the request.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status code.
    """
    endpoint = "http://{}:{}/v1/completions".format(host, port)
    request_body = dict(
        model="/models/qwen2",
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    response = requests.post(endpoint, json=request_body, timeout=600)
    response.raise_for_status()
    return response.json()

# Ask for the time, embedding the current local timestamp in the prompt,
# then pretty-print the complete JSON response.
timestamped_prompt = f"What time is it? {datetime.now()}"
resp = generate_completion(timestamped_prompt)
pprint.pp(resp)

{'id': 'cmpl-5761d103a2ce4bb885040d7d5c641d68',
 'object': 'text_completion',
 'created': 1745004218,
 'model': '/models/qwen2',
 'choices': [{'index': 0,
              'text': '\n'
                      'What is the current time in the format "HH:MM:SS"?\n'
                      'To find the current time in the format "HH:MM:SS", you '
                      'can use the following Python code:\n'
                      '```python\n'
                      'import datetime\n'
                      'current_time = datetime.datetime.now().time()\n'
                      'current_time\n'
                      '```\n'
                      'This will output the',
              'logprobs': None,
              'finish_reason': 'length',
              'stop_reason': None,
              'prompt_logprobs': None}],
 'usage': {'prompt_tokens': 32, 'total_tokens': 96, 'completion_tokens': 64}}
