# 使用openai的api请求

## 使用非流式的

In [1]:
# Extra directories to add to the import path (hard-coded absolute paths).
python_paths = ['/data1/llm-notes/llm-notes/llm_chat_service',
               '/data1/llm-notes/llm-notes/agent_autogen']

In [2]:
import sys

# Append every directory in python_paths to the module search path.
sys.path.extend(python_paths)

In [3]:
from chat_client import ChatClient

In [4]:
# Chat client aimed at the local endpoint on port 7777.
# NOTE(review): 'NULL' looks like a placeholder API key — confirm the server does not validate it.
client = ChatClient(api_key='NULL', base_url='http://127.0.0.1:7777/v1',
                    model='Meta-Llama-3-8B-Instruct')

In [5]:
%%time
# Send one message and time the call (the cell magic above must stay on the first line).
client.chat("你好")

CPU times: user 14 ms, sys: 7.89 ms, total: 21.9 ms
Wall time: 1.1 s


' 你好！我是你的智能助手，我可以帮助你回答问题、完成任务、提供建议等。有什么问题或需要帮助的事情？请随时问我！ 😊\n\n'

In [9]:
# Display the recorded conversation; the output below shows (user message, reply) tuples.
client.history

[('你好', '你好！很高兴为你提供帮助。有什么问题或需要讨论的主题吗？'),
 ('你好', '你好！如果你有任何问题、需要咨询的信息或者想要讨论的话题，请随时告诉我，我会尽力为你提供帮助。')]

In [None]:
import gradio as gr

# Serve client.predict through a Gradio chat UI, listening on 0.0.0.0:9999.
(
    gr.ChatInterface(client.predict, chatbot=gr.Chatbot(height=700))
    .launch(server_name='0.0.0.0', server_port=9999)
)

## 使用流式的

暂时不支持流式的，所以下面的代码无法工作。

In [None]:
from openai import OpenAI
# OpenAI SDK client aimed at the local endpoint.
# NOTE: this rebinds the name `client`, which earlier cells bound to a ChatClient.
client = OpenAI(
    api_key="Qwen1.5-14B-Chat-GPTQ-Int4", # must be set, and must not be empty
    base_url="http://127.0.0.1:7777/v1"
)

# System message that predict() places at the start of every request.
system_prompt = "你是 Qwen1.5, Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 你擅长中文和英文的对话。你会为用户提供安全，有帮助，准确的回答。"

def predict(message, history):
    """Stream a chat completion for ``message`` given the earlier turns.

    Args:
        message: The newest user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs for the
            previous turns.

    Yields:
        The reply text accumulated so far, once per streamed chunk that
        carries content.
    """
    history_openai_format = [
        {"role": "system", "content": system_prompt}
    ]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model="Qwen1.5-14B-Chat-GPTQ-Int4",
        messages=history_openai_format,
        temperature=0.3,
        stream=True)

    partial_message = ""
    for chunk in response:
        # A stream chunk may arrive with an empty ``choices`` list (for
        # example a usage-only chunk); indexing [0] on it would raise
        # IndexError and abort the whole reply, so skip such chunks.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            partial_message = partial_message + piece
            yield partial_message

    
import gradio as gr

# Serve predict through a Gradio chat UI, listening on 0.0.0.0:9999.
(
    gr.ChatInterface(predict, chatbot=gr.Chatbot(height=700))
    .launch(server_name='0.0.0.0', server_port=9999)
)


# 归档

## 使用http请求

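当时因为 base_url 填写了完整接口的 URL（例如 `http://127.0.0.1:7777/v1/chat/completions`，而正确写法是只写到 `/v1`，即 `http://127.0.0.1:7777/v1`），导致 openai 的 api 请求方案不成功，所以写了下面直接发送 http 请求的方案。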

目前openai的api方案已经调通，下面的方案可以归档了。

In [None]:
import json
import requests
from requests.adapters import HTTPAdapter


def call_llm(message, llm_url="http://127.0.0.1:7777/v1/chat/completions"):
    """POST a chat-completions request and return the reply text.

    Args:
        message: List of ``{"role": ..., "content": ...}`` dicts, sent as the
            ``messages`` field of the request body.
        llm_url: Full URL of the chat-completions endpoint.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request fails, the status code is not 200, or the response body does
        not have the expected structure.
    """
    headers = {"Content-Type": "application/json"}
    data = json.dumps({"model": "Qwen1.5-14B-Chat-GPTQ-Int4", "top_k": 5, "top_p": 0.85, "temperature": 0.3,
                       "messages": message})
    # The context manager closes the session (and its pooled connections) on
    # every exit path; the bare Session() used before was never closed.
    with requests.Session() as s:
        # Retry failed connection attempts up to 3 times; cover both schemes
        # so a caller-supplied https URL gets the same treatment.
        s.mount('http://', HTTPAdapter(max_retries=3))
        s.mount('https://', HTTPAdapter(max_retries=3))
        try:
            res = s.post(llm_url, data=data, headers=headers, timeout=600)
        except requests.exceptions.RequestException as e:
            print(e)
            return None
        print(res)
        if res.status_code != 200:
            return None
        try:
            return res.json()['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # A 200 response with a non-JSON or unexpectedly shaped body is
            # reported the same way as any other failure: print and None.
            print(e)
            return None

# System message that predict() places at the start of every request.
system_prompt = "你是 Qwen1.5, Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 你擅长中文和英文的对话。你会为用户提供安全，有帮助，准确的回答。"

def predict(message, history):
    """Answer ``message`` through ``call_llm`` with earlier turns as context.

    ``history`` is iterated as (user text, assistant text) pairs. The request
    is the system prompt, then each past pair as a user message followed by an
    assistant message, then ``message`` as the final user entry.

    Returns whatever ``call_llm`` returns for that message list.
    """
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(
        {"role": speaker, "content": text}
        for human, assistant in history
        for speaker, text in (("user", human), ("assistant", assistant))
    )
    messages.append({"role": "user", "content": message})
    return call_llm(messages)
    
import gradio as gr

# Serve predict through a Gradio chat UI, listening on 0.0.0.0:9999.
(
    gr.ChatInterface(predict, chatbot=gr.Chatbot(height=700))
    .launch(server_name='0.0.0.0', server_port=9999)
)
