## VLLMOpenAI

In [3]:
from langchain_community.llms import VLLMOpenAI

In [None]:
# Completion client that talks to the OpenAI-compatible endpoint on port 6003.
# The key is the placeholder string "EMPTY"; generation stops at the first ".".
llm = VLLMOpenAI(
    model_name="qwen2.5",
    openai_api_base="http://0.0.0.0:6003/v1",
    openai_api_key="EMPTY",
    model_kwargs={"stop": ["."]},
)

# Stream the reply to the greeting "你好" ("hello") and echo each piece as it
# arrives, without inserting newlines between pieces.
token_stream = llm.stream("你好")
for chunk in token_stream:
    print(chunk, end="", flush=True)

## OpenAI

In [None]:
from openai import OpenAI

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://0.0.0.0:6003/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Request a streamed chat completion: a system prompt plus a user message asking
# for a bedtime story ("为我讲一个睡前小故事").
chat_response = client.chat.completions.create(
    model="qwen2.5",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "为我讲一个睡前小故事"},
    ],
    stream=True,
)

# Print the text increments as they arrive.
for chunk in chat_response:
    # Some chunks carry no choices at all; indexing [0] on them would raise.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Role-only and final chunks have content=None; printing it verbatim would
    # emit the literal string "None" into the story.
    if delta_text:
        print(delta_text, end="", flush=True)

## VLLM

In [None]:
import torch

# Report whether PyTorch can see a CUDA device before loading a model.
cuda_available = torch.cuda.is_available()
print(cuda_available)

In [13]:
import gc
import ctypes
import torch
def clean_memory(deep=False):
    """Free memory between experiments.

    Runs Python's garbage collector, optionally asks the C allocator to trim
    its heap, and finally calls ``torch.cuda.empty_cache()``.

    Args:
        deep: When True, additionally call ``malloc_trim(0)`` from
            ``libc.so.6``. This step is best-effort: on platforms where that
            library or symbol is unavailable it is skipped instead of raising.

    Returns:
        None.
    """
    gc.collect()
    if deep:
        try:
            ctypes.CDLL("libc.so.6").malloc_trim(0)
        except (OSError, AttributeError):
            # OSError: libc.so.6 cannot be loaded (e.g. macOS, Windows).
            # AttributeError: the loaded libc does not export malloc_trim.
            # Trimming is only an optimisation, so carry on without it.
            pass
    torch.cuda.empty_cache()

In [None]:
from langchain_community.llms import VLLM

# Build the LangChain VLLM wrapper around the Qwen2.5-7B-Instruct checkpoint
# stored at the local path below, with the sampling settings used in this demo.
llm = VLLM(
    model="/root/autodl-fs/modelscope/Qwen2.5-7B-Instruct",
    trust_remote_code=False,  # trust_remote_code is left disabled here
    temperature=0.8,
    top_p=0.95,
    top_k=10,
    max_new_tokens=128,
)

# Smoke test: send one English prompt and print the generated text.
answer = llm.invoke("What are the most popular Halloween Costumes?")
print(answer)

In [None]:
# Second prompt, this time in Chinese ("Hello, who are you?").
reply = llm.invoke("你好，你是谁")
print(reply)

## ChatOpenAI

In [5]:
from langchain_openai import ChatOpenAI

import os

# Chat model served through DashScope's OpenAI-compatible endpoint.
# The API key is read from the DASHSCOPE_API_KEY environment variable instead
# of being embedded in the notebook; a missing variable fails fast with
# KeyError. Any key that was previously committed here should be revoked.
llm = ChatOpenAI(
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    api_key=os.environ["DASHSCOPE_API_KEY"],
    model="qwen2.5-14b-instruct",
    temperature=1.0,
)

In [7]:
from pydantic import BaseModel

# Schema for the structured reply requested from the chat model; it is passed to
# `llm.with_structured_output(...)` right after this definition.
# The docstring means "story outline". NOTE(review): it is presumably forwarded
# to the model as the schema description, so it is left verbatim - confirm.
class StructuredOutput(BaseModel):
    """故事大纲"""
    title: str  # story title
    key_points: list[str]  # short list of plot points
    content: str  # full story text

# Derive a runnable from `llm` whose replies come back as StructuredOutput,
# using the "function_calling" method.
llm_with_structured_output = llm.with_structured_output(
    StructuredOutput,
    method="function_calling",
)

In [8]:
# Ask for a bedtime story ("为我讲个睡前小故事"); as the last expression in the
# cell, the returned StructuredOutput instance is shown as the cell output.
llm_with_structured_output.invoke("为我讲个睡前小故事")

StructuredOutput(title='勇敢的小兔子', key_points=['小兔子发现自己的菜园被破坏了', '它决定找出破坏者', '小兔子在夜晚跟踪到了一只偷吃蔬菜的大灰狼'], content='从前，在一片宁静的森林里，住着一群快乐的动物们。其中有一只名叫蓝蓝的小兔子，它最喜欢的事情就是打理它的菜园子。\n\n有一天早上，蓝蓝醒来后发现自己辛苦种下的胡萝卜、青菜和土豆都被咬得乱七八糟。蓝蓝十分难过，但它并没有放弃，它下定决心要找到那个破坏者。\n\n经过几天的秘密调查，蓝蓝终于在一个月黑风高的夜晚，发现了一个大身影正在它的菜园子里啃食它的蔬菜。当蓝蓝看清那是谁的时候，它惊讶地发现原来是森林里的大灰狼。\n\n第二天早晨，蓝蓝把这件事情告诉了所有的森林居民。大家都感到非常震惊，但同时也很感谢勇敢的小兔子。从此以后，大灰狼再也不敢在森林里随意偷吃其他动物的食物了，森林又恢复了往日的平静和和谐。')

In [3]:
# Toy tool for the tool-calling demo: it is placed in `tools` and bound to the
# chat model in this same cell.
# NOTE(review): the docstring below is presumably used as the tool description
# sent to the model, so treat it as runtime text - confirm before rewording.
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: First integer
        b: Second integer
    """
    return a + b

# Tools offered to the model; only the `add` function for now.
tools = [add]

# Bind the tools to the chat model; tool_choice="auto" is passed through as-is.
llm_with_tools = llm.bind_tools(
    tools,
    tool_choice="auto",
)

In [4]:
# Arithmetic question sent to the tool-bound model; the bare expression displays
# the returned message, which in the captured output holds a call to `add`.
llm_with_tools.invoke("What is 2 + 2?")

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'chatcmpl-tool-32dbe9b07a694c4cb3aa7e36d8deda3e', 'function': {'arguments': '{"a": 2, "b": 2}', 'name': 'add'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 192, 'total_tokens': 217, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen2.5', 'system_fingerprint': None, 'id': 'chatcmpl-be604f7c7a3e46ae9d27e4b1062c2f8c', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-3c3e1d4f-eda0-4d2c-9181-5292ed33288b-0', tool_calls=[{'name': 'add', 'args': {'a': 2, 'b': 2}, 'id': 'chatcmpl-tool-32dbe9b07a694c4cb3aa7e36d8deda3e', 'type': 'tool_call'}], usage_metadata={'input_tokens': 192, 'output_tokens': 25, 'total_tokens': 217, 'input_token_details': {}, 'output_token_details': {}})

In [6]:
# Plain chat call on the unwrapped model with the greeting "你好" ("hello");
# the returned message is displayed as the cell output.
llm.invoke("你好")

AIMessage(content='你好！很高兴为你服务。有什么问题或需要帮助的吗？', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 30, 'total_tokens': 45, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen2.5', 'system_fingerprint': None, 'id': 'chatcmpl-0c6d7e7c89d140899fb895612241cc32', 'finish_reason': 'stop', 'logprobs': None}, id='run-45311403-6d12-4160-9a21-74928d3363ba-0', usage_metadata={'input_tokens': 30, 'output_tokens': 15, 'total_tokens': 45, 'input_token_details': {}, 'output_token_details': {}})

In [1]:
from pydantic import BaseModel

# Schema for the structured reply requested from ChatTongyi further down, where
# it is passed to `chatLLM.with_structured_output(...)`.
# The docstring means "story outline". NOTE(review): it is presumably forwarded
# to the model as the schema description, so it is left verbatim - confirm.
class StructuredOutput(BaseModel):
    """故事大纲"""
    title: str
    key_points: list[str]
    content: str


In [2]:
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_core.messages import HumanMessage

import os

# Tongyi chat model. The DashScope key is read from the DASHSCOPE_API_KEY
# environment variable instead of being embedded in the notebook; a missing
# variable fails fast with KeyError. Any key that was previously committed
# here should be revoked.
chatLLM = ChatTongyi(
    # model="qwen2.5-14b-instruct",
    # streaming=True,
    dashscope_api_key=os.environ["DASHSCOPE_API_KEY"],
)
# Rebind chatLLM to the wrapper that returns StructuredOutput replies.
chatLLM = chatLLM.with_structured_output(StructuredOutput)

In [3]:
# Single-turn conversation: one human message asking for a bedtime story.
input_text = [
    HumanMessage(content="为我讲个睡前小故事"),
]

# Run the structured-output model on that conversation and keep the result.
res = chatLLM.invoke(input_text)


In [4]:
# Display the result stored by the previous cell.
res