In [None]:
# Install the third-party SDK packages (dashscope, zhipuai) into the kernel's environment.
%pip install --upgrade --quiet  dashscope
%pip install -qU zhipuai

In [None]:
from langchain_community.llms import Tongyi
from dotenv import load_dotenv
import os
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
# Load environment variables (API keys) from a .env file.
load_dotenv()  # no path is given, so python-dotenv locates the .env file on its own

In [None]:
# Completion-style Tongyi client; the DashScope key is read from the environment.
llm = Tongyi(
    model_name="qwen-plus",
    dashscope_api_key=os.getenv("DASHSCOPE_API_KEY"),
)
print(llm.model_name)

In [None]:
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_core.messages import HumanMessage

# Chat client with streaming enabled on the model itself.
# NOTE(review): no API key is passed here; presumably ChatTongyi picks it up
# from the environment loaded by load_dotenv() — confirm.
chatLLM = ChatTongyi(
    streaming=True,
)

# `stream()` yields the reply chunk by chunk. Streaming is already configured
# on the model, so no extra keyword argument is forwarded to the provider call.
res = chatLLM.stream([HumanMessage(content="你好请跟我进行十次对话")])
for r in res:
    print("chat resp:", r)

In [None]:
# Ask for a JSON-only answer about a movie (English prompt).
messages = [
    SystemMessage(content="Return only a JSON object as a response with no explanation text"),
    HumanMessage(content="Generate a JSON response object containing a brief description and release year for the movie '上甘岭'"),
]

# `invoke` is the supported entry point; calling the model object directly is deprecated.
response = chatLLM.invoke(messages)
print(response)

In [None]:
# Same request as above, phrased in Chinese.
messages2 = [
    SystemMessage(content="只返回一个JSON对象作为响应，不包含解释文本"),
    HumanMessage(content="生成一个JSON响应对象，其中包含电影《上甘岭》的简要描述和发行年份。"),
]

# `invoke` is the supported entry point; calling the model object directly is deprecated.
response2 = chatLLM.invoke(messages2)
print(response2)

In [None]:
# One [system, human] conversation per movie title; all conversations are
# submitted together through `generate`.
batch_messages = [
    [
        SystemMessage(content="Return only a JSON object as a response with no explanation text"),
        HumanMessage(content=f"Generate a JSON response object containing a brief description and release year for the movie '{title}'"),
    ]
    for title in ("Inception", "Avatar")
]

result = chatLLM.generate(batch_messages)
print(result)

In [48]:
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.utils import get_from_dict_or_env
from pydantic import BaseModel, Field, model_validator
import os
from zhipuai import ZhipuAI

class ZhipuAIEmbeddings(BaseModel, Embeddings):
    """ZhipuAI embedding model integration.

    Setup:

        To use, you should have the ``zhipuai`` python package installed, and the
        environment variable ``ZHIPUAI_API_KEY`` set with your API KEY
        (``ZHIPU_API_KEY`` is accepted as a fallback).

        More instructions about ZhipuAi Embeddings, you can get it
        from  https://open.bigmodel.cn/dev/api#vector

        .. code-block:: bash

            pip install -U zhipuai
            export ZHIPUAI_API_KEY="your-api-key"

    Key init args — completion params:
        model: Optional[str]
            Name of ZhipuAI model to use.
        api_key: str
            Automatically inferred from env var `ZHIPUAI_API_KEY` (or, failing
            that, `ZHIPU_API_KEY`) if not provided.

    See full list of supported init args and their descriptions in the params section.

    Instantiate:

        .. code-block:: python

            from langchain_community.embeddings import ZhipuAIEmbeddings

            embed = ZhipuAIEmbeddings(
                model="embedding-2",
                # api_key="...",
            )

    Embed single text:
        .. code-block:: python

            input_text = "The meaning of life is 42"
            embed.embed_query(input_text)

        .. code-block:: python

            [-0.003832892, 0.049372625, -0.035413884, -0.019301128, 0.0068899863, 0.01248398, -0.022153955, 0.006623926, 0.00778216, 0.009558191, ...]


    Embed multiple text:
        .. code-block:: python

            input_texts = ["This is a test query1.", "This is a test query2."]
            embed.embed_documents(input_texts)

        .. code-block:: python

            [
                [0.0083934665, 0.037985895, -0.06684559, -0.039616987, 0.015481004, -0.023952313, ...],
                [-0.02713102, -0.005470169, 0.032321047, 0.042484466, 0.023290444, 0.02170547, ...]
            ]
    """  # noqa: E501

    client: Any = Field(default=None, exclude=True)  #: :meta private:
    model: str = Field(default="embedding-2")
    """Model name"""
    api_key: str
    """Automatically inferred from env var `ZHIPUAI_API_KEY` (falling back to
    `ZHIPU_API_KEY`) if not provided."""
    dimensions: Optional[int] = None
    """The number of dimensions the resulting output embeddings should have.

    Only supported in `embedding-3` and later models.
    """

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Resolve the API key and build the ZhipuAI client before field validation.

        The key is taken from ``values["api_key"]`` when present and non-empty,
        otherwise from the ``ZHIPUAI_API_KEY`` environment variable, otherwise
        from ``ZHIPU_API_KEY``.

        Args:
            values: Raw keyword arguments passed to the constructor.

        Returns:
            ``values`` with ``api_key`` resolved and ``client`` set to a
            ``ZhipuAI`` instance.

        Raises:
            ValueError: If no API key can be found (raised by
                ``get_from_dict_or_env``).
            ImportError: If the ``zhipuai`` package is not installed.
        """
        values["api_key"] = get_from_dict_or_env(
            values,
            "api_key",
            "ZHIPUAI_API_KEY",
            # Fallback for the variable name used in the class documentation;
            # an empty value is treated as "not set" so the lookup still fails loudly.
            default=os.getenv("ZHIPU_API_KEY") or None,
        )
        # Only the import is guarded: errors raised while constructing the
        # client are not import problems and must propagate unchanged.
        try:
            from zhipuai import ZhipuAI
        except ImportError as exc:
            raise ImportError(
                "Could not import zhipuai python package. "
                "Please install it with `pip install zhipuai`."
            ) from exc
        values["client"] = ZhipuAI(api_key=values["api_key"])
        return values

    def embed_query(self, text: str) -> List[float]:
        """Embed a single text with the configured ZhipuAI model.

        Args:
            text: A text to embed.

        Returns:
            The embedding of ``text`` as a list of floats.
        """
        return self.embed_documents([text])[0]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts with the configured ZhipuAI model.

        Args:
            texts: A list of text documents to embed.

        Returns:
            A list of embeddings, one per input text, each represented as a
            list of float values. An empty input yields an empty list without
            calling the API.
        """
        if not texts:
            return []
        request: Dict[str, Any] = {"model": self.model, "input": texts}
        # `dimensions` is only sent when explicitly configured.
        if self.dimensions is not None:
            request["dimensions"] = self.dimensions
        resp = self.client.embeddings.create(**request)
        return [r.embedding for r in resp.data]



In [55]:
# `dimensions` is documented on ZhipuAIEmbeddings as supported only by
# `embedding-3` and later models, so it is not passed for `embedding-2`.
embeddings = ZhipuAIEmbeddings(
    model="embedding-2",
    api_key=os.getenv("ZHIPU_API_KEY"),
)



In [None]:
# Create an empty in-memory vector store backed by the ZhipuAI embeddings.
# InMemoryVectorStore is imported from langchain_core, its canonical location.
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)
print(vector_store)

In [None]:
from langchain_core.documents import Document

# Three sample documents with explicit ids; the third carries no metadata.
documents = [
    Document(id="1", page_content="foo", metadata={"baz": "bar"}),
    Document(id="2", page_content="thud", metadata={"bar": "baz"}),
    Document(id="3", page_content="i will be deleted :("),
]
document_1, document_2, document_3 = documents

# Last expression of the cell: its return value is displayed as the cell output.
vector_store.add_documents(documents=documents)

In [None]:
# Build a vector store from sample texts, retrieve the best match for a query,
# then embed a single text directly.

# InMemoryVectorStore is imported from langchain_core, its canonical location.
from langchain_core.vectorstores import InMemoryVectorStore

# Sample corpus: each string in the list is treated as one document.
text = [
    "LangChain is the framework for building context-aware reasoning applications",
    "A list of embeddings for each document in the input list",
    "深圳市，简称“深”，别称鹏城，广东省辖地级市、副省级市 [285]、国家计划单列市，超大城市 [267]"
]

# `from_texts` embeds every entry of `text` with `embeddings` (defined in an
# earlier cell) and stores the result.
vector_store = InMemoryVectorStore.from_texts(
    texts=text,  # already a list, so it is passed as-is rather than wrapped in [text]
    embedding=embeddings,
)

# Retriever view of the store; k=1 returns only the top-ranked document.
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

# Query used for the similarity search.
query = "中国?"

# Run the retrieval exactly once; on failure report the error and fall back to
# an empty result so the rest of the cell still runs.
try:
    retrieved_documents = retriever.invoke(query)
except Exception as e:
    print(f"检索过程中发生错误: {e}")
    retrieved_documents = []

# Show the top-ranked document, if any was returned.
if retrieved_documents:
    print("排名第一的检索到的文档内容：")
    print(retrieved_documents[0].page_content)
else:
    print("未检索到相关文档。")


# Embed a single text (the first corpus entry) directly with the embedding model.
single_text = text[0]
single_vector = embeddings.embed_query(single_text)

# Print the resulting vector.
print("文本的向量表示：")
print(single_vector)


In [None]:
# Rank the sample texts against a query using the raw embeddings only
# (no vector store involved).
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


# Embed the corpus first, then the query.
vectors = embeddings.embed_documents(text)
query_embedding = embeddings.embed_query("介绍深圳")

# Lift the query vector to a single-row 2-D array before comparing.
query_embedding_2d = np.asarray(query_embedding)[np.newaxis, :]

# Cosine similarity between the query and every document.
similarities = cosine_similarity(query_embedding_2d, vectors)

# Show the similarity scores.
print(similarities)

# Position of the highest score; with a single query row the flat index is
# the document index.
most_similar_doc_index = similarities.argmax()
print(f"最相似的文档索引为: {most_similar_doc_index}")
print(f"最相似的文档内容为: {text[most_similar_doc_index]}")


In [None]:
# Embed a list of longer documents and rank them against a query.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Two documents, each written as adjacent string literals that Python joins
# into a single string.
documents = [
    (
        "北京市（Beijing），简称“京”，古称燕京、北平，是中华人民共和国首都、直辖市、"
        "国家中心城市、超大城市，[185]国务院批复确定的中国政治中心、文化中心、国际交往中心、"
        "科技创新中心，[1]中国历史文化名城和古都之一，世界一线城市。[3][142][189]截至2023年10月，"
        "北京市下辖16个区，总面积16410.54平方千米。[82][195][197]2022年末，北京市常住人口2184.3万人。[167]"
    ),
    (
        "深圳市，简称“深”，别称鹏城，广东省辖地级市、副省级市 [285]、国家计划单列市，超大城市 [267]，"
        "国务院批复确定的经济特区、全国性经济中心城市 [1]和国家创新型城市，粤港澳大湾区核心引擎城市之一 [242]。"
        "截至2022年末，全市下辖9个区，总面积1997.47平方千米，常住人口1766.18万人 [204][257]。"
    )
]

# Embed all documents and show the raw result.
doc_result = embeddings.embed_documents(documents)
print(doc_result)

# Array view of the embeddings, used only to report their shape.
doc_array = np.array(doc_result)
print(doc_array.shape)

# Embed the query and lift it to a single-row 2-D array.
query_embedding = embeddings.embed_query("介绍深圳")
query_embedding_2d = np.asarray(query_embedding)[np.newaxis, :]

# Cosine similarity between the query and every document.
similarities = cosine_similarity(query_embedding_2d, doc_result)

# Show the similarity scores.
print(similarities)

# Position of the highest score; with a single query row the flat index is
# the document index.
most_similar_doc_index = similarities.argmax()
print(f"最相似的文档索引为: {most_similar_doc_index}")
print(f"最相似的文档内容为: {documents[most_similar_doc_index]}")