## 调用本地 ollama服务内的模型

In [None]:
import requests
import json

def stream_chat(prompt, model="qwen2:7b"):
    """
    流式输出模型回复
    :param prompt: 输入的提示词
    :param model: 使用的模型名称
    """
    url = "http://localhost:11434/api/generate"
    data = {
    "model": model,
    "prompt": prompt,
    "stream": True
    }
    try:
        response = requests.post(url, json=data, stream=True)
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                json_response = json.loads(line)
            if 'response' in json_response:
                print(json_response['response'], end='', flush=True)
            if json_response.get('done', False):
                print() # 换行
    except requests.exceptions.RequestException as e:
        print(f"请求错误: {e}")
# 使用示例
if __name__ == "__main__":
    prompt= "写一首关于春天的诗"
    print(prompt)
    stream_chat(prompt)

In [9]:
from langchain_community.llms import Ollama
ollama = Ollama(
    base_url='http://127.0.0.1:11434',
    # model='llama3.2" 
    model="qwen2:7b",
    timeout=60)

prompt =  "python有哪些应用场景?回答在三行以内"
result =ollama.invoke(prompt)
print(result)

Python广泛应用于Web开发、数据科学、人工智能、自动化脚本和软件工程等领域。


## 调用远程通义模型  需要 .env 文件内有 api key 才可用

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Tongyi

#加载环境变量，安装方式:pipinstallpython-dotenv
from dotenv import load_dotenv
load_dotenv()

my_prompt = PromptTemplate.from_template("""回答这个问题:{input}""")
llm = Tongyi()
chain =my_prompt|llm| StrOutputParser()
output = chain.invoke({"input":"langchain比llm多了哪些能力?"})
print(output)

## 调用本地ollama 保留链式处理

In [12]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import Ollama

# 加载环境变量，安装方式:pip install python-dotenv
from dotenv import load_dotenv
load_dotenv()

# 创建 PromptTemplate 模板定义
my_prompt = PromptTemplate.from_template("""回答这个问题:{input}""")

# 创建 Ollama 实例
# 确保 Ollama 服务正在运行，并且模型已经拉取到本地
ollama = Ollama(
    base_url='http://127.0.0.1:11434',  # 本地 Ollama 服务地址
    model='qwen2:7b',  # 你要使用的模型名称
    timeout=60
)

# 链式动作可调用api 调用数据库  爬虫 结果储存 等动作
# 创建链式调用  模板定义->大模型处理->输出内容解析   
chain = my_prompt | ollama | StrOutputParser()

# 调用链式调用
output = chain.invoke({"input": "你是谁？20字以内回答"})

# 打印输出
print(output)

我是通义千问，阿里云研发的大型语言模型。


## 调用embedding

In [None]:
from langchain_community.embeddings import OllamaEmbeddings 
# from lang import OllamaEmbeddings 

# 创建 OllamaEmbeddings 实例
embeddings = OllamaEmbeddings(
    base_url="http://127.0.0.1:11434",  # 本地 Ollama 服务地址
    model="bge-m3:latest"  # 你要使用的 Embedding 模型名称
)

# 要进行嵌入处理的文本
text = "这是一段需要生成嵌入向量的文本。"

# 生成嵌入向量
vector = embeddings.embed_query(text)

print("嵌入向量:", vector)

  embeddings = OllamaEmbeddings(


嵌入向量: [-0.7798959016799927, 0.4922415018081665, -0.441287100315094, 0.28875255584716797, -0.3583776354789734, -0.4594695568084717, 0.6832022666931152, 0.05733785033226013, 0.4794306755065918, 0.5187544822692871, -0.4901088774204254, -0.1318829357624054, 0.7373702526092529, -0.14499609172344208, 0.2569102942943573, -0.6828105449676514, 0.2604897618293762, 0.13714514672756195, 0.7221396565437317, -0.335860013961792, -0.4375118017196655, 0.4374937415122986, -0.47612062096595764, 0.04323546215891838, 0.523111879825592, 1.3132944107055664, 0.23885981738567352, 0.39085492491722107, -0.21066151559352875, -0.47984954714775085, 0.9997566938400269, 0.5261889696121216, 0.28176015615463257, -1.6304614543914795, 0.5497358441352844, -1.2524627447128296, -0.6110866665840149, -0.17967568337917328, -0.8742796182632446, 0.7927057147026062, -0.3652312457561493, -0.3822226822376251, 0.38097986578941345, -0.9280879497528076, 0.5939705967903137, -0.8186105489730835, 0.484907865524292, -0.5899010896682739, -

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.runnables import RunnablePassthrough

# 创建 PromptTemplate
my_prompt = PromptTemplate.from_template("""回答这个问题:{input}""")

# 创建 OllamaEmbeddings 实例
embeddings = OllamaEmbeddings(
    base_url="http://127.0.0.1:11434",
    model="bge-m3:latest"
)

embed_model =  RunnablePassthrough.assign(embedding=lambda x: embeddings.embed_query(x["input"]))

# 创建链式调用 匿名函数会把 my_prompt 的输出（字符串）包装成字典 提供给 RunnablePassthrough使用
chain = my_prompt | (lambda x: {"input": x})  | embed_model

# 调用链式调用
output = chain.invoke({"input": "这是一个测试问题。"})

print("嵌入向量:", output["embedding"])

嵌入向量: [-1.05576491355896, 0.5996000170707703, -0.5324851870536804, -0.3476364314556122, -0.7919272184371948, -1.0490984916687012, 0.4075475037097931, 1.178335189819336, 0.43738114833831787, -0.14520221948623657, -0.7531581521034241, 0.07962936162948608, -0.0024054311215877533, -0.006294973194599152, 0.2424643337726593, -0.7145180106163025, 0.13779820501804352, -0.3432404100894928, -0.49929702281951904, -0.7475741505622864, -1.3949010372161865, -0.09413941949605942, -0.09653081744909286, -0.2642526626586914, 0.728965163230896, 0.5470764636993408, -0.13258464634418488, -0.45372459292411804, 0.08940847963094711, 0.19150951504707336, 1.0922967195510864, -0.59210604429245, 0.002966463565826416, -1.3575297594070435, -0.35116463899612427, -0.5823978185653687, -0.4085228145122528, -1.2664270401000977, -0.617805004119873, 0.3757806122303009, 0.3168157935142517, -0.9225535988807678, 0.46308064460754395, -1.2691782712936401, -0.36620938777923584, -0.07030166685581207, 0.3066474497318268, -0.97445

In [17]:
## 搭建RAG

In [None]:
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader 
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain.vectorstores.utils import filter_complex_metadata
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

#Create the prompte from the template.
promptTemplate = """Answer the question as precise as possible using the provided context, 
    If the answer isnot contained in the context, say "answer not available in context" \n\n
    Context:{context}Question: {question} Answer:"""
#日 期
modelSel=""

#Load the PDF file to ChromaDB
def loadDataFromPDFFile(filePath):
    loader =PyPDFLoader(filePath) 
    pages =loader.load_and_split()
    chunks =filter_complex_metadata(pages)
    vector_store = Chroma.from_documents(documents=chunks, embedding=FastEmbedEmbeddings())
    return vector_store

def modelResponse(message ,history):
    llm = ChatOllama(model = conf["model"])
    prompt = PromptTemplate(template=promptTemplate , input_variables=["context","question"])

    #Initiate the retriever
    dbLoaded =loadDataFromPDFFile("~/Desktop/hp/HP1.pdf" )
    refriever = dbLoaded,as_retriever(search_type="similarity_score threshold",
                                      search_kwargs ={'k': 5,'score threshold': 0.2})
    hpChain =(
        {"context": retriever ,"question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return hpChain.invoke(message)

if __name__ =="__main__":
    conf={}
    with open("config.json","r") as confFile:
        conf = json.load(confFile)
        print(conf["model"])
    # chatUT = gradio