In [None]:
# 语义分析划分段落

from langchain_community.document_loaders import (PyPDFLoader)
# 换一个OCR？
import os
import numpy as np

from langchain_community.vectorstores import Chroma

from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)
from sentence_transformers import SentenceTransformer
from ltp import StnSplit


# Folder that is scanned for ``.pdf`` files by the ingestion function below.
path_docfolder = "STPDF"
# Directory handed to Chroma as ``persist_directory``.
path_db = "STDB"

# SentenceTransformer model used both for chunking and for the vector store.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"

# LangChain embedding wrapper passed to Chroma when storing the chunks.
embedding_function = SentenceTransformerEmbeddings(model_name=model_name)

# Percentile (0-100) given to ``np.percentile``: neighbour distances above it
# become chunk boundaries.
THRESHOLD = 70
    

class SemanticParagraphSplitter:
    """Group sentences into chunks using embedding distance between neighbours.

    The text is cut into sentences, every sentence is embedded together with
    a window of surrounding sentences, and a chunk boundary is placed after
    each sentence whose cosine distance to the next one lies above the
    ``threshold``-th percentile of all neighbour distances.
    """

    def __init__(self, threshold=THRESHOLD, model_path=model_name):
        """Store the percentile threshold and load the embedding model.

        Args:
            threshold: Percentile (0-100) passed to ``np.percentile``.
            model_path: Name or path handed to ``SentenceTransformer``.
        """
        self.threshold = threshold
        self.model = SentenceTransformer(model_path)

    @staticmethod
    def cut_sentences(text):
        """Return the sentences produced by ``StnSplit().split(text)``."""
        return StnSplit().split(text)

    @staticmethod
    def combine_sentences(sentences, buffer_size=2):
        """Attach a context window to every sentence dict (in place).

        For each entry, ``'combined_sentence'`` is set to the entry's own
        sentence joined by single spaces with up to ``buffer_size`` sentences
        before and after it.

        Returns:
            The same ``sentences`` list, for chaining.
        """
        total = len(sentences)
        for i, entry in enumerate(sentences):
            # Clamp the window to the list bounds instead of testing every index.
            window = sentences[max(0, i - buffer_size):min(total, i + 1 + buffer_size)]
            entry['combined_sentence'] = ' '.join(item['sentence'] for item in window)
        return sentences

    def build_sentences_dict(self, sentences):
        """Build the per-sentence dicts and add their window embeddings.

        Each returned dict has the keys ``'sentence'``, ``'index'``,
        ``'combined_sentence'`` and ``'combined_sentence_embedding'``.
        An empty input returns an empty list without calling the model.
        """
        indexed_sentences = [{'sentence': x, 'index': i} for i, x in enumerate(sentences)]
        if not indexed_sentences:
            return indexed_sentences
        combined_sentences = self.combine_sentences(indexed_sentences)

        # Embeddings are L2-normalised, so a dot product is the cosine similarity.
        embeddings = self.model.encode(
            [x['combined_sentence'] for x in combined_sentences],
            normalize_embeddings=True,
        )
        for sentence, embedding in zip(combined_sentences, embeddings):
            sentence['combined_sentence_embedding'] = embedding

        return combined_sentences

    @staticmethod
    def calculate_cosine_distances(sentences):
        """Compute the cosine distance between each sentence and the next one.

        The distance is also stored on the earlier sentence of each pair under
        ``'distance_to_next'``; the last sentence gets no such key.

        Returns:
            ``(distances, sentences)`` where ``distances`` has
            ``len(sentences) - 1`` entries (none for fewer than two sentences).
        """
        distances = []
        for current, following in zip(sentences, sentences[1:]):
            similarity = (
                current['combined_sentence_embedding']
                @ following['combined_sentence_embedding'].T
            )
            distance = 1 - similarity
            distances.append(distance)
            current['distance_to_next'] = distance
        return distances, sentences

    def calculate_indices_above_thresh(self, distances):
        """Return the positions whose distance exceeds the percentile cut-off.

        An empty ``distances`` yields an empty list, because ``np.percentile``
        cannot be evaluated on empty input.
        """
        if len(distances) == 0:
            return []
        breakpoint_distance_threshold = np.percentile(distances, self.threshold)
        return [i for i, x in enumerate(distances) if x > breakpoint_distance_threshold]

    @staticmethod
    def cut_chunks(indices_above_thresh, sentences):
        """Join sentences into chunks, ending a chunk at every breakpoint index.

        Args:
            indices_above_thresh: Ascending indices; sentence ``i`` is the last
                sentence of its chunk.
            sentences: Sentence dicts carrying a ``'sentence'`` key.

        Returns:
            List of chunk strings (sentences joined by a single space).
        """
        chunks = []
        start_index = 0
        for index in indices_above_thresh:
            group = sentences[start_index:index + 1]
            chunks.append(' '.join(d['sentence'] for d in group))
            start_index = index + 1

        # Whatever follows the last breakpoint forms the final chunk.
        if start_index < len(sentences):
            chunks.append(' '.join(d['sentence'] for d in sentences[start_index:]))

        return chunks

    def split(self, text):
        """Split ``text`` into semantic chunks.

        With zero or one sentence there is nothing to compare, so the
        sentence list is returned unchanged.
        """
        single_sentences = self.cut_sentences(text)  # Pre-split with standard function
        print(f"{len(single_sentences)} single sentences were found")
        if len(single_sentences) <= 1:
            return single_sentences
        return self.split_passages(single_sentences)

    def split_passages(self, passages):
        """Chunk an already sentence-split list of strings.

        Fewer than two passages are returned as a list without embedding.
        """
        if len(passages) < 2:
            return list(passages)
        combined_sentences = self.build_sentences_dict(passages)
        distances, sentences = self.calculate_cosine_distances(combined_sentences)

        indices_above_thresh = self.calculate_indices_above_thresh(distances)
        return self.cut_chunks(indices_above_thresh, sentences)

def read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding):
    """Chunk every PDF in a folder and store the chunks in a Chroma database.

    Each page is loaded with ``PyPDFLoader``, stripped of spaces and newlines,
    split with ``SemanticParagraphSplitter`` and the resulting chunks are
    written with ``Chroma.from_texts``.

    Args:
        path_docfolder: Folder whose ``.pdf`` files (any letter case) are read.
        path_db: Directory passed to Chroma as ``persist_directory``.
        embedding: Embedding object passed to ``Chroma.from_texts``.

    Returns:
        ``True`` once processing has finished, including when there was
        nothing to store.
    """
    page_texts = []
    for filename in os.listdir(path_docfolder):
        if not filename.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(path_docfolder, filename)
        print(f"Reading file: {file_path}")

        for page in PyPDFLoader(file_path).load():
            text = page.page_content.replace(" ", "").replace("\n", "")
            # Blank pages produce no sentences and would break the splitter/store.
            if text:
                page_texts.append(text)

    if not page_texts:
        print("No PDF text found, nothing was stored")
        return True

    text_splitter = SemanticParagraphSplitter(threshold=THRESHOLD)
    chunks = []
    for content in page_texts:
        chunks.extend(chunk for chunk in text_splitter.split(content) if chunk)

    if not chunks:
        print("No PDF text found, nothing was stored")
        return True

    # One write for all chunks instead of re-opening the store for every page.
    Chroma.from_texts(chunks, embedding, persist_directory=path_db)
    print("Successfully save the embedding into DB")
    return True

# Runs the ingestion as soon as this cell executes, using the settings defined above.
read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding_function)

In [None]:
from langchain.vectorstores import Chroma

# Directory of the persisted Chroma store opened below.
path_db = "STDB"

from langchain.embeddings import HuggingFaceEmbeddings
# Embedding model name given to HuggingFaceEmbeddings.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"
embedding = HuggingFaceEmbeddings(model_name=model_name)
# Open the store at path_db with the embedding function used for queries.
vectorstore = Chroma(persist_directory=path_db, embedding_function=embedding)

from langchain.memory import ChatMessageHistory
from langchain.prompts.chat import ChatPromptTemplate,SystemMessagePromptTemplate,HumanMessagePromptTemplate,AIMessagePromptTemplate,MessagesPlaceholder
from langchain.schema import HumanMessage,SystemMessage,AIMessage
# from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
import os

# Environment flag set before the client is created.
# NOTE(review): presumably only read by a vLLM server process — confirm it is needed here.
os.environ['VLLM_USE_MODELSCOPE'] = 'True'

# Chat client talking to an OpenAI-compatible endpoint on localhost:8000.
chat = ChatOpenAI(
    model="qwen/Qwen-7B-Chat-Int4",
    openai_api_key="EMPTY",
    openai_api_base='http://localhost:8000/v1',
    # stop=['<|im_end|>']
)

# Prompt templates: fixed system message, running history, then the
# user message carrying the retrieved context and the question.
system_prompt = SystemMessagePromptTemplate.from_template(
    'You are a knowledge base quiz assistant.'
)
user_prompt = HumanMessagePromptTemplate.from_template('''
Answer the question based only on the following context:

{context1}

Question: {query1}
''')
full_chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_prompt,
        MessagesPlaceholder(variable_name="chat_history1"),
        user_prompt,
    ]
)

# Chat chain: map the invoke() payload keys onto the prompt variables,
# render the prompt and send it to the model.
chat_chain = (
    {
        "context1": itemgetter("retrievers"),
        "query1": itemgetter("query"),
        "chat_history1": itemgetter("chat_history"),
    }
    | full_chat_prompt
    | chat
)

# Start the conversation: read a question, retrieve context, ask the model.
# The loop ends cleanly on end-of-input or a keyboard/kernel interrupt.
chat_history = []
while True:
    try:
        query = input('query:')
    except (EOFError, KeyboardInterrupt):
        break
    retriever = vectorstore.similarity_search(query, k=20)
    # Pass only the chunk text to the prompt, not the repr of Document objects.
    context = "\n\n".join(doc.page_content for doc in retriever)
    response = chat_chain.invoke(
        {'retrievers': context, 'query': query, 'chat_history': chat_history}
    )
    chat_history.extend((HumanMessage(content=query), response))
    print(response.content)
    # Keep only the 20 most recent messages.
    chat_history = chat_history[-20:]

In [None]:
# RecursiveCharacterTextSplitter方法分词存入数据库

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)

import os  # used below; this cell did not import it itself

path_docfolder = "STPDF"
path_db = "STDB1"

model_name = "paraphrase-multilingual-MiniLM-L12-v2"
embedding_function = SentenceTransformerEmbeddings(model_name=model_name)

# The splitter settings are the same for every file, so build it once.
recursive_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

for filename in os.listdir(path_docfolder):
    if not filename.endswith('.pdf'):
        continue
    file_path = os.path.join(path_docfolder, filename)
    # Open the PDF file
    pdf_loader = PyPDFLoader(file_path, extract_images=True)
    # Parse the PDF and cut it into chunks
    chunks = pdf_loader.load_and_split(text_splitter=recursive_splitter)
    # from_texts expects a list of strings; a bare string would be stored
    # one character per entry.
    texts = [doc.page_content for doc in chunks if doc.page_content]
    if not texts:
        continue
    db2 = Chroma.from_texts(texts, embedding_function, persist_directory=path_db)
    print("Successfully save the embedding into DB")

In [None]:
# ollama+rag

from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter

# NOTE: the model name is left empty in this notebook; fill it in before running.
model_local = ChatOllama(model='')
# Split into chunks

# NOTE: placeholder entry; replace with the pages to index.
urls = [
    ""
]

# Load each URL separately, then flatten the per-URL document lists.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
doc_splits = text_splitter.split_documents(docs_list)

# embedding and store
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="webbase",
    embedding=embeddings.OllamaEmbeddings(model=''),
)

retriever = vectorstore.as_retriever()


def _join_page_contents(documents):
    """Concatenate the text of the retrieved documents for the prompt."""
    return "\n\n".join(doc.page_content for doc in documents)


# before RAG
print("==================================before RAG================================")
before_rag_template = "what is {topic}"
before_rag_prompt = ChatPromptTemplate.from_template(before_rag_template)
before_rag_chain = before_rag_prompt | model_local | StrOutputParser()
print(before_rag_chain.invoke({"topic": "Ollama"}))

# after RAG
print("==================================after RAG================================")
after_rag_template = '''
Answer the question based only on the following context:
{context}
Question: {query}
'''
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
after_rag_chain = (
    # The question string is sent to the retriever for the context and
    # passed through unchanged as the query.
    {"context": retriever | _join_page_contents, "query": RunnablePassthrough()}
    | after_rag_prompt
    | model_local
    | StrOutputParser()
)
print(after_rag_chain.invoke("what is Ollama"))


In [None]:
# ollama + gradio
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.output_parsers import PydanticOutputParser

def process_input(urls,question):
    """Answer ``question`` using the web pages listed in ``urls`` as context.

    Args:
        urls: Newline-separated URLs; blank lines are ignored.
        question: The question to answer.

    Returns:
        The model's answer as a string, or a short notice when no URL was given.
    """
    # NOTE: the model names are left empty in this notebook; fill them in before running.
    model_local = ChatOllama(model="")
    urls_list = [line.strip() for line in urls.split("\n") if line.strip()]
    if not urls_list:
        return "No URL was provided."

    # Each loader must be loaded to obtain its documents.
    docs = [WebBaseLoader(url).load() for url in urls_list]
    docs_list = [item for sublist in docs for item in sublist]

    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(docs_list)

    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="webbase",
        embedding=embeddings.OllamaEmbeddings(model="")
    )
    retriever = vectorstore.as_retriever()
    after_rag_template ='''
    Answer the question based only on the following context:
    {context}
    Question: {query}
    '''
    after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
    after_rag_chain = (
        {
            # Hand only the text of the retrieved documents to the prompt.
            "context": retriever | (lambda found: "\n\n".join(d.page_content for d in found)),
            "query": RunnablePassthrough(),
        }
        | after_rag_prompt
        | model_local
        | StrOutputParser()
    )
    # The chain input is the question string: it feeds both the retriever
    # and the passthrough.
    return after_rag_chain.invoke(question)

# define  Gradio interface
# Gradio UI: two text inputs (URL list, question) mapped onto process_input.
url_box = gr.Textbox(label="enter input")
question_box = gr.Textbox(label="Question")
iface = gr.Interface(
    fn=process_input,
    inputs=[url_box, question_box],
    outputs="text",
    title="Ollama Chat",
    description="Chat with Ollama",
)
iface.launch()

In [None]:
# 直接提问无上传文件版本 + ollama + gradio langchain效果不好？不知道为什么...
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


# Directory of the persisted Chroma store opened below.
path_db = "STDB"
# Embedding model name given to HuggingFaceEmbeddings.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"
embedding = HuggingFaceEmbeddings(model_name=model_name)
# Open the store at path_db with the embedding function used for queries.
vectorstore = Chroma(persist_directory=path_db, embedding_function=embedding)


def process_input(question):
    """Answer ``question`` from the persisted vector store.

    The 10 most similar chunks are retrieved, their text is placed in the
    prompt as context, and the prompt is sent to the local Ollama chat model.

    Args:
        question: The question to answer.

    Returns:
        The model's answer as a string.
    """
    # NOTE: the model name is left empty in this notebook; fill it in before running.
    model_local = ChatOllama(model="")
    documents = vectorstore.similarity_search(question, k=10)
    # The search result is a plain list, not a runnable, so it is turned into
    # the context text here and passed to the prompt as a value.
    context = "\n\n".join(doc.page_content for doc in documents)
    after_rag_template ='''
    Answer the question based only on the following context:
    {context}
    Question: {query}
    '''
    after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
    after_rag_chain = after_rag_prompt | model_local | StrOutputParser()
    return after_rag_chain.invoke({"context": context, "query": question})

# define  Gradio interface
# Gradio UI: a single question box mapped onto process_input.
question_box = gr.Textbox(label="请提出你的问题")
iface = gr.Interface(
    fn=process_input,
    inputs=question_box,
    outputs="text",
    title="llm Chat",
    description="根据已上传的文件内容进行问答",
)
iface.launch()

In [None]:
# 不用langchain版本，ollama+gradio 流式和非流式

import ollama
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr

# Directory of the persisted Chroma store opened below.
path_db = "STDB"
# Embedding model name given to HuggingFaceEmbeddings.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"
embedding = HuggingFaceEmbeddings(model_name=model_name)
# Open the store at path_db with the embedding function used for queries.
vectorstore = Chroma(persist_directory=path_db, embedding_function=embedding)   

def process_input(query):
    """Answer ``query`` with the Ollama client, using retrieved chunks as context.

    The 3 most similar chunks are fetched from the vector store and their
    text is embedded in the prompt sent to ``ollama.chat`` (non-streaming).

    Args:
        query: The question to answer.

    Returns:
        The content of the model's reply message.
    """
    retriever = vectorstore.similarity_search(query,k=3)
    # Use the chunk text only; formatting the list itself would put the
    # repr of Document objects into the prompt.
    context = "\n\n".join(doc.page_content for doc in retriever)
    prompt=f'''
    Answer the question based only on the following context:

    "{context}"

    Question: {query}
    '''
    # print(prompt)
    # Change the model name here if needed
    stream = ollama.chat(model='wangrongsheng/mistral-7b-v0.3-chinese-chat', messages=[{'role': 'user', 'content': prompt}], stream=False)
    return stream['message']['content']

    # Streaming variant (kept for reference):
    # stream = ollama.chat(model='wangrongsheng/mistral-7b-v0.3-chinese-chat', messages=[{'role': 'user', 'content': f"{prompt}"}], stream=True)
    # output = ""
    # for con in stream:
    #     output += con['message']['content']
    #     yield output

# Gradio UI: a single question box mapped onto process_input.
question_box = gr.Textbox(label="请提出你的问题")
iface = gr.Interface(
    fn=process_input,
    inputs=question_box,
    outputs="text",
    title="llm Chat",
    description="根据已上传的文件内容进行问答",
)
iface.launch()



In [None]:
# 上传pdf版本 + ollama + gradio
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.output_parsers import PydanticOutputParser

import os
import numpy as np
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)
from sentence_transformers import SentenceTransformer
from ltp import StnSplit

# Folder that is scanned for ``.pdf`` files by the ingestion function below.
path_docfolder = "STPDF"
# Directory handed to Chroma as ``persist_directory``.
path_db = "STDB"

# SentenceTransformer model used both for chunking and for the vector store.
model_name = "paraphrase-multilingual-MiniLM-L12-v2"

# LangChain embedding wrapper passed to Chroma when storing the chunks.
embedding_function = SentenceTransformerEmbeddings(model_name=model_name)

# Percentile (0-100) given to ``np.percentile``: neighbour distances above it
# become chunk boundaries.
THRESHOLD = 70
    

class SemanticParagraphSplitter:
    """Group sentences into chunks using embedding distance between neighbours.

    The text is cut into sentences, every sentence is embedded together with
    a window of surrounding sentences, and a chunk boundary is placed after
    each sentence whose cosine distance to the next one lies above the
    ``threshold``-th percentile of all neighbour distances.
    """

    def __init__(self, threshold=THRESHOLD, model_path=model_name):
        """Store the percentile threshold and load the embedding model.

        Args:
            threshold: Percentile (0-100) passed to ``np.percentile``.
            model_path: Name or path handed to ``SentenceTransformer``.
        """
        self.threshold = threshold
        self.model = SentenceTransformer(model_path)

    @staticmethod
    def cut_sentences(text):
        """Return the sentences produced by ``StnSplit().split(text)``."""
        return StnSplit().split(text)

    @staticmethod
    def combine_sentences(sentences, buffer_size=2):
        """Attach a context window to every sentence dict (in place).

        For each entry, ``'combined_sentence'`` is set to the entry's own
        sentence joined by single spaces with up to ``buffer_size`` sentences
        before and after it.

        Returns:
            The same ``sentences`` list, for chaining.
        """
        total = len(sentences)
        for i, entry in enumerate(sentences):
            # Clamp the window to the list bounds instead of testing every index.
            window = sentences[max(0, i - buffer_size):min(total, i + 1 + buffer_size)]
            entry['combined_sentence'] = ' '.join(item['sentence'] for item in window)
        return sentences

    def build_sentences_dict(self, sentences):
        """Build the per-sentence dicts and add their window embeddings.

        Each returned dict has the keys ``'sentence'``, ``'index'``,
        ``'combined_sentence'`` and ``'combined_sentence_embedding'``.
        An empty input returns an empty list without calling the model.
        """
        indexed_sentences = [{'sentence': x, 'index': i} for i, x in enumerate(sentences)]
        if not indexed_sentences:
            return indexed_sentences
        combined_sentences = self.combine_sentences(indexed_sentences)

        # Embeddings are L2-normalised, so a dot product is the cosine similarity.
        embeddings = self.model.encode(
            [x['combined_sentence'] for x in combined_sentences],
            normalize_embeddings=True,
        )
        for sentence, embedding in zip(combined_sentences, embeddings):
            sentence['combined_sentence_embedding'] = embedding

        return combined_sentences

    @staticmethod
    def calculate_cosine_distances(sentences):
        """Compute the cosine distance between each sentence and the next one.

        The distance is also stored on the earlier sentence of each pair under
        ``'distance_to_next'``; the last sentence gets no such key.

        Returns:
            ``(distances, sentences)`` where ``distances`` has
            ``len(sentences) - 1`` entries (none for fewer than two sentences).
        """
        distances = []
        for current, following in zip(sentences, sentences[1:]):
            similarity = (
                current['combined_sentence_embedding']
                @ following['combined_sentence_embedding'].T
            )
            distance = 1 - similarity
            distances.append(distance)
            current['distance_to_next'] = distance
        return distances, sentences

    def calculate_indices_above_thresh(self, distances):
        """Return the positions whose distance exceeds the percentile cut-off.

        An empty ``distances`` yields an empty list, because ``np.percentile``
        cannot be evaluated on empty input.
        """
        if len(distances) == 0:
            return []
        breakpoint_distance_threshold = np.percentile(distances, self.threshold)
        return [i for i, x in enumerate(distances) if x > breakpoint_distance_threshold]

    @staticmethod
    def cut_chunks(indices_above_thresh, sentences):
        """Join sentences into chunks, ending a chunk at every breakpoint index.

        Args:
            indices_above_thresh: Ascending indices; sentence ``i`` is the last
                sentence of its chunk.
            sentences: Sentence dicts carrying a ``'sentence'`` key.

        Returns:
            List of chunk strings (sentences joined by a single space).
        """
        chunks = []
        start_index = 0
        for index in indices_above_thresh:
            group = sentences[start_index:index + 1]
            chunks.append(' '.join(d['sentence'] for d in group))
            start_index = index + 1

        # Whatever follows the last breakpoint forms the final chunk.
        if start_index < len(sentences):
            chunks.append(' '.join(d['sentence'] for d in sentences[start_index:]))

        return chunks

    def split(self, text):
        """Split ``text`` into semantic chunks.

        With zero or one sentence there is nothing to compare, so the
        sentence list is returned unchanged.
        """
        single_sentences = self.cut_sentences(text)  # Pre-split with standard function
        print(f"{len(single_sentences)} single sentences were found")
        if len(single_sentences) <= 1:
            return single_sentences
        return self.split_passages(single_sentences)

    def split_passages(self, passages):
        """Chunk an already sentence-split list of strings.

        Fewer than two passages are returned as a list without embedding.
        """
        if len(passages) < 2:
            return list(passages)
        combined_sentences = self.build_sentences_dict(passages)
        distances, sentences = self.calculate_cosine_distances(combined_sentences)

        indices_above_thresh = self.calculate_indices_above_thresh(distances)
        return self.cut_chunks(indices_above_thresh, sentences)

def read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding):
    """Chunk every PDF in a folder and store the chunks in a Chroma database.

    Each page is loaded with ``PyPDFLoader``, stripped of spaces and newlines,
    split with ``SemanticParagraphSplitter`` and the resulting chunks are
    written with ``Chroma.from_texts``.

    Args:
        path_docfolder: Folder whose ``.pdf`` files (any letter case) are read.
        path_db: Directory passed to Chroma as ``persist_directory``.
        embedding: Embedding object passed to ``Chroma.from_texts``.

    Returns:
        ``True`` once processing has finished, including when there was
        nothing to store.
    """
    page_texts = []
    for filename in os.listdir(path_docfolder):
        if not filename.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(path_docfolder, filename)
        print(f"Reading file: {file_path}")

        for page in PyPDFLoader(file_path).load():
            text = page.page_content.replace(" ", "").replace("\n", "")
            # Blank pages produce no sentences and would break the splitter/store.
            if text:
                page_texts.append(text)

    if not page_texts:
        print("No PDF text found, nothing was stored")
        return True

    text_splitter = SemanticParagraphSplitter(threshold=THRESHOLD)
    chunks = []
    for content in page_texts:
        chunks.extend(chunk for chunk in text_splitter.split(content) if chunk)

    if not chunks:
        print("No PDF text found, nothing was stored")
        return True

    # One write for all chunks instead of re-opening the store for every page.
    Chroma.from_texts(chunks, embedding, persist_directory=path_db)
    print("Successfully save the embedding into DB")
    return True

# Runs the ingestion as soon as this cell executes, using the settings defined above.
read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding_function)


def process_input(urls,question):
    """Answer ``question`` using uploaded PDF files (or URLs) as context.

    Args:
        urls: Either the value delivered by a Gradio file-upload component
            (a list of file paths or of objects exposing the path as
            ``.name``) or, as before, a string of newline-separated URLs.
        question: The question to answer.

    Returns:
        The model's answer as a string, or a short notice when no readable
        input was provided.
    """
    # Imported locally with the correct spellings so the function does not
    # depend on the cell-level import lines.
    from langchain.text_splitter import CharacterTextSplitter
    from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
    from langchain_core.runnables import RunnablePassthrough

    if isinstance(urls, str):
        sources = [line.strip() for line in urls.split("\n") if line.strip()]
    else:
        # Upload components hand over paths or temp-file wrappers with ``.name``.
        sources = [str(getattr(item, "name", item)) for item in (urls or [])]

    docs_list = []
    for source in sources:
        if source.lower().endswith(".pdf"):
            loader = PyPDFLoader(source)
        else:
            loader = WebBaseLoader(source)
        docs_list.extend(loader.load())
    if not docs_list:
        return "No readable content was found in the provided input."

    # NOTE: the model names are left empty in this notebook; fill them in before running.
    model_local = ChatOllama(model="")
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(docs_list)

    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="webbase",
        embedding=embeddings.OllamaEmbeddings(model="")
    )
    retriever = vectorstore.as_retriever()
    after_rag_template ='''
    Answer the question based only on the following context:
    {context}
    Question: {query}
    '''
    after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
    after_rag_chain = (
        {
            # Hand only the text of the retrieved documents to the prompt.
            "context": retriever | (lambda found: "\n\n".join(d.page_content for d in found)),
            "query": RunnablePassthrough(),
        }
        | after_rag_prompt
        | model_local
        | StrOutputParser()
    )
    # The chain input is the question string: it feeds both the retriever
    # and the passthrough.
    return after_rag_chain.invoke(question)

# define  Gradio interface
# Gradio UI: PDF upload plus a question box, mapped onto process_input.
# The upload component restricts file types through ``file_types``; it has
# no ``accept`` keyword.
iface = gr.Interface(
    fn=process_input,
    inputs=[gr.Files(file_types=[".pdf"],label="请上传pdf文件"),gr.Textbox(label="请提出你的问题")],
    outputs="text",
    title="llm Chat",
    description="根据已上传的文件内容进行问答"
)
iface.launch()

In [42]:
from docx import Document
#import ollama 
# Load the Word document. A raw string keeps the Windows backslashes literal
# and avoids invalid-escape warnings.
doc = Document(r'E:\ganhuo\INESANETliterature\文件夹\软件测试案例方案\术语.docx')

tables = doc.tables
# One list of cell texts per table row, collected across all tables.
row_data1 = []
for tb in tables:
    for row_index, row in enumerate(tb.rows):
        # The first row of every table is skipped (presumably a header row — confirm).
        if row_index == 0:
            continue
        row_data1.append([cell.text for cell in row.cells])
       
# jsonlist=[]

# for data in row_data1:
#     prompt=f'''
#     请仅基于我发你的文字生成格式如为下的json问答
#     格式为：
#     {{
#         "instruction": "",
#         "input": "",
#         "output": ""
#         }}
#     例子：
#     输入：['自动售检票系统', '是基于计算机技术、网络技术、自动控制技术等技术能够实现购票、检票、计费、收费、统计全过程的自动化系统。']
#     生成：
#     {{
#         "instruction": "请问自动售检票系统是什么",
#         "input": "",
#         "output": "是基于计算机技术、网络技术、自动控制技术等技术能够实现购票、检票、计费、收费、统计全过程的自动化系统。"
#     }}
#     输入：['AID', 'Application Identifier', '应用标识']
#     生成：
#     {{
#         "instruction": "请问AID是什么意思",
#         "input": "",
#         "output": "英文为Application Identifier，中文为应用标识"
#     }}
#     输入：{data}

#     '''
#     context = ollama.chat(model='wangrongsheng/mistral-7b-v0.3-chinese-chat', messages=[{'role': 'user', 'content': f"{prompt}"}], stream=False)
#     jsonlist.append(context['message']['content'])
#     print(jsonlist)


['线路中央计算机系统', '管理与控制城市轨道交通线路自动售检票系统的计算机系统']
