In [1]:
from pymilvus import connections, CollectionSchema, FieldSchema, DataType, Collection
from pymilvus import utility
from dotenv import load_dotenv
from langchain_community.vectorstores import Milvus
from langchain_openai import OpenAIEmbeddings
from langchain.chains.query_constructor.base import AttributeInfo
from langchain_openai import ChatOpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
import hashlib
import json
import re
import os


In [2]:
load_dotenv()
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

In [3]:
milvus = Milvus( embeddings, collection_name="ImageInterpretation2", connection_args={"host": "localhost", "port": "19530"})


In [4]:
milvus.add_texts(["This sensor is new technology"], metadatas=[{"source":"sensor1.txt", "page":1}])
milvus.add_texts(["This sensor y usefull for moviment tracking"], metadatas=[{"source":"sensor1.txt", "page":2}])
milvus.add_texts(["It has a limit of 5 miles per hour"], metadatas=[{"source":"sensor1.txt", "page":3}])
milvus.add_texts(["May fail under high temperatures"], metadatas=[{"source":"sensor1.txt", "page":4}])

milvus.add_texts(["This sensor is old technology"], metadatas=[{"source":"sensor2.txt", "page":1}])
milvus.add_texts(["This sensor y usefull for meassuring temperature"], metadatas=[{"source":"sensor2.txt", "page":2}])
milvus.add_texts(["It has a limit of 100 celsius degree"], metadatas=[{"source":"sensor2.txt", "page":3}])
milvus.add_texts(["Minimum temperature is -20 celsius"], metadatas=[{"source":"sensor2.txt", "page":4}])

[447427853266858405]

In [5]:
metadata_field_info = [
    AttributeInfo(
        name="source",
        description="the source file f the text.",
        type="string",
    ),
    AttributeInfo(
        name="page",
        description="The page number in the source of the text",
        type="integer",
    )
]
document_content_description = "Texts about sensors"

In [6]:
retriever = SelfQueryRetriever.from_llm(
    llm, milvus, document_content_description, metadata_field_info, enable_limit=True, verbose=True
)

In [8]:
docs = retriever.get_relevant_documents("How many sensors are temperature related", verbose=True)

In [None]:
prompt = ChatPromptTemplate.from_template(
    """You are an expert in sensor model {model} and a customer is asking you about {query}. You can answer with the following information: {info}.
    If you are given the source if the info, you can also provide a link to the source at the end of your answer with the following format: [../data/abc.pdf page:12]"""
)
output_parser = StrOutputParser()
chain = (
    {"query": RunnablePassthrough(),
     "model": RunnablePassthrough(),
     "info": RunnablePassthrough()} 
    | prompt
    | llm
    | output_parser
)