In [1]:
from qdrant_client import QdrantClient
from pypdf import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain import PromptTemplate
from qdrant_client.models import Distance, VectorParams
from qdrant_client.models import PointStruct
import ollama
import numpy as np

In [2]:
# Address of the Qdrant instance that stores the document vectors.
QDRANT_URL = "http://localhost:6333"
client = QdrantClient(url=QDRANT_URL)

In [3]:
# PDFs to ingest; every path is relative to the notebook's working directory.
DOCUMENTS_DIR = '../../database/documents'
files = [
    f'{DOCUMENTS_DIR}/{pdf_name}'
    for pdf_name in ('2010.11929v2.pdf', '2212.06727v1.pdf', '2312.01232v2.pdf')
]

In [4]:
# Chunker for page text: tries '. ' first, then ',' as split points and keeps
# the separator text in the resulting chunks. Size 500 with an overlap of 50.
splitter = RecursiveCharacterTextSplitter(
    separators=['. ', ','],
    keep_separator=True,
    chunk_size=500,
    chunk_overlap=50,
)

In [5]:
# Embedding model served by Ollama; used for both documents and queries.
embedder = OllamaEmbeddings(model="llama3")

# Embed a throwaway string once to discover the vector length, which the
# Qdrant collection needs when it is created.
probe_vector = np.array(embedder.embed_query('huh'))
embed_size = probe_vector.size

In [6]:
# Populate the "computer_vision" collection on first run only. When the
# collection already exists nothing is re-ingested.
# NOTE(review): a run that fails part-way leaves a partially filled collection
# that later runs will skip; delete the collection to force a rebuild.
collection = "computer_vision"

if not client.collection_exists(collection):

    client.create_collection(
        collection_name=collection,
        vectors_config=VectorParams(size=embed_size, distance=Distance.COSINE),
    )

    # Next free point id; ids run sequentially across all files and pages.
    idx = 0
    for file in files:
        reader = PdfReader(file)

        for page in reader.pages:
            # `or ''` guards against extractors that hand back None.
            text = page.extract_text() or ''

            documents = splitter.split_text(text=text)
            if not documents:
                # Pages with no extractable text (e.g. scanned figures) give
                # no chunks. Skip them: there is nothing to embed, and an
                # upsert with an empty points list is expected to be rejected
                # by Qdrant, which would abort the whole ingestion.
                continue

            embeddings = embedder.embed_documents(documents)

            client.upsert(
                collection_name=collection,
                points=[
                    PointStruct(
                        id=idx + offset,
                        vector=vector,
                        payload={"document": chunk, 'file_name': file},
                    )
                    for offset, (chunk, vector) in enumerate(zip(documents, embeddings))
                ],
            )
            idx += len(documents)

else:
    print('vectorstore already exists')

vectorstore already exists


In [7]:
# Question to answer, plus its embedding as a NumPy array for the search.
query = 'What are the recent advancements in computer vision'
query_vector = embedder.embed_query(query)
query_embed = np.array(query_vector)

In [8]:
# Ask Qdrant for up to five points matching the query embedding.
db_response = client.query_points(
    collection_name="computer_vision",
    query=query_embed,
    limit=5,
)
search_result = db_response.points

# Pull the stored chunk text out of each hit's payload.
docs = [hit.payload['document'] for hit in search_result]

# Newline-terminate every chunk and concatenate them into a single context.
context = ''.join(doc + '\n' for doc in docs)

print(context)

, [43] is a large-scale image
database created by researchers at Princeton University, ini-
tially designed for the purpose of object recognition research
in computer vision
. The area
of a circle represents the scale of each dataset (i.e., the number of images).
previous GPT models, which were primarily designed
for generating natural language text, iGPT has been
trained to generate high-resolution images from textual
descriptions.
In Fig. 3, we present a chronological overview of recent
representative work
The Stand-Alone Self-Attention in Vision Models [18]
marks an early departure from convolutional approaches in
computer vision
. The JFT-300M dataset is
challenging due to its large size and the diversity of images
and categories, making it a valuable resource for advancing
research in the field of computer vision
. Previous survey papers either put more effort
into convolutional neural networks [33], [34], [35], [36] or
focus on broader topics such as video processing [37], medica

In [9]:
# Prompt skeleton with two placeholders: {context} receives the retrieved
# chunks and {question} receives the user query.
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{context}

Question: {question}

Helpful Answer:
"""

prompt_template = PromptTemplate.from_template(template)

# The spelling `formmatted_prompt` (sic) is kept because the generation cell
# reads this exact name.
formmatted_prompt = prompt_template.format(question=query, context=context)

print(formmatted_prompt)


Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

, [43] is a large-scale image
database created by researchers at Princeton University, ini-
tially designed for the purpose of object recognition research
in computer vision
. The area
of a circle represents the scale of each dataset (i.e., the number of images).
previous GPT models, which were primarily designed
for generating natural language text, iGPT has been
trained to generate high-resolution images from textual
descriptions.
In Fig. 3, we present a chronological overview of recent
representative work
The Stand-Alone Self-Attention in Vision Models [18]
marks an early departure from convolutional approaches in
computer vision
. The JFT-300M dataset is
challenging due to its large size and the diversity of images
and categories, making it a valuab

In [10]:
# Send the filled-in prompt to the llama3 model through Ollama and show the
# generated text.
llm_response = ollama.generate(model='llama3', prompt=formmatted_prompt)

answer = llm_response['response']
print(answer)

The Stand-Alone Self-Attention in Vision Models [18] marks an early departure from convolutional approaches in computer vision. The JFT-300M dataset is challenging due to its large size and diversity of images and categories, making it a valuable resource for advancing research in the field of computer vision.
