In [4]:
# ! pip install langchain pypdf
# !pip install sentence-transformers==2.2.2

In [5]:
# ! pip install huggingface

In [6]:
# !pip install faiss-cpu

In [7]:
# !pip install tiktoken

In [8]:
from langchain.embeddings import HuggingFaceEmbeddings

In [9]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [10]:
from langchain import HuggingFaceHub

In [35]:
from google.colab import userdata
HUGGINGFACEHUB_API_TOKEN = userdata.get('HUGGINGFACEHUB_API_TOKEN')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

WEAVIATE_CLUSTER = userdata.get('WEAVIATE_CLUSTER')
WEAVIATE_API_KEY = userdata.get('WEAVIATE_API_KEY')
import os

os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

os.environ['WEAVIATE_CLUSTER'] = WEAVIATE_CLUSTER
os.environ["WEAVIATE_API_KEY"] = WEAVIATE_API_KEY

In [12]:
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

In [13]:
# create the chain to answer questions
llm=HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", model_kwargs={"temperature":0.9,"max_length":512})


  warn_deprecated(


In [14]:
# !mkdir pdfs

mkdir: cannot create directory ‘pdfs’: File exists


In [15]:
from langchain.document_loaders import PyPDFDirectoryLoader

In [16]:
loader = PyPDFDirectoryLoader("pdfs")
data = loader.load()

In [17]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [18]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
text_chunks = text_splitter.split_documents(data)

In [19]:
len(text_chunks)

29

In [20]:
text_chunks[2]

Document(page_content='CS391R: Robot Learning (Fall 2021)3Importance of Object Detection for Robotics❖Visual modality is very powerful❖Humans are able to detect objects and do perception using just this modality in real time (not needing radar) ❖If we want responsive robot systems that work in real time (without specialized sensors) almost real time vision based object detection can help greatly\nVision based vs LIDAR (self driving)\nTesla Investor Day Presentation', metadata={'source': 'pdfs/yolo.pdf', 'page': 2})

In [21]:
from langchain.vectorstores import FAISS

In [22]:
vectorstore= FAISS.from_documents(text_chunks,embeddings)

In [23]:
query = "what is yolo?"

In [24]:
docs=vectorstore.similarity_search(query, k=3)

In [25]:
docs

[Document(page_content='image each time leading to less false positives (has contextual information for detection) YOLO algorithm', metadata={'source': 'pdfs/yolo.pdf', 'page': 4}),
 Document(page_content='CS391R: Robot Learning (Fall 2021)20Discussion of Results❖Pro: YOLO is a lot faster than the other algorithms for image detection❖Pro: YOLO’s use of global information rather than only local information allows it to understand contextual information when doing object detection➢Does better in domains such as artwork due to this❖Con: YOLO lagged behind the SOTA models in object detection➢This is attributed to making many localization errors and unable to detect small object', metadata={'source': 'pdfs/yolo.pdf', 'page': 19}),
 Document(page_content='CS391R: Robot Learning (Fall 2021)2Problem Addressed: Object Detection❖Object detection is the problem of both locating ANDclassifying objects ❖Goal of YOLO algorithm is to do object detection both fast ANDwith high accuracy\n“Deep Learning

In [26]:
from langchain.chains import RetrievalQA

In [27]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever())

In [28]:
query="for which type of algorithms Yolo is used?"

In [29]:
print(qa.run(query))

  warn_deprecated(


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

image each time leading to less false positives (has contextual information for detection) YOLO algorithm

CS391R: Robot Learning (Fall 2021)20Discussion of Results❖Pro: YOLO is a lot faster than the other algorithms for image detection❖Pro: YOLO’s use of global information rather than only local information allows it to understand contextual information when doing object detection➢Does better in domains such as artwork due to this❖Con: YOLO lagged behind the SOTA models in object detection➢This is attributed to making many localization errors and unable to detect small object

CS391R: Robot Learning (Fall 2021)15Experimental Setup❖Authors compare YOLO against the previous work described above on PASCAL VOC 2007, and VOC 2012 as well as out of domain art dataset ❖Correct if IOU metric above .5 and class is correct❖Use two pe

In [30]:
query="what are cons of yolo?"

In [37]:
print(qa.run(query))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

but P(Object), lowers interpretability❖Another limitation of YOLO is that it imposed spatial constraints on the objects in the image since only B boxes can be predicted on an SxS grid❖Since the architecture only predicts boxes, this might make it less useful for irregular shapes

image each time leading to less false positives (has contextual information for detection) YOLO algorithm

CS391R: Robot Learning (Fall 2021)21Critique / Limitations / Open Issues ❖Performance lags behind SOTA ❖Requires data to be labeled with bounding boxes, hard to collect for many classes➢Previous work could generalize better since it used image classifier➢2014 COCO dataset (very large dataset) addressed this somewhat❖Regarding experiments: number of classes predicted is very limited➢Not convinced that YOLO v1 is generalizable ❖Confidence output 

In [31]:
# !pip install weaviate-client

In [32]:
# !pip install unstructured
# !pip install "unstructured[pdf]"

In [36]:
import weaviate
from langchain.vectorstores import Weaviate

#Connect to weaviate Cluster
auth_config = weaviate.auth.AuthApiKey(api_key = WEAVIATE_API_KEY)
WEAVIATE_URL = WEAVIATE_CLUSTER

client = weaviate.Client(
    url = WEAVIATE_URL,
    additional_headers = {"X-OpenAI-Api-key": OPENAI_API_KEY},
    auth_client_secret = auth_config,
    startup_period = 10
)

In [37]:
client.is_ready()

True

In [38]:
# define input structure
client.schema.delete_all()
client.schema.get()
schema = {
    "classes": [
        {
            "class": "Chatbot",
            "description": "Documents for chatbot",
            "vectorizer": "text2vec-openai",
            "moduleConfig": {"text2vec-openai": {"model": "ada", "type": "text"}},
            "properties": [
                {
                    "dataType": ["text"],
                    "description": "The content of the paragraph",
                    "moduleConfig": {
                        "text2vec-openai": {
                            "skip": False,
                            "vectorizePropertyName": False,
                        }
                    },
                    "name": "content",
                },
            ],
        },
    ]
}

client.schema.create(schema)
vectorstore = Weaviate(client, "Chatbot", "content", attributes=["source"])

In [39]:
# load text into the vectorstore
text_meta_pair = [(doc.page_content, doc.metadata) for doc in text_chunks]
texts, meta = list(zip(*text_meta_pair))
vectorstore.add_texts(texts, meta)

{'error': [{'message': 'update vector: connection to: OpenAI API failed with status: 401 error: Incorrect API key provided: hf_QGoyo*************************WPCt. You can find your API key at https://platform.openai.com/account/api-keys.'}]}
{'error': [{'message': 'update vector: connection to: OpenAI API failed with status: 401 error: Incorrect API key provided: hf_QGoyo*************************WPCt. You can find your API key at https://platform.openai.com/account/api-keys.'}]}
{'error': [{'message': 'update vector: connection to: OpenAI API failed with status: 401 error: Incorrect API key provided: hf_QGoyo*************************WPCt. You can find your API key at https://platform.openai.com/account/api-keys.'}]}
{'error': [{'message': 'update vector: connection to: OpenAI API failed with status: 401 error: Incorrect API key provided: hf_QGoyo*************************WPCt. You can find your API key at https://platform.openai.com/account/api-keys.'}]}
{'error': [{'message': 'update v

['89434192-f2c5-4c00-985f-cb43a05337a9',
 '229a7fb9-2e92-4438-8ad8-510f59ca30ed',
 '41a8d56f-bf4b-4b0d-9d06-b0da22e764c2',
 '1ab80c45-4ea4-4999-9e1a-322c168c1f2a',
 '18387bd5-c1b5-4049-80f4-d31bf8c2eb46',
 '02daf152-4e3a-4164-a2e7-6776621f9fe5',
 '3ec69445-4a10-4093-80bc-59f98f27d37c',
 '54a98499-30be-4b90-98c1-53ea9de45e90',
 '41e60a81-6416-4914-8012-8ef9854bb5ef',
 '94212c0f-0ef1-408f-9555-70d2df5e9230',
 '0a6e40cc-e969-4905-819d-a2f9c7e5c13b',
 'bad680ce-39db-4a40-bb62-c16387a165a6',
 '613191fe-ab30-44e1-a4f5-fec863d0a34a',
 '56d96943-832f-4c97-8cb4-141a049fbed3',
 '5b819340-3df7-44a7-a212-a67746223342',
 '6ea93480-e5ff-41b5-ae85-5c1fe5c6acf1',
 'd8d118d9-91a5-484b-aef2-aea478628970',
 '33b81866-8350-42ce-8a47-03427cf435ce',
 '5d9bc0b3-39f9-49ad-8428-5524c50b2e56',
 '07f8122b-0390-42cd-846f-76cff1b5eb24',
 'c4c4fb9d-e202-4f7b-b7fd-54994ed8a8f3',
 '760b5eb4-ed6d-4b74-9511-a7ce223d9eda',
 '4a9663d5-5fcc-4c9a-ba96-2b28000ddc22',
 'af1c5c40-e6d7-47f9-a97d-79b646b0413d',
 'e8c2ce23-7cef-