In [1]:
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.vectorstores import Qdrant

from bs4 import BeautifulSoup
import requests
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter

import dotenv
dotenv.load_dotenv(".env")


True

In [2]:
url= "https://docs.oracle.com/en-us/iaas/Content/GSG/Reference/faq.htm"
soup = BeautifulSoup(requests.get(url).text, "html.parser")
# paragraphs_raw = soup.find_all('p')[:3]
paragraphs_raw = soup.find_all('p')
raw_text = [re.sub("\\n\\s+"," ", paragraph.get_text()) for paragraph in paragraphs_raw]
# clean_text=re.sub("\.+\\s*\.*", ". ", '.'.join(raw_text))
clean_text='.'.join(raw_text)
splitter = RecursiveCharacterTextSplitter(chunk_size = 125,
    chunk_overlap  = 0,
    length_function = len,)
text_list = splitter.split_text(clean_text)
text_list

['Make sure that you are signing in with your Oracle Identity Cloud Service login credentials. To ensure that you sign in',
 'through IDCS:.Go to https://cloud.oracle.com..You are prompted to enter your cloud tenant, your user name, and your',
 "password. After you're authenticated, you're directed to a region your tenancy is subscribed to. You can switch to other",
 'regions you are subscribed to by using the region selector at the top of the Console..If you need more help signing in, see',
 'Sign In to the Console..In the Oracle Cloud Infrastructure commercial realmÂ\xa0, you can now use https://cloud.oracle.com to',
 'sign in to the Console from any region. The regional URLs are being deprecated..Having one Console URL has the following',
 'benefits:.If the Console displays a banner notifying you of this change, you are currently using regional URLs. With the',
 'move to a single URL, you need to complete the following tasks:.If you have questions or a custom configuration, create a

In [3]:
embeddings = CohereEmbeddings(model='multilingual-22-12')

db = Qdrant.from_texts(text_list, embeddings, location=":memory:", collection_name="my_documents", distance_func="Dot")

In [4]:
queries = ["How to find your home region",
           "How to subscribe to another region",
           "How to login to IDCS",
           "How to increase the service limit",
           "In which region is the tenancy created",
          ]

In [5]:
answers = []

for query in queries:
    docs = db.similarity_search(query)
    answers.append(docs[0].page_content)

answers

['regions you are subscribed to by using the region selector at the top of the Console..If you need more help signing in, see',
 'regions you are subscribed to by using the region selector at the top of the Console..If you need more help signing in, see',
 'through IDCS:.Go to https://cloud.oracle.com..You are prompted to enter your cloud tenant, your user name, and your',
 "Profile menu and click Tenancy: <your_tenancy_name>..You can view your tenancy's service limits in the Console and request",
 'Infrastructure, Oracle creates a tenancy for you in one region. This is your home region. Your home region is where your IAM']