# Retrieval script for QA

In [None]:
# Install dependencies - uncomment the line below
# %pip install -qU langchain qdrant-client openai pypdf pypdf2 langchain-openai langchain-community uuid tiktoken

# Import Libraries

In [None]:
import os
import time
import asyncio
from langchain_openai import OpenAIEmbeddings

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI
from qdrant_client import QdrantClient
from langchain_community.vectorstores import Qdrant

from langchain.chains import VectorDBQA
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI
from langchain_community.vectorstores import Qdrant
from langchain.schema import retriever

In [None]:
# Read connection settings from the environment; an unset variable yields None
openai_api_key = os.environ.get('OPENAI_API_KEY')
qdrant_host = os.environ.get('QDRANT_HOST')

In [None]:
# Embedding settings, kept as named values so they are easy to find and tune
EMBEDDING_MODEL = 'text-embedding-3-small'
EMBEDDING_DIMENSIONS = 768

# Build the OpenAI embedding model from the settings above
embeddings_model = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    dimensions=EMBEDDING_DIMENSIONS,
    api_key=openai_api_key,
)

In [None]:
# Create the Qdrant client, pointing it at the URL held in `qdrant_host`
client = QdrantClient(
    url=qdrant_host,
)

In [None]:
# Ask the Qdrant client for the collections it holds; the `.collections`
# attribute of this response is iterated in the following cell.
# NOTE(review): the name `collections` would shadow the stdlib `collections`
# module if that module were ever imported in this notebook.
collections = client.get_collections()

In [None]:
# Unwrap the collection entries from the response and print one name per line
list_of_collections = collections.collections
for name in (entry.name for entry in list_of_collections):
    print(name)

RegDb vectors 1000-chars-sb
llamaparse pl 256-tokens
RegDb Vectors v1
RegDb Vectors v2
test 768-dim
test
RegDb Vectors
llamaparse 3-pdf files
New-768-dim-sb


# Configure doc store

In [None]:
# Name of the Qdrant collection to query - change it here to use another one
COLLECTION_NAME = 'New-768-dim-sb'

# Wrap the collection as a LangChain vector store backed by the existing
# client and embedding model
doc_store = Qdrant(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embeddings_model,
)

In [None]:
# Create the OpenAI LLM that the QA chain uses to generate answers
llm = OpenAI(
    api_key=openai_api_key,
)

# Configure the RetrievalQA chain for chat

In [None]:
# Expose the vector store as a retriever, then build a "stuff" RetrievalQA
# chain that also returns the source documents alongside the answer.
doc_retriever = doc_store.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
    return_source_documents=True,
)

# Enter prompt query

In [None]:
# user_query = 'What is registration of type 1 dealing in securities activities?'
# user_query = 'What are the virtual assets regulations by the SFC in hong kong.'
# user_query = 'What did the SEC staff advise Judge Torres?'
# user_query = 'What does the 1940 Act provides?'
# user_query = 'When was Celsius Crypto lender Celsius founded'
# user_query = 'What will regulated Activity in relation to Virtual Assets be granted?'
# user_query = 'Who identified the risks associated with investing in virtual assets and in which year?'
# user_query = 'explain virtual asset-knowledge test'
# user_query = 'What does the 7.1. Selling restrictions state?'
# user_query = 'principal Rules for Authorised Persons conducting a Regulated Activity in relation to Virtual Assets are set out in which chapter?'
# user_query = 'What does the COBS Rule 17.2.1 permits?'
# user_query = 'What will an applicant applying for an FSP need to submit?'
# user_query = 'What is FSRA and FSP and what do they do?'
# user_query = 'What was the SFC regulation for virtual asset in 2018? Give me all possible details.'
# user_query = 'Give me the latest crypto regulation by SFC in HongKong.'
# user_query = 'What transition period is provided for intermediaries serving existing clients of VA dealing services before the full implementation of the updated requirements? '
# Active question - swap in any of the commented-out samples to try another
user_query = 'Explain the travel rule and where and why it poses a challenge?'

# Run the question through the QA chain and keep the full response
response = qa.invoke(input=user_query)

# Print the sources of the documents

In [None]:
# Pull the source documents that back the answer out of the chain response
sources = response['source_documents']

# Show each chunk's text, followed by a separator line and a blank line
for chunk in sources:
    print(chunk.page_content, '############################', '', sep='\n')

In [None]:
# The source documents only carry the collection name and document id as
# metadata, so look each one up in Qdrant again to get its full payload.
retrieved_docs = []

for src_doc in sources:
    meta = src_doc.metadata
    point_id = meta['_id']
    origin_collection = meta['_collection_name']

    # One lookup per source; each result is appended as returned by the client
    retrieved_docs.append(
        client.retrieve(
            collection_name=origin_collection,
            ids=[point_id],
            with_payload=True,
        )
    )

In [None]:
# Display the raw lookup results (one entry per source document) for inspection
retrieved_docs

[[Record(id='ee989b8d-4daa-4350-b2a6-9c977c71a41c', payload={'author': '', 'creation_date': "D:20231027094747+01'00'", 'creator': 'Adobe InDesign 19.0 (Windows)', 'modification_date': "D:20231027104743+01'00'", 'page': 104, 'page_content': 'solutions to enable the implementation of the “travel rule” was noted, although issues \nremain to be addressed by the public and private sectors for a practical implementation \nof the recommendations.', 'producer': 'Adobe PDF Library 17.0', 'source': 'pdf_files/Blockchain and Cryptocurrency Regulation 2024.pdf', 'title': ''}, vector=None, shard_key=None)],
 [Record(id='6b63f3cc-0d74-42d5-8cd7-ed0ecda85e9d', payload={'author': '', 'creation_date': "D:20231027094747+01'00'", 'creator': 'Adobe InDesign 19.0 (Windows)', 'modification_date': "D:20231027104743+01'00'", 'page': 105, 'page_content': 'only 29 countries have currently implemented travel rule requirements applicable to \nV As and V ASPs and only 11 have started enforcement, out of 98 countri

In [None]:
# Print each retrieved chunk together with its detailed metadata
# (source file, page number and the stored text).
for each_doc in retrieved_docs:
    # Each entry is the list returned by one lookup; skip empty ones so that
    # indexing [0] below cannot raise IndexError.
    if not each_doc:
        continue
    record = each_doc[0]

    retrieved_payload = record.payload
    source = retrieved_payload['source']
    page_content = retrieved_payload['page_content']
    page_number = retrieved_payload['page']

    # Actually emit the details; previously the values were extracted but
    # never shown, so the cell produced no output.
    print(f"Source: {source}")
    print(f"Page: {page_number}")
    print(page_content)
    print('############################')
    print()

In [None]:
# Show the question that was asked, followed by the generated answer
print(
    f"User Query: {response['query']}",
    f"Response: {response['result']}",
    sep="\n",
)

User Query: Explain the travel rule and where and why it poses a challenge?
Response:  The travel rule, as set by the FATF, requires virtual asset service providers (VASPs) to collect and transfer certain information about their customers when conducting transactions. This includes information such as the originator and beneficiary of the transaction, as well as the amount and date of the transaction. While there are technological solutions to enable compliance with this rule, it remains a challenge for both the public and private sectors to implement it globally. Currently, only 29 countries have implemented travel rule requirements and only 11 have started enforcing them. The challenge lies in ensuring interoperability between different solutions and addressing issues such as those posed by decentralized finance (DeFi), where the traditional rules of jurisdiction and territorial application may not apply. Additionally, the recently adopted EU regulation, TFR, aims to strengthen AML/C