In [None]:
import dotenv
# Load settings from a local .env file into the environment before anything else runs.
# NOTE(review): presumably this supplies the Azure Cognitive Search service/index/key
# settings that the retriever cell relies on — confirm against the project's .env.
dotenv.load_dotenv()

In [None]:
import base64

# Python port of .NET's HttpServerUtility.UrlTokenDecode,
# used to decode the storage path from the metadata (URL-token encoded: base64 with
# "+"/"/" swapped for "-"/"_" and the "=" padding replaced by a trailing padding-count digit)
def url_token_decode(encoded_string: str) -> str:
    """Decode a .NET ``HttpServerUtility.UrlTokenEncode`` token into text.

    The token format is base64 with ``+``/``/`` replaced by ``-``/``_``, the
    ``=`` padding stripped, and one trailing digit appended that records how
    many padding characters were removed.

    Args:
        encoded_string: The URL-token encoded value, including its trailing
            padding-count digit.

    Returns:
        The decoded bytes interpreted as UTF-8 text. An empty token decodes
        to an empty string.

    Raises:
        ValueError: If the last character is not a decimal digit, or if
            ``base64.b64decode`` rejects the payload (e.g. incorrect padding;
            ``binascii.Error`` is a ``ValueError`` subclass).
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    # Nothing to decode; this also avoids an IndexError on encoded_string[-1].
    if not encoded_string:
        return ""

    # The final character is a digit holding the number of stripped "=" characters.
    padding_char = encoded_string[-1]
    if padding_char not in "0123456789":
        # Without this check a non-digit suffix yields a negative or huge padding
        # count, which silently drops the last character and decodes garbage.
        raise ValueError(
            f"Invalid URL token: expected a trailing padding digit, got {padding_char!r}"
        )
    padding_count = int(padding_char)

    # Drop the padding digit and map the URL-safe alphabet back to standard base64.
    payload = encoded_string[:-1].replace("-", "+").replace("_", "/")

    # Restore the stripped padding, then decode to bytes and on to UTF-8 text.
    return base64.b64decode(payload + "=" * padding_count).decode("utf-8")



In [None]:
# Ask the user for the search text; `query` is handed to the retriever in a later cell.
query = input("Enter a query for Cognitive Search")

In [None]:
from langchain.retrievers import AzureCognitiveSearchRetriever

# Create the retriever, passing top_k=10 (presumably the maximum number of hits returned — confirm).
# NOTE(review): no service name, index, or key is passed here; presumably they are read
# from environment variables populated by dotenv.load_dotenv() — confirm.
retriever = AzureCognitiveSearchRetriever(top_k=10)
# Run the search for the query entered above; `documents` is summarized in the next cell.
documents = retriever.get_relevant_documents(query)

In [None]:
# Report how many hits came back, then print one summary line per hit.
print(f"Found {len(documents)} results")

for document in documents:
    metadata = document.metadata

    # The storage path is kept URL-token encoded in the metadata; decode it for display
    # and trim any surrounding whitespace.
    storage_path = url_token_decode(metadata["metadata_storage_path"]).strip()
    search_score = metadata["@search.score"]
    content_length = len(document.page_content)

    print(f"Search score: {search_score} Document: {storage_path} (content length: {content_length})")
