# Search optimization techniques in AI Search

In [6]:
import os
from dotenv import load_dotenv
import pandas as pd
from openai import AzureOpenAI
from pypdf import PdfReader

# Load environment variables from .env file
load_dotenv(override=True)

True

In [2]:
search_service_endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]
search_service_key = os.environ["AZURE_SEARCH_KEY"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_embedding_dimensions = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", 1024))
embedding_model_name = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-06-01")

In [4]:
embedding = AzureOpenAI(
        azure_deployment=azure_openai_embedding_deployment,
        api_version=azure_openai_api_version,
        azure_endpoint=azure_openai_endpoint,
        api_key=azure_openai_key
    )

def get_embeddings(text):
    response = embedding.create(input=text, model=embedding_model_name, dimensions=azure_openai_embedding_dimensions)
    return response.data[0].embedding

# def get_quantized_embeddings(text):
#     response = openai_client.embeddings.create(input=text, model=embedding_model_name, dimensions=azure_openai_embedding_dimensions)
#     return response.data[0].quantized_embedding

In [5]:
data_folder = 'data'
files = os.listdir(data_folder)
file_path = os.path.join(data_folder, files[0])
pdf_content = PdfReader(file_path)

In [23]:
from azure.search.documents.indexes import SearchIndexClient
import azureml.vector_optimizations.azure_search_manager as asm

indexes = {
    "all-options-with-binary": {
        "use_binary_compression": True,
        "use_float16": True,
        "use_stored": False,
        "truncation_dimension": 1024
    }
}
base_index_name = "movies"
embedding_dimensions = azure_openai_embedding_dimensions
search_index_client = SearchIndexClient(search_service_endpoint, search_service_key)
for index, options in indexes.items():
    index = asm.create_index(f"{base_index_name}-{index}", dimensions=embedding_dimensions, **options)
    search_index_client.create_or_update_index(index)
print("Created indexes")

AttributeError: module 'azure_search_manager' has no attribute 'create_index'

In [3]:
from typing import List
import requests
import numpy as np

def fetch_embeddings(query_text: str) -> dict:
    # Define the URL and the data payload
    endpoint = "http://localhost:11434/api/embeddings"
    payload = {
        "model": "mxbai-embed-large:latest",
        "prompt": query_text
    }

    # Send the POST request
    response = requests.post(url=endpoint, json=payload)

    # Print the response
    return response.json()


def scalar_quantization(input_vector: List[float], quantization_type=np.uint8) -> List[int]:
    # Calculate the min and max for each dimension
    min_vals = np.min(input_vector, axis=0)
    max_vals = np.max(input_vector, axis=0)

    print(min_vals, max_vals)

    # Calculate scaling factor and zero point for each dimension
    scaling_factors = (max_vals - min_vals) / 255.0
    zero_points = -min_vals / scaling_factors

    # Quantize the embeddings
    quantized_embeddings = np.round((input_vector - min_vals) / scaling_factors).astype(quantization_type)

    return quantized_embeddings.tolist()


def binary_quantization(input_vector: List[float]) -> List[int]:
    # Convert embeddings to -1 or 1 based on their sign
    binary_embeddings = np.where(np.array(input_vector) >= 0, 1, -1)
    return binary_embeddings.tolist()


vector_response = fetch_embeddings(
    query_text="I am doing hard research on the quantization techniques in embeddings and LLM weights "
               "and its impact on hallucination")

embedding = vector_response['embedding']
print('Original Vector:')
print(vector_response['embedding'][:10])

scalar_quantized_vector = scalar_quantization(input_vector=embedding, quantization_type=np.uint16)
print('\nScalar Quantized Vector:')
print(scalar_quantized_vector[:10])

binary_quantized_vector = binary_quantization(input_vector=embedding)
print('\nBinary Quantized Vector:')
print(binary_quantized_vector[:10])

Original Vector:
[0.16300752758979797, 0.22726836800575256, -0.20111069083213806, -0.6051528453826904, 0.4125409722328186, -0.10740336775779724, 0.5433188080787659, 0.40219298005104065, -0.3696734309196472, 0.559572160243988]
-1.653213620185852 4.231903076171875

Scalar Quantized Vector:
[79, 81, 63, 45, 90, 67, 95, 89, 56, 96]

Binary Quantized Vector:
[1, 1, -1, -1, 1, -1, 1, 1, -1, 1]
