In [None]:
# Install the third-party packages this notebook uses into the kernel's environment.
# NOTE(review): versions are not pinned, so a fresh install may pull newer, incompatible releases.
%pip install openai
%pip install azure-search-documents
%pip install text_chunker
# nltk is not imported directly in the cells visible here — presumably needed by text_chunker; confirm.
%pip install nltk

In [31]:
import os
import time
from openai import AzureOpenAI
from text_chunker import TextChunker
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

In [None]:
# --- Azure AI Search settings (endpoint and key come from environment variables) ---
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.environ.get("AZURE_SEARCH_API_KEY")
index_name = "faq-bc-arquivo"

# --- Embedding / chunking settings ---
TEXT_EMBEDDING_ADA_002 = "text-embedding-ada-002"
chunck_size = 1024  # handed to TextChunker as its `maxlen`

# NOTE(review): `chunck_size` and `chukner` are misspelled; the names are kept
# unchanged because cells outside this view may reference them.
chukner = TextChunker(maxlen=chunck_size)

# Client bound to the single index named above.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(key),
)

# Azure OpenAI client initialisation (endpoint and key come from environment variables).
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    api_version="2024-02-15-preview",
)

def read_file(file_path, encoding="utf-8"):
    """Return the entire text content of the file at ``file_path``.

    The file is decoded with an explicit encoding (UTF-8 by default) instead of
    the platform's locale encoding, so non-ASCII text is read the same way on
    every operating system.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to ``"utf-8"``.

    Returns:
        The whole file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()
    
# Function that generates embeddings
def generate_embeddings(text, model):
    """Request an embedding for ``text`` and measure how long the call took.

    Args:
        text: The string to embed; it is sent as a one-element input list.
        model: Model name forwarded to ``client.embeddings.create``.

    Returns:
        A ``(embedding, elapsed_seconds)`` tuple: the embedding of the first
        (only) item in the response, and the elapsed time of the request
        measured with ``time.perf_counter``.
    """
    began = time.perf_counter()
    response = client.embeddings.create(input=[text], model=model)
    vector = response.data[0].embedding
    duration = time.perf_counter() - began
    return vector, duration

def upload_document(document):
    """Upload a single document to the search index and print the outcome.

    Args:
        document: The document to upload; it is sent as a one-element batch.

    Prints whether the first (only) item of the returned batch result succeeded.
    """
    batch_outcome = search_client.upload_documents(documents=[document])
    first_succeeded = batch_outcome[0].succeeded
    print("Upload of new document succeeded: {}".format(first_succeeded))

def merge_document(document=None):
    """Merge a partial document into the search index and print the outcome.

    Args:
        document: Dict holding the key field of the target document plus the
            fields to update. When omitted, the original hard-coded sample
            payload ``{"hotelId": "1000", "rating": 4.5}`` is sent, so existing
            no-argument calls behave exactly as before.

    NOTE(review): the default payload uses ``hotelId``/``rating``, which are not
    among the fields used elsewhere in this notebook (``id``, ``content``,
    ``title``, ...). It looks like a leftover from a hotels sample; pass an
    explicit ``document`` when working against this index.
    """
    if document is None:
        # A fresh dict per call, so the default is never shared or mutated.
        document = {"hotelId": "1000", "rating": 4.5}
    result = search_client.merge_documents(documents=[document])
    print("Merge into new document succeeded: {}".format(result[0].succeeded))


def delete_document(document=None):
    """Delete a document from the search index and print the outcome.

    Args:
        document: Dict identifying the document to delete (its key field).
            When omitted, the original hard-coded sample payload
            ``{"hotelId": "1000"}`` is sent, so existing no-argument calls
            behave exactly as before.

    NOTE(review): the default payload uses ``hotelId``, which is not among the
    fields used elsewhere in this notebook (``id``, ``content``, ``title``,
    ...). It looks like a leftover from a hotels sample; pass an explicit
    ``document`` when working against this index.
    """
    if document is None:
        # A fresh dict per call, so the default is never shared or mutated.
        document = {"hotelId": "1000"}
    result = search_client.delete_documents(documents=[document])
    print("Delete new document succeeded: {}".format(result[0].succeeded))

def get_document(id):
    """Fetch one document from the search index and print its main fields.

    Args:
        id: Value passed straight to ``search_client.get_document`` to select
            the document.

    Prints a "Details:" header followed by the ``filepath``, ``title``,
    ``url``, ``content`` and ``contentVector`` entries of the returned result,
    one per line.
    """
    fetched = search_client.get_document(id)

    print("Details:")
    # (line template, result key) pairs, in the order they are printed.
    line_specs = (
        ("Filepath: {}", "filepath"),
        ("Title: {}", "title"),
        ("URL: {}", "url"),
        ("Content: {}", "content"),
        ("ContentVector: {}", "contentVector"),
    )
    for template, field in line_specs:
        print(template.format(fetched[field]))

def query(query_string):
    """Run a text search against the index and print the hits.

    Args:
        query_string: Search text forwarded to ``search_client.search``; the
            request asks for the top 5 results.

    For every hit, prints its ``filepath``, ``title``, ``url``, ``content`` and
    ``contentVector`` entries followed by a separator line.
    """
    hits = search_client.search(search_text=query_string, top=5)
    print("Search results:")

    # (line template, result key) pairs, in the order they are printed per hit.
    line_specs = (
        ("Filepath: {}", "filepath"),
        ("Title: {}", "title"),
        ("URL: {}", "url"),
        ("Content: {}", "content"),
        ("ContentVector: {}", "contentVector"),
    )
    for hit in hits:
        for template, field in line_specs:
            print(template.format(hit[field]))
        print("====================================")

# Placeholder record with literal sample values for each field.
# NOTE(review): presumably the shape expected by upload_document — confirm against the index schema.
document = dict(
    content="CONTENT OF THE FILE",
    filepath="/path/to/file",
    title="A good title",
    url="https://www.example.com",
    id="1234",
    chunk_id="",
    last_updated="2021-01-01T00:00:00Z",
)
