In [None]:
# Vertex AI Matching Engine indexes in the Cloud Console (project ecg-ai-416210):
# https://console.cloud.google.com/vertex-ai/matching-engine/indexes?project=ecg-ai-416210

In [1]:
import os
import urllib.request

# Download the Matching Engine helper modules into a local "utils" package so
# that `utils.matching_engine_utils` can be imported by the following cells.
# NOTE(review): the files are fetched from the moving `main` branch; consider
# pinning a commit SHA in the URL so re-runs get the same helper code.
utils_dir = "utils"

# exist_ok=True keeps the cell idempotent without a separate existence check.
os.makedirs(utils_dir, exist_ok=True)

url_prefix = "https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/language/use-cases/document-qa/utils"
files = ["__init__.py", "matching_engine.py", "matching_engine_utils.py"]

for fname in files:
    # Each file is saved under utils/ with its original name.
    urllib.request.urlretrieve(
        f"{url_prefix}/{fname}", filename=os.path.join(utils_dir, fname)
    )

In [2]:
import json
import subprocess
import uuid

import numpy as np

from utils.matching_engine_utils import MatchingEngineUtils

In [3]:

def create_index_and_endpoint(
    CHATBOT_ID: str,
    PROJECT_ID: str = "ecg-ai-416210",
    REGION: str = "europe-west1",
    ME_DIMENSIONS: int = 768,
) -> dict:
    """Create a Matching Engine index for a chatbot, deploy it and save its parameters.

    The function:
      1. writes one all-zero placeholder embedding to ``embeddings_0.json`` in the
         current directory and copies it with ``gsutil`` to
         ``gs://{CHATBOT_ID}/me-bucket/init_index/``;
      2. creates an index named ``{CHATBOT_ID}-me-index`` from that folder
         (``index_update_method="streaming"``, ``index_algorithm="tree-ah"``);
      3. deploys the index to an endpoint and prints the endpoint details;
      4. writes the resulting parameters to ``{CHATBOT_ID}_me.json`` in the
         current directory.

    Args:
        CHATBOT_ID: Chatbot identifier. It is used as the GCS bucket name and as
            the prefix of the index name and of the parameters file name.
            NOTE(review): the bucket is presumably expected to exist already —
            confirm before running for a new chatbot.
        PROJECT_ID: Google Cloud project passed to ``MatchingEngineUtils``.
        REGION: Region passed to ``MatchingEngineUtils``.
        ME_DIMENSIONS: Length of the placeholder embedding and the dimension
            count the index is created with.

    Returns:
        A dict with the keys ``PROJECT_ID``, ``LOCATION``, ``CHATBOT_NAME``,
        ``ME_INDEX_ID``, ``ME_INDEX_ENDPOINT_ID``, ``ME_INDEX_NAME``,
        ``ME_EMBEDDING_DIR`` and ``ME_DIMENSIONS`` (the same content that is
        written to the JSON file).

    Raises:
        subprocess.CalledProcessError: If the ``gsutil cp`` upload fails.
    """
    ME_INDEX_NAME = f"{CHATBOT_ID}-me-index"
    ME_EMBEDDING_DIR = f"{CHATBOT_ID}/me-bucket"

    local_embedding_file = "embeddings_0.json"
    init_index_uri = f"gs://{ME_EMBEDDING_DIR}/init_index"

    # Placeholder embedding: a random id with an all-zero vector.
    init_embedding = {
        "id": str(uuid.uuid4()),
        "embedding": np.zeros(ME_DIMENSIONS).tolist(),
    }

    # Dump the embedding to a local file.
    with open(local_embedding_file, "w") as f:
        json.dump(init_embedding, f)

    # Copy the embedding to Cloud Storage. The argument list is passed without a
    # shell so CHATBOT_ID cannot be interpreted as shell syntax, and check=True
    # stops here if the upload fails instead of creating an index from a
    # missing folder.
    upload_cmd = [
        "gsutil",
        "cp",
        local_embedding_file,
        f"{init_index_uri}/{local_embedding_file}",
    ]
    print("+ " + " ".join(upload_cmd))
    subprocess.run(upload_cmd, check=True)

    mengine = MatchingEngineUtils(PROJECT_ID, REGION, ME_INDEX_NAME)

    # Create index
    index = mengine.create_index(
        embedding_gcs_uri=init_index_uri,
        dimensions=ME_DIMENSIONS,
        index_update_method="streaming",
        index_algorithm="tree-ah",
    )
    if index:
        print(index.name)

    # Deploy index to endpoint
    index_endpoint = mengine.deploy_index()
    if index_endpoint:
        print(f"Index endpoint resource name: {index_endpoint.name}")
        print(
            f"Index endpoint public domain name: {index_endpoint.public_endpoint_domain_name}"
        )
        print("Deployed indexes on the index endpoint:")
        for d in index_endpoint.deployed_indexes:
            print(f" {d.id}")

    ME_INDEX_ID, ME_INDEX_ENDPOINT_ID = mengine.get_index_and_endpoint()

    me_dict = {
        "PROJECT_ID": PROJECT_ID,
        "LOCATION": REGION,
        "CHATBOT_NAME": CHATBOT_ID,
        "ME_INDEX_ID": ME_INDEX_ID,
        "ME_INDEX_ENDPOINT_ID": ME_INDEX_ENDPOINT_ID,
        "ME_INDEX_NAME": ME_INDEX_NAME,
        "ME_EMBEDDING_DIR": ME_EMBEDDING_DIR,
        # Record the dimension actually used for the index, not a fixed 768.
        "ME_DIMENSIONS": ME_DIMENSIONS,
    }

    file_path = f"{CHATBOT_ID}_me.json"

    # Persist the parameters next to the notebook.
    with open(file_path, "w") as json_file:
        json.dump(me_dict, json_file)

    print(f"Index and Endpoint created, you can find its parameters under {file_path}")

    return me_dict

In [9]:
# Chatbot identifier; create_index_and_endpoint derives the index name, the GCS
# paths and the parameters file name from it.
chatbot_id = "ecg_dpo"

In [4]:
# Create and deploy the index using the default project, region and dimensions;
# the returned parameters dict is shown as the cell output.
create_index_and_endpoint(chatbot_id)

+ gsutil cp embeddings_0.json gs://ecg_dpo/me-bucket/init_index/embeddings_0.json
Copying file://embeddings_0.json [Content-Type=application/json]...
/ [1 files][  3.8 KiB/  3.8 KiB]                                                
Operation completed over 1 objects/3.8 KiB.                                      
projects/500033913879/locations/europe-west1/indexes/2613759041549107200
............................Index endpoint resource name: projects/500033913879/locations/europe-west1/indexEndpoints/9136475433888382976
Index endpoint public domain name: 
Deployed indexes on the index endpoint:
Index and Endpoint created, you can find its parameters under ecg_dpo_me.json


{'PROJECT_ID': 'ecg-ai-416210',
 'LOCATION': 'europe-west1',
 'CHATBOT_NAME': 'ecg_dpo',
 'ME_INDEX_ID': 'projects/500033913879/locations/europe-west1/indexes/2613759041549107200',
 'ME_INDEX_ENDPOINT_ID': 'projects/500033913879/locations/europe-west1/indexEndpoints/9136475433888382976',
 'ME_INDEX_NAME': 'ecg_dpo-me-index',
 'ME_EMBEDDING_DIR': 'ecg_dpo/me-bucket',
 'ME_DIMENSIONS': 768}

In [10]:
# Print the gsutil command that uploads the parameters file to the chatbot's bucket.
print(f"gsutil cp {chatbot_id}_me.json gs://{chatbot_id}/me_parameters/{chatbot_id}_me.json")

gsutil cp ecg_dpo_me.json gs://ecg_dpo/me_parameters/ecg_dpo_me.json


In [11]:
!gsutil cp ecg_dpo_me.json gs://ecg_dpo/me_parameters/ecg_dpo_me.json

Copying file://ecg_dpo_me.json [Content-Type=application/json]...
/ [0 files][    0.0 B/  384.0 B]                                                / [1 files][  384.0 B/  384.0 B]                                                
Operation completed over 1 objects/384.0 B.                                      


In [None]:
# Print the gsutil command that copies the parameters file from the bucket into
# ../vector_store_me_parameters/.
print(f"gsutil cp gs://{chatbot_id}/me_parameters/{chatbot_id}_me.json ../vector_store_me_parameters/")