In [1]:
!pip install --quiet google-cloud-secret-manager
!pip install --quiet torch-model-archiver

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/184.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m184.3/184.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.6/184.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from google.cloud import aiplatform
from google.cloud.aiplatform import Model
from google.cloud import secretmanager

In [3]:
DEPLOYED_MODEL_URI = "<BUCKET URI WHERE MODEL IS DEPLOYED>"
BUCKET_URI = "<BUCKET URI>"

res = !gcloud config get core/project
PROJECT_ID = res[0]

In [4]:
TEST_EXAMPLES = [
     'As a healthcare fellow learning diagnosis, What is (are) Adhesions?',
    'As a healthcare fellow learning diagnosis, what research (or clinical trials) is being done for Miller Fisher Syndrome ?',
    'As a healthcare fellow learning diagnosis, What to do for Henoch-Schnlein Purpura '
]

# Prompt template for the training data and the finetuning tests
PROMPT_TEMPLATE = "Instruction:\n{instruction}\n\nResponse:\n{answer}"

TEST_PROMPTS = [
    PROMPT_TEMPLATE.format(instruction=example, answer="")
    for example in TEST_EXAMPLES
]

## Deploy model from bucket to Vertex AI

use Hugginface docker as the image uri for deployment of model to vertex AI


In [5]:
#"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-6:latest"
#us-docker.pkg.dev/vertex-ai/prediction/pytorch-cpu.2-1:latest
DOCKER_URL = "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310"

def deploy_model_to_vertex_from_bucket(
    project_id: str,
    model_location:str,
    display_name:str,
    endpoint_name: str,
    region: str = "us-central1",
    machine_type: str= "g2-standard-4",
    accelerator_type : str = "NVIDIA_L4",
    traffic_percentage: int = 100):
    aiplatform.init(project=project_id, location=region, staging_bucket=BUCKET_URI)

    model = Model.upload(
        display_name=display_name,
        artifact_uri=model_location,
        serving_container_image_uri= DOCKER_URL,
        serving_container_environment_variables={
             "NUM_SHARD": "1",
            "MAX_INPUT_TOKENS": "512",
            "MAX_TOTAL_TOKENS": "1024",
            "MAX_BATCH_PREFILL_TOKENS": "1512",
        },
        serving_container_ports=[8080]
    )

    endpoint = aiplatform.Endpoint.create(display_name = endpoint_name)

    deployed = model.deploy(
        endpoint= endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=1)

    print(f"Model {display_name} deployed to {endpoint.resource_name}")
    return deployed, endpoint


In [None]:
deployed_model, endpoint = deploy_model_to_vertex_from_bucket(
    project_id=PROJECT_ID,
    display_name="MEDQUAD-Question-Answer-Agent",
    model_location=DEPLOYED_MODEL_URI,
    endpoint_name="medquad-finetunned-gemma2"
)

In [7]:
print(endpoint)

<google.cloud.aiplatform.models.Endpoint object at 0x7e008c2eef80> 
resource name: projects/59763918748/locations/us-central1/endpoints/1095466524497412096


## Download bucket Content

The bucket content is downloaded to local disk to compare output from deployed model against HF model

In [37]:
!pip install crcmod

Collecting crcmod
  Downloading crcmod-1.7.tar.gz (89 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/89.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.7/89.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: crcmod
  Building wheel for crcmod (setup.py) ... [?25l[?25hdone
  Created wheel for crcmod: filename=crcmod-1.7-cp310-cp310-linux_x86_64.whl size=31406 sha256=87f29d06456f3f478bd58c7afb7c1f1d91b4b5cdab8f496c94df8d79dfd868e0
  Stored in directory: /root/.cache/pip/wheels/85/4c/07/72215c529bd59d67e3dac29711d7aba1b692f543c808ba9e86
Successfully built crcmod
Installing collected packages: crcmod
Successfully installed crcmod-1.7


In [None]:
!gsutil -m cp \
"<<BUCKETS CONTENTS>>" \
   /content/gemma

## Test deployed model

Test the non-deployed finetuned model's output against the deployed version

In [39]:
from huggingface_hub import get_token
import transformers
import os
from google.cloud import storage

model = transformers.GemmaForCausalLM.from_pretrained(
    "/content/gemma",
    local_files_only=True,
    device_map="auto",  # Library "accelerate" to auto-select GPU
)
tokenizer = transformers.GemmaTokenizer.from_pretrained(
    "/content/gemma",
    local_files_only=True,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [40]:
def test_transformers_model(
    model: transformers.GemmaForCausalLM,
    tokenizer: transformers.GemmaTokenizer,
) -> None:
    for prompt in TEST_PROMPTS:
        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_length=500)

        output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"{output}\n{'- '*40}")


test_transformers_model(model, tokenizer)

Instruction:
As a healthcare fellow learning diagnosis, What is (are) Adhesions?

Response:
Adhesions are a common finding in the abdomen and pelvis. They are a result of scar tissue formation. They can be found in the abdomen, pelvis, and chest. They can be found in the abdomen, pelvis, and chest.

Instruction:
As a healthcare fellow learning diagnosis, What is (are) Adhesions?

Response:
Adhesions are a common finding in the abdomen and pelvis. They are a result of scar tissue formation. They can be found in the abdomen, pelvis, and chest. They can be found in the abdomen, pelvis, and chest.

Instruction:
As a healthcare fellow learning diagnosis, What is (are) Adhesions?

Response:
Adhesions are a common finding in the abdomen and pelvis. They are a result of scar tissue formation. They can be found in the abdomen, pelvis, and chest. They can be found in the abdomen, pelvis, and chest.

Instruction:
As a healthcare fellow learning diagnosis, What is (are) Adhesions?

Response:
Adhes

In [41]:
def test_vertexai_endpoint(endpoint: aiplatform.Endpoint):
    for question, prompt in zip(TEST_EXAMPLES, TEST_PROMPTS):
        instance = {
            "inputs": f"{question}",
            "parameters": {
                # "max_new_tokens": 128,
                # "do_sample": True,
                # "top_p": 0.95,
                # "temperature": 0.7,
            }
            # "inputs": prompt,
            # "max_tokens": 10,
            #"temperature": 0.0,
            #"top_p": 1.0,
            #"top_k": 1,
            # "raw_response": True,?

            }
        response = endpoint.predict(instances=[instance])
        output = response.predictions[0]
        print(f"{question}\n{output}\n{'- '*40}")


test_vertexai_endpoint(endpoint)

As a healthcare fellow learning diagnosis, What is (are) Adhesions?
 You readily know what they are as a patient or ... >what do you know about Adhesions? If not, mention the general functions or aetiologies or ... >Are you planning to go through IVF in future, and is_{[Asking wrong thing but] asking about role play in IVF> what do [I should] do after the Knees Are: Replacement of Adult | Growth | Aging | Regeneration ; is [go for copy it from Billions of Pages] or How about trying some shake
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
As a healthcare fellow learning diagnosis, what research (or clinical trials) is being done for Miller Fisher Syndrome ?


Answer:
Miller Fisher Syndrome is a rare condition that affects the development of nerves. There are currently no known treatments for Miller Fisher Syndrome, but research is ongoing in order to find a potential cure. Clinical trials are being conducted in order to evaluate the effectiveness of po

## Clean up

In [None]:
delete_model = True
delete_objects = False
delete_bucket = False

if delete_model:
    deployed_model.undeploy_all()
    endpoint.delete(force=True)
    model.delete()
# if delete_objects:
#     !gcloud storage rm --recursive $BUCKET_URI/**
# if delete_bucket:
#     !gcloud storage buckets delete $BUCKET_URI