### Deploy Stable Diffusion XL via the command line

In [None]:
# Cloud Shell commands:

# NOTE(review): the leading "$ " on each command is presumably the shell prompt
# from a copied transcript, not part of the command itself — confirm before pasting.

# Log in for Application Default Credentials.
$ gcloud auth application-default login

# Set the billing/quota_project config property to the lab project.
$ gcloud config set billing/quota_project qwiklabs-asl-04-1e4c51b2847c

# Deploy Stable Diffusion XL base 1.0 from Model Garden in us-central1 on a
# g2-standard-8 machine with an NVIDIA_L4 accelerator, accepting the model EULA
# and naming the resulting endpoint via --endpoint-display-name.
$ gcloud ai model-garden models deploy \
--model="stability-ai/stable-diffusion-xl-base@stable-diffusion-xl-base-1.0" \
--region="us-central1" \
--project="qwiklabs-asl-04-1e4c51b2847c" \
--accept-eula \
--machine-type="g2-standard-8" \
--accelerator-type="NVIDIA_L4" \
--container-image-uri="us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/pytorch-inference.cu125.0-1.ubuntu2204.py310" \
--endpoint-display-name="stabilityai_stable-diffusion-xl-1-mg-one-click-deploy"

### Deploy the Falcon Instruct Model

Assessment question:

Given that Falcon-instruct is being fine-tuned using PEFT, which of the following is a common technique used in PEFT to reduce the number of trainable parameters?
 - Training all the layers of the original Falcon model from scratch
 - Converting the model architecture to a recurrent neural network (RNN)
 - Adding small, trainable adapter layers to the existing Falcon model while freezing most of its original parameters  (+)
 - Using a significantly smaller training dataset compared to the original Falcon model

In [None]:
import vertexai
from vertexai import model_garden

# Bind the Vertex AI SDK to the lab project and region before touching Model Garden.
vertexai.init(
    project="qwiklabs-asl-04-1e4c51b2847c",
    location="us-central1",
)

# Reference the Falcon Instruct (PEFT) entry in Model Garden.
model = model_garden.OpenModel(
    "tiiuae/falcon-instruct-7b-peft@falcon-7b-instruct"
)

# Deploy the model; the returned endpoint handle is kept in `endpoint`.
endpoint = model.deploy(
    # Display names for the created resources.
    endpoint_display_name="falcon-instruct-7b-peft-deploy-challenge-lab",
    model_display_name="falcon-instruct-7b-peft-001-1750287659003",
    # Serving hardware: one NVIDIA L4 on a g2-standard-12 machine.
    machine_type="g2-standard-12",
    accelerator_type="NVIDIA_L4",
    accelerator_count=1,
    # Serving container image.
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20240620_1616_RC00",
    accept_eula=True,
)

### Deploy a Model to the same endpoint

Assessment question:

Which of the following contribute to the cost of a Vertex AI custom model deployment (select all that apply)?
 - Model registry
 - Accelerators (+)
 - Virtual machine price per hour (+)
 - Disks (+)

### Test the CodeGemma deployment

In [None]:
import google.cloud.logging
import logging

from google.cloud import aiplatform

# Do not remove logging section
client = google.cloud.logging.Client()
client.setup_logging()

# Replace your project number and endpoint id
endpoint_resource_name = (
    "projects/1034013642830/locations/us-central1/endpoints/6552857304591499264"
)
endpoint = aiplatform.Endpoint(endpoint_resource_name)

# Prompt and sampling settings sent with the request.
prompt = "Write a function to list n Fibonacci numbers in Python."
max_tokens = 500
temperature = 1.0
top_p = 1.0
top_k = 1

# One request in the "chatCompletions" format, carrying the prompt as a single
# user message together with the sampling settings above.
instances = [
    {
        "@requestFormat": "chatCompletions",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
    }
]
response = endpoint.predict(instances=instances)

# Print the deployed model id reported on the prediction response.
print(response.deployed_model_id)

# Do not remove logging section
log_message = f"Fibonacci function: {response}"
logging.info(log_message)

### Test the Stable Diffusion XL deployment

In [None]:
import base64
from io import BytesIO

from google.cloud import aiplatform
from PIL import Image


def base64_to_image(image_str):
    """Decode a base64-encoded image payload and open it with PIL.

    Args:
        image_str: Base64 text (or bytes) holding an encoded image file.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    raw_bytes = base64.b64decode(image_str)
    # Image.open expects a file-like object, so wrap the bytes in a buffer.
    return Image.open(BytesIO(raw_bytes))


def image_grid(imgs, rows=2, cols=2):
    """Paste images onto a white canvas laid out as a rows x cols grid.

    Every cell is sized from the first image, and each column is followed by
    a 10-pixel horizontal gutter. Tiles are placed left to right, then top to
    bottom.

    Args:
        imgs: Non-empty sequence of PIL images; ``imgs[0].size`` defines the
            cell width and height used for the whole grid.
        rows: Number of grid rows the canvas is sized for.
        cols: Number of grid columns the canvas is sized for.

    Returns:
        A new RGB image of size ``(cols * w + 10 * cols, rows * h)``.

    Raises:
        IndexError: If ``imgs`` is empty.
    """
    gutter = 10
    w, h = imgs[0].size
    grid = Image.new(
        mode="RGB", size=(cols * w + gutter * cols, rows * h), color=(255, 255, 255)
    )
    for i, img in enumerate(imgs):
        # Offset by the column index, not the running index: using `i` for the
        # gutter shifted every tile after the first row to the right.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * (w + gutter), row * h))
    return grid

# Replace your project number and endpoint id
endpoint_resource_name = (
    "projects/1034013642830/locations/us-central1/endpoints/mg-endpoint-1750285630"
)
endpoint = aiplatform.Endpoint(endpoint_resource_name)

# One text-to-image request for a 1024x1024 output.
instances = [
    {
        "prompt": " Generate a photorealistic image of a happy dog running",
        "height": 1024,
        "width": 1024,
    }
]
response = endpoint.predict(instances=instances)

# Do not remove logging section
log_message = f"photorealistic image: {response}"
logging.info(log_message)

# The first prediction is decoded from base64 and shown as a one-row grid.
images = [base64_to_image(response.predictions[0])]
image_grid(images, rows=1)

### Test the Falcon deployment