# Try up to n times to create a reservation. Returns the operation JSON, or None on failure.
# Uses the beta API: https://compute.googleapis.com/compute/beta/...

In [None]:
!pip install --upgrade -q google-cloud-compute

In [None]:
import google.auth.transport.requests
from google.oauth2 import credentials
import datetime
import requests
import logging
import pprint
import base64
import random
import uuid
import time
import json

In [None]:
# Settings

logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

project = "matt-demos"
machines = 1    # number of instances requested in the reservation ("count")
retries = 1000  # retry budget handed to request_resource_loop

# Ref.: https://cloud.google.com/compute/docs/gpus
zones = ["us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"]
machine_type = "a2-highgpu-1g" #a2-highgpu-1g, a2-ultragpu-1g, g2-standard-4
accelerator_type = "nvidia-tesla-a100" #nvidia-a100-80gb, nvidia-tesla-a100, nvidia-l4
accelerator_count = 1
reservation_name = f"gpu-reservation-{uuid.uuid4()}"

# Auto-delete today at 23:59:00 local time.
# The wall-clock time is built first (naive), then astimezone() attaches the
# machine's real UTC offset, and isoformat() renders it as an RFC 3339 string
# (e.g. 2024-05-01T23:59:00-05:00). The previous version converted to UTC and
# then appended a hard-coded "-05:00", which shifted the deletion time.
autodelete = (
    datetime.datetime.now()
    .replace(hour=23, minute=59, second=0, microsecond=0)
    .astimezone()
    .isoformat()
)

# Ref.: https://cloud.google.com/compute/docs/reference/rest/beta/reservations/insert#request-body
request = {
  "name": reservation_name,
  "deleteAtTime": autodelete,
  "shareSettings": {
    "shareType": "LOCAL"
  },
  "specificReservation": {
    "count": machines,
    "instanceProperties": {
      "guestAccelerators": [
        {
          "acceleratorCount": accelerator_count,
          "acceleratorType": accelerator_type
        }
      ],
      "machineType": machine_type
    }
  }
}

In [None]:
# Check for operation status
# Ref.: https://cloud.google.com/compute/docs/reference/rest/beta/zoneOperations/get
def check_resource_grant(credentials, project, zone, oper_id, poll_interval=2):
    """Poll a zone operation until it is DONE and report whether it succeeded.

    Args:
        credentials: google-auth credentials object; its ``token`` is sent as
            the bearer token and it is refreshed when no longer valid.
        project: Project ID used in the operation URL.
        zone: Zone name used in the operation URL.
        oper_id: Operation name returned by the insert call.
        poll_interval: Seconds to sleep between polls (default 2).

    Returns:
        The operation JSON (dict) when the operation finished without an
        error, otherwise ``None``. ``None`` is also returned when the reply
        carries no ``status`` field (for example an API error body).
    """
    oper_url = "https://compute.googleapis.com/compute/beta/projects/{}/zones/{}/operations/{}".format(
        project, zone, oper_id
    )
    while True:
        response = requests.get(
            oper_url,
            headers={"Authorization": f"Bearer {credentials.token}"},
        )
        # Parse the body once instead of on every access.
        operation = response.json()
        status = operation.get("status")

        if status is None:
            # Not an operation resource; log the body and report failure
            # rather than crashing with a KeyError.
            logging.error(operation)
            return None

        if status == "DONE":
            if "httpErrorStatusCode" in operation or "error" in operation:
                logging.debug("Operation %s failed: %s", oper_id, operation.get("error"))
                return None
            return operation

        # Only hit the token endpoint when the token has actually expired,
        # not on every poll.
        if not credentials.valid:
            credentials.refresh(google.auth.transport.requests.Request())
        time.sleep(poll_interval)

In [None]:
# Ref.: https://cloud.google.com/compute/docs/reference/rest/beta/reservations/insert
def request_resource_loop(credentials, project, zones, request, retries) -> "dict | None":
    """Try up to ``retries`` times to create the reservation in a random zone.

    Each attempt picks a zone at random, posts the reservation request and
    waits for the resulting operation via ``check_resource_grant``.

    Args:
        credentials: google-auth credentials object providing ``token``.
        project: Project ID used in the request URL.
        zones: Non-empty sequence of zone names to choose from.
        request: Reservation request body (JSON-serialisable dict).
        retries: Maximum number of attempts.

    Returns:
        The finished operation JSON (dict) when a reservation was granted,
        otherwise ``None`` (attempts exhausted, or the insert call itself was
        rejected).

    Raises:
        ValueError: If ``zones`` is empty.
    """
    if not zones:
        raise ValueError("zones must not be empty")

    logging.info("{}: Starting...".format(datetime.datetime.now()))
    # The body never changes between attempts, so serialise it once.
    payload = json.dumps(request)

    # range(1, retries + 1) so that exactly `retries` attempts are made
    # (the previous range(1, retries) made one fewer, and none for retries=1).
    for attempt in range(1, retries + 1):
        zone = random.choice(zones)
        url = "https://compute.googleapis.com/compute/beta/projects/{}/zones/{}/reservations".format(project, zone)
        logging.info("{}: Requesting resource ({})".format(datetime.datetime.now(), zone))

        response = requests.post(
            url,
            data=payload,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {credentials.token}",
            },
        )
        reply = response.json()

        # No operation name means the insert call was rejected outright;
        # log the reply and stop instead of repeating the same request.
        if "name" not in reply:
            logging.error(reply)
            break

        status = check_resource_grant(credentials, project, zone, reply["name"])
        if status:
            logging.info("{}: Resource granted!".format(datetime.datetime.now()))
            return status

        # Only announce a retry when another attempt will actually follow.
        if attempt < retries:
            logging.info("Resource unavailable, retrying...{}".format(attempt + 1))

    logging.info("{}: fail to allocate resource.".format(datetime.datetime.now()))
    return None

In [None]:
# Obtain credentials via google.auth.default() and fetch a fresh access token.
credentials, project_id = google.auth.default()
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# request_resource_loop returns None when no reservation could be created;
# otherwise it returns the JSON of the finished operation.
granted_resource_json = request_resource_loop(
    credentials=credentials,
    project=project,
    zones=zones,
    request=request,
    retries=retries,
)
pprint.pprint(granted_resource_json)