We will use this notebook to find and deploy the most affordable GPUs on the market with the Shadeform API. We limit our search to machines with a single A6000 GPU, but that is easily configurable below.

This notebook leverages the Docker container created under `llm-eval-harness-benchmarking` to run basic Hugging Face benchmarks for a given Hugging Face model.

It also re-uses code from [find_and_use_gpus.ipynb](link.com) for using the Shadeform API to find available instances.

In [None]:
import json
import os

import requests


In [None]:
# --- Shadeform API endpoints -------------------------------------------------
base_url = "https://api.shadeform.ai/v1/instances"
instance_type_url = base_url + "/types"
create_url = base_url + "/create"

# --- Authentication ----------------------------------------------------------
# Prefer the SHADEFORM_API_KEY environment variable so the key never has to be
# saved inside the notebook. If the variable is unset, the placeholder below is
# used and can still be replaced by hand.
headers = {
    "X-API-KEY": os.environ.get("SHADEFORM_API_KEY", "<add-your-key-here>"),
    "Content-Type": "application/json",
}

# --- Search configuration (edit these to look for a different machine) -------
shade_instance_type = "A6000"  # instance type name sent in the create request
gpu_type = "A6000"             # GPU model to filter the instance-type search by
num_gpus = 1                   # number of GPUs per machine

# Query-string filters for the instance-type search: only available machines,
# sorted by price.
params = {
    'gpu_type': gpu_type,
    'sort': 'price',
    'available': True,
    'num_gpus': num_gpus,
}



In [None]:
# Ask Shadeform for the instance types matching `params`. The request asks for
# results sorted by price, so the first entry is treated as the cheapest offer.
response = requests.get(instance_type_url, headers=headers, params=params, timeout=30)
# Fail here with a clear HTTP error (e.g. a bad API key) instead of a KeyError
# on the missing "instance_types" field below.
response.raise_for_status()
instance_types = response.json()["instance_types"]

best_instance = None
region = None
if instance_types:
    best_instance = instance_types[0]
    # Pick the first region of the offer. Entries are assumed to possibly carry
    # an `available` flag (TODO confirm against the API reference); entries
    # without the flag are treated as available, which matches the previous
    # "take the first region" behaviour.
    availability = best_instance.get('availability') or []
    region = next(
        (entry['region'] for entry in availability if entry.get('available', True)),
        None,
    )
    print(f"The cheapest {gpu_type} instance with {num_gpus} gpu(s) is:", best_instance)
else:
    print(f"No instances of type {gpu_type} instance with {num_gpus} gpu(s) found.")


In [None]:
# Build the body of the create-instance request for the offer selected above.
# Stop with a readable message when the search found nothing, rather than
# failing on `None["cloud"]`.
if best_instance is None:
    raise RuntimeError(
        "No matching instance type was found; adjust gpu_type / num_gpus and "
        "re-run the search cell before creating an instance."
    )

payload = {
    "cloud": best_instance["cloud"],
    "region": region,
    # Prefer the type reported on the selected offer so it stays in sync with
    # gpu_type / num_gpus; fall back to the configured value if the offer does
    # not carry the field (field name assumed from the API — TODO confirm).
    "shade_instance_type": best_instance.get("shade_instance_type", shade_instance_type),
    "shade_cloud": True,
    "name": "cool_gpu_server",
    # Launch the benchmarking container on boot; the model and task list are
    # passed to it as environment variables.
    "launch_configuration": {
        "type": "docker",
        "docker_configuration": {
            "image": "shadeform/lm-eval-harness",
            "envs": [
                {
                    "name": "model",
                    "value": "mistralai/Mistral-7B-Instruct-v0.2",
                },
                {
                    "name": "tasks",
                    "value": "hellaswag",
                },
            ],
        },
    },
}

In [None]:
# Submit the create request for the cheapest available instance selected above.
response = requests.post(create_url, headers=headers, json=payload)


In [None]:
# Quick visual check that the create request worked: show the HTTP status line
# first (the body alone can be empty or ambiguous), then the raw response body.
print(response.status_code, response.reason)
print(response.text)

In [None]:
# List the instances visible to this API key and report on the first one
# returned. Re-run this cell to poll until the instance becomes active.
instance_response = requests.get(base_url, headers=headers, timeout=30)

print(instance_response.text)
# Raise a clear HTTP error (e.g. a bad API key) instead of a KeyError below.
instance_response.raise_for_status()
instances = instance_response.json()["instances"]

# Keep both names defined even when the list is empty, so later cells can
# inspect them without a NameError.
instance = instances[0] if instances else None
instance_status = instance['status'] if instance is not None else None

if instance is None:
    print("No instances found for this account; check that the create request succeeded.")
elif instance_status == 'active':
    print(f"Instance is active with IP: {instance['ip']}")
else:
    print(f"Instance isn't yet active: {instance}" )