Skip to content
This repository has been archived by the owner on Dec 4, 2023. It is now read-only.

Check update and trigger update #121

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
# For Dev Registry: https://raw.githubusercontent.com/premAI-io/prem-registry/dev/manifests.json
PREM_REGISTRY_URL=https://raw.githubusercontent.com/premAI-io/prem-registry/main/manifests.json

# Prem Daemon
# ------------------------------------------------------------------------------------------
DEFAULT_PORT='8000'
PREMD_IMAGE=ghcr.io/premai-io/premd

# Sentry
# ------------------------------------------------------------------------------------------
SENTRY_DSN=https://75592545ad6b472e9ad7c8ff51740b73@o1068608.ingest.sentry.io/4505244431941632
2 changes: 2 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
DEBUG: bool = os.getenv("DEBUG", False)
SECRET_KEY: Secret = Secret(os.getenv("SECRET_KEY", ""))
PROJECT_NAME: str = os.getenv("PROJECT_NAME", "Prem Daemon")
PREMD_IMAGE: str = os.getenv("PREMD_IMAGE", "ghcr.io/premai-io/premd")
DEFAULT_PORT: int = int(os.getenv("DEFAULT_PORT", "8000"))

# APIs
# ------------------------------------------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions app/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,23 @@

def create_start_app_handler(app: FastAPI):
def start_app() -> None:
container_name, new_container_name = utils.generate_container_name("premd")
client = utils.get_docker_client()
if utils.container_exists(container_name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should there be an else statement following this if? Don't we need to create a new container if it doesn't already exist?

container = client.containers.get(container_name)
host_port = container.ports.get(f"{utils.DEFAULT_PORT}/tcp", [None])[0][
"HostPort"
]
if host_port != f"{utils.DEFAULT_PORT}":
utils.check_host_port_availability(utils.DEFAULT_PORT)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if this returns False? If you ignore the return value of this function, why call it at all?

_ = utils.create_new_container(
utils.PREMD_IMAGE,
"latest",
new_container_name,
container_name,
utils.DEFAULT_PORT,
)
utils.update_and_remove_old_container(container_name)
nsosio marked this conversation as resolved.
Show resolved Hide resolved
for registry in config.PREM_REGISTRY_URL.strip().split():
utils.add_services_from_registry(registry)

Expand Down
147 changes: 147 additions & 0 deletions app/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import logging
import re
import subprocess
import time
import xml.etree.ElementTree as ET

import docker
import requests
import torch
from bs4 import BeautifulSoup
nsosio marked this conversation as resolved.
Show resolved Hide resolved

from app.core import config

logger = logging.getLogger(__name__)

PREMD_IMAGE = config.PREMD_IMAGE
DEFAULT_PORT = config.DEFAULT_PORT
SERVICES = []
REGISTRIES = config.PREM_REGISTRY_URL.strip().split()
INTERFACES = [
Expand Down Expand Up @@ -197,3 +202,145 @@ def get_gpu_info():
mem_percentage = (used_memory_value / total_memory_value) * 100

return gpu_name, total_memory_value, used_memory_value, mem_percentage


def extract_labels_from_html_file(html_content, class_names):
soup = BeautifulSoup(html_content, "html.parser")
labels = soup.select(class_names)
return (label.get_text() for label in labels)


def find_maximum_label(labels):
pattern = re.compile(r"v\d+\.\d+\.\d+$")
return max(filter(pattern.match, labels), default=None)
nsosio marked this conversation as resolved.
Show resolved Hide resolved


def get_premd_last_tag(owner, repository, package):
response = requests.get(
f"https://github.com/{owner}/{repository}/pkgs/container/{package}"
Copy link
Contributor

@Janaka-Steph Janaka-Steph Sep 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not calling the API instead?
Something like this, with an authentication token if necessary:

def get_premd_last_tag():
    try:
        url = f'https://api.github.com/orgs/premai-io/packages/container/premd/versions'
        headers = {
            'Accept': 'application/vnd.github+json'
        }
        response = requests.get(url, headers)
        if response.status_code == 200:
            data = response.json()
            if data:
                # Find the latest version by sorting by created_at in descending order
                latest_version = max(data, key=lambda x: x['created_at'])
                return latest_version['name']
            else:
                print("No versions found for the image.")
        else:
            logger.error(f"Failed to retrieve data from GitHub API. Status code: {response.status_code}")
            return None
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return None

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because we'd have to hardcode an auth token (this particular API requires authentication even to access public info)

)
try:
labels = extract_labels_from_html_file(
response.content, ".Label.mr-1.mb-2.text-normal"
)
except Exception as e:
logger.info(f"Unexpected error: {e}")
nsosio marked this conversation as resolved.
Show resolved Hide resolved
else:
return find_maximum_label(labels)


def get_local_docker_image_tags(owner, repository):
try:
client = get_docker_client()
image = client.images.get(f"ghcr.io/{owner}/{repository}")
return image.tags
except Exception as e:
logger.info(f"Unexpected error: {e}")
return []


def generate_container_name(prefix):
client = get_docker_client()

containers = client.containers.list(
all=True, filters={"name": f"^{prefix}", "status": "running"}
)
latest_suffix = -1
for container in containers:
match = re.match(rf"{prefix}_(\d+)", container.name)
if match:
suffix = int(match.group(1))
if suffix > latest_suffix:
latest_suffix = suffix

if latest_suffix == -1:
return prefix, f"{prefix}_1"
else:
return f"{prefix}_{latest_suffix}", f"{prefix}_{latest_suffix+1}"
nsosio marked this conversation as resolved.
Show resolved Hide resolved


def create_new_container(
image_name, image_tag, new_container_name, old_container_name, host_port
nsosio marked this conversation as resolved.
Show resolved Hide resolved
):
client = get_docker_client()
old_container = client.containers.get(old_container_name)

if is_gpu_available():
device_requests = [
docker.types.DeviceRequest(device_ids=["all"], capabilities=[["gpu"]])
]
else:
device_requests = []

volumes = {}
for mount in old_container.attrs["Mounts"]:
source = mount["Source"]
target = mount["Destination"]
mode = mount["Mode"]
volumes[source] = {"bind": target, "mode": mode}

current_ports = old_container.attrs["HostConfig"]["PortBindings"]
current_port_key = list(current_ports.keys())[0]

logger.info(
f"Starting new container {new_container_name} with image {image_name}:{image_tag} at port {host_port}"
nsosio marked this conversation as resolved.
Show resolved Hide resolved
)
new_container = client.containers.run(
nsosio marked this conversation as resolved.
Show resolved Hide resolved
image=f"{image_name}:{image_tag}",
name=new_container_name,
ports={f"{current_port_key}/tcp": [{"HostIp": "", "HostPort": host_port}]},
nsosio marked this conversation as resolved.
Show resolved Hide resolved
volumes=volumes,
environment=old_container.attrs["Config"]["Env"],
device_requests=device_requests,
network_mode=old_container.attrs["HostConfig"]["NetworkMode"],
detach=True,
)
return new_container


def update_and_remove_old_container(old_container_name):
client = get_docker_client()
logger.info(f"Stopping {old_container_name}")
old_container = client.containers.get(old_container_name)
old_container.stop()


def update_container(host_port):
container_name, new_container_name = generate_container_name("premd")
create_new_container(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aren't we missing here:

  • pull get_premd_last_tag()
  • update latest tag to point to above
  • untag old tag
  • after removing old container, prune old image tag

PREMD_IMAGE, "latest", new_container_name, container_name, host_port
)
update_and_remove_old_container(container_name)
nsosio marked this conversation as resolved.
Show resolved Hide resolved


def check_host_port_availability(host_port, timeout=30):
start_time = time.time()
client = get_docker_client()

while True:
if time.time() - start_time > timeout:
return False

containers = client.containers.list()
port_used = any(
f"{host_port}/tcp" in container.ports
for container in containers
if container.status == "running"
)

if not port_used:
return True

time.sleep(1)


def container_exists(container_name):
try:
client = get_docker_client()
_ = client.containers.get(container_name)
return True
except docker.errors.NotFound:
return False
except docker.errors.APIError as e:
logging.error(f"Error checking container existence: {e}")
return False
49 changes: 49 additions & 0 deletions app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,55 @@ async def health():
return schemas.HealthResponse(status=True)


@router.get(
"/update-available/",
responses={
400: {
"model": schemas.ErrorResponse,
"description": "Failed to check update available.",
}
},
response_model=schemas.UpdateAvailableResponse,
)
async def update_available():
try:
owner = "premAI-io"
remote_image = f"{utils.PREMD_IMAGE}:{utils.get_premd_last_tag(owner, 'prem-daemon', 'premd')}"
local_tags = utils.get_local_docker_image_tags(owner.lower(), "premd")
return {
"remote_image": remote_image,
"local_images": local_tags,
"update": remote_image not in local_tags,
}
except Exception as error:
logger.error(error)
raise HTTPException(
status_code=400,
detail={"message": f"Failed to check update available {error}."},
) from error


@router.get(
"/update-daemon/",
responses={
400: {
"model": schemas.ErrorResponse,
"description": "Failed to update.",
}
},
response_model=schemas.UpdateAvailableResponse,
)
async def trigger_update():
try:
utils.update_container(utils.DEFAULT_PORT + 1)
nsosio marked this conversation as resolved.
Show resolved Hide resolved
except Exception as error:
logger.error(error)
raise HTTPException(
status_code=400,
detail={"message": f"Failed to update {error}."},
) from error


@router.get("/interfaces/", response_model=list[schemas.InterfaceResponse])
async def interfaces():
return utils.get_interfaces()
Expand Down
6 changes: 6 additions & 0 deletions app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ class RunServiceInput(BaseModel):
id: str


class UpdateAvailableResponse(BaseModel):
remote_image: str
local_images: list[str]
update: bool


class ServiceInput(BaseModel):
id: str
name: str
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ torchvision==0.15.2
torchaudio==2.0.2
sentry-sdk==1.26.0
psutil==5.9.5
beautifulsoup4==4.12.2