# Import

In [1]:
import os
from time import sleep
from time import time as unixtime
from typing import Callable, List
import random
from math import ceil
import yaml
import string
import json
from urllib.parse import urlparse
from time import sleep

from dotenv import load_dotenv
from IPython.display import display, clear_output

import boto3
import kubernetes
from kubernetes.client.rest import ApiException

import psycopg2

In [2]:
load_dotenv('/.env')

True

# Define

In [3]:
# Target AWS region and EKS cluster name.
REGION = 'ca-central-1'
CLUSTER_NAME = 'kubyterlab-llm'

# Do not change the keys, they are hardcoded throughout.
TAGS = {'purpose': CLUSTER_NAME, 'cluster': CLUSTER_NAME}
CLUSTER_TAGS = {'cluster': CLUSTER_NAME}

# EBS volumes are selected by the value of their `purpose` tag.
VOLUME_FILTERS = [
    {'Name': 'tag:purpose', 'Values': ['kubyterlab-llm', 'llm']},
]

# Keep at most "<major>.<minor>" of the configured version, e.g. '1.30.2' -> '1.30'.
# Slicing to two components leaves shorter versions untouched.
K8S_VERSION = '.'.join(os.environ['K8S_VERSION'].split('.')[:2])


In [4]:
def wait_until(check: Callable, kwargs: dict, cond: Callable[[dict], bool], timeout: int=60, wait_interval: int=1):
    """Poll ``check(**kwargs)`` until ``cond`` accepts its result.

    Args:
        check: Callable invoked as ``check(**kwargs)`` on every poll.
        kwargs: Keyword arguments forwarded to ``check``.
        cond: Predicate applied to each result returned by ``check``.
        timeout: Maximum number of seconds to keep polling.
        wait_interval: Seconds to sleep between two polls.

    Returns:
        The truthy value ``cond`` returned for the first accepted result.

    Raises:
        TimeoutError: If ``cond`` is still unsatisfied once ``timeout``
            seconds have elapsed. Raising (instead of returning a falsy
            value) keeps the guarantee callers rely on when they ignore the
            return value: returning means the condition was met.
    """
    deadline = unixtime() + timeout
    while True:
        # Evaluate ``cond`` exactly once per poll: predicates may have side
        # effects (e.g. refreshing notebook output).
        outcome = cond(check(**kwargs))
        if outcome:
            return outcome
        if unixtime() >= deadline:
            raise TimeoutError(f'Condition not met within {timeout} seconds')
        sleep(wait_interval)

In [5]:
def get_subnet_ids_in_vpc(vpc_id: str) -> List[str]:
    """Return the IDs of the subnets whose ``VpcId`` equals ``vpc_id``.

    Subnets are listed through the module-level ``ec2_client`` and filtered
    locally.
    """
    listed_subnets = ec2_client.describe_subnets()['Subnets']
    return [entry['SubnetId'] for entry in listed_subnets if entry['VpcId'] == vpc_id]

In [6]:
def get_route_table_ids_for_vpc(vpc_id: str):
    """Return the IDs of the route tables whose ``VpcId`` equals ``vpc_id``.

    Route tables are listed through the module-level ``ec2_client`` and
    filtered locally.
    """
    listed_tables = ec2_client.describe_route_tables()['RouteTables']
    return [table['RouteTableId'] for table in listed_tables if table['VpcId'] == vpc_id]

In [7]:
def get_internet_gateway_ids_attached_to_vpc(vpc_id: str) -> List[str]:
    """Return the IDs of internet gateways that have an attachment to ``vpc_id``.

    A gateway ID is emitted once per matching attachment entry.
    """
    gateways = ec2_client.describe_internet_gateways()['InternetGateways']
    return [
        gateway['InternetGatewayId']
        for gateway in gateways
        for attachment in gateway.get('Attachments', [])
        if attachment.get('VpcId', '') == vpc_id
    ]

In [8]:
def get_network_interface_ids_for_vpc(vpc_id: str):
    """Return the IDs of the network interfaces reported for ``vpc_id``.

    The lookup is restricted with a ``vpc-id`` filter on the describe call.
    """
    vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
    interfaces = ec2_client.describe_network_interfaces(Filters=[vpc_filter])['NetworkInterfaces']
    return [interface['NetworkInterfaceId'] for interface in interfaces]

In [9]:
def get_security_group_ids_for_vpc(vpc_id: str) -> List[str]:
    """Return the IDs of the security groups in ``vpc_id``, except 'default'.

    Args:
        vpc_id: ID of the VPC used for the ``vpc-id`` filter.

    Returns:
        A list of ``GroupId`` values; the group named 'default' is left out.
    """
    response = ec2_client.describe_security_groups(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
    return [sg['GroupId'] for sg in response['SecurityGroups'] if sg['GroupName'] != 'default']

In [10]:
# NOTE: still called by the cluster-deletion cell further down, so it cannot be removed yet.
def get_security_group_ids() -> List[str]:
    """Return the IDs of security groups tagged ``purpose=TAGS['purpose']``.

    A group ID is emitted once per matching tag entry.
    """
    wanted_purpose = TAGS['purpose']
    groups = ec2_client.describe_security_groups()['SecurityGroups']
    return [
        group['GroupId']
        for group in groups
        for tag in group.get('Tags', [])
        if tag['Key'] == 'purpose' and tag['Value'] == wanted_purpose
    ]

In [11]:
def is_snapshot_completed(response: dict) -> bool:
    """Display the state of the first snapshot in ``response`` and report completion.

    Returns True when that state, compared case-insensitively, is 'completed'.
    """
    first_snapshot = response['Snapshots'][0]
    snapshot_state = first_snapshot['State']
    # Replace the previous cell output so only the latest state is visible.
    clear_output(wait=True)
    display(snapshot_state)
    return 'completed' == snapshot_state.lower()


In [12]:
def is_zero_volumes(response: dict) -> bool:
    """Return True when the ``Volumes`` entry of ``response`` is empty."""
    remaining = len(response['Volumes'])
    return not remaining

# Instantiate

In [13]:
# One session pinned to REGION; every client below is created from it so that
# all calls share the same region and credentials.
session = boto3.Session(region_name=REGION)
eks_client = session.client('eks')
ec2_client = session.client('ec2')
iam_client = session.client('iam')
elb_client = session.client('elb')

# Resolve the account ID through the same session instead of the implicit
# default session, which is not pinned to REGION.
aws_account_id = session.client('sts').get_caller_identity().get('Account')

In [14]:
# Look up the endpoint of the cluster and parse it as a URL.
endpoint = eks_client.describe_cluster(name=CLUSTER_NAME)['cluster']['endpoint']
url = urlparse(endpoint)
# Bare expression: shown as the cell output in the notebook.
url.hostname

'e21ee836301dd62c3455382fc5e26fc7.gr7.ca-central-1.eks.amazonaws.com'

# Delete

In [15]:
# Find the VPC tagged with this cluster's name; everything below operates on it.
response = ec2_client.describe_vpcs(Filters=[{'Name': 'tag:cluster', 'Values': [CLUSTER_NAME]}])
vpc_ids = [vpc['VpcId'] for vpc in response['Vpcs']]
# Exactly one VPC is expected; say what was found if that does not hold.
assert len(vpc_ids) == 1, f'Expected exactly one VPC tagged cluster={CLUSTER_NAME}, found: {vpc_ids}'
vpc_id = vpc_ids[0]


## Delete LB

In [16]:
load_balancers = elb_client.describe_load_balancers()['LoadBalancerDescriptions']

# Delete every load balancer reported for the cluster's VPC, not just the
# first match: any that is left behind still has to be removed before the VPC
# teardown further down.
for lb in load_balancers:
    # `.get` so that a description without a `VPCId` entry is skipped
    # instead of raising KeyError.
    if lb.get('VPCId') != vpc_id:
        continue
    load_balancer_name = lb['LoadBalancerName']
    print(load_balancer_name)
    response = elb_client.delete_load_balancer(LoadBalancerName=load_balancer_name)
    print(response)
    assert response['ResponseMetadata']['HTTPStatusCode'] == 200


ab71f0dfd6ab241ce93b33487b0103d5
{'ResponseMetadata': {'RequestId': 'ebe55471-430d-4c46-80b4-82054c108939', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ebe55471-430d-4c46-80b4-82054c108939', 'content-type': 'text/xml', 'content-length': '262', 'date': 'Thu, 20 Feb 2025 03:04:19 GMT'}, 'RetryAttempts': 0}}


## Delete Node Groups

In [17]:
# List the cluster's node groups (the next cell repeats this lookup before deleting them).
node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)


In [18]:
# 1) Delete the node groups one by one, waiting until each one has disappeared
#    from the listing before moving on.
node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)
for node_group_name in node_groups['nodegroups']:
    eks_client.delete_nodegroup(clusterName=CLUSTER_NAME, nodegroupName=node_group_name)

    # Explicit, generous timeout rather than wait_until's 60-second default.
    wait_until(
        eks_client.list_nodegroups,
        {'clusterName': CLUSTER_NAME},
        lambda x: node_group_name not in x['nodegroups'],
        timeout=1800,
    )
# Refresh once so `node_groups` reflects the state after the deletions.
node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)

# 2) Delete the cluster itself and wait until it is no longer listed.
eks_client.delete_cluster(name=CLUSTER_NAME)

wait_until(
    eks_client.list_clusters,
    {},
    lambda x: CLUSTER_NAME not in x['clusters'],
    timeout=1800,
    wait_interval=3,
)

# 3) Delete the security groups tagged with this notebook's `purpose` tag.
security_group_ids = get_security_group_ids()
for group_id in security_group_ids:
    ec2_client.delete_security_group(GroupId=group_id)

# 4) Delete the OIDC providers tagged as belonging to this cluster.
oidc_providers_response = iam_client.list_open_id_connect_providers()
for oidc_provider in oidc_providers_response['OpenIDConnectProviderList']:
    arn = oidc_provider['Arn']
    oidc_provider_tags_response = iam_client.list_open_id_connect_provider_tags(OpenIDConnectProviderArn=arn)
    for tag in oidc_provider_tags_response['Tags']:
        if tag['Key'] == 'alpha.eksctl.io/cluster-name' and tag['Value'] == CLUSTER_NAME:
            iam_client.delete_open_id_connect_provider(OpenIDConnectProviderArn=arn)


## Delete VPC

In [19]:
# TODO: Move this to the other notebook. This order is correct.

In [20]:
# Remove every route in the VPC's route tables that points at an internet gateway.
route_tables = ec2_client.describe_route_tables(Filters=[{"Name": "vpc-id", "Values": [vpc_id]}])["RouteTables"]
# A route carries one of these destination fields; the same field name is
# passed back to delete_route to identify the route.
destination_keys = ("DestinationCidrBlock", "DestinationIpv6CidrBlock", "DestinationPrefixListId")
for rt in route_tables:
    for route in rt["Routes"]:
        if not route.get("GatewayId", "").startswith("igw-"):
            continue
        print(f"Deleting route to {route['GatewayId']} in Route Table {rt['RouteTableId']}...")
        # Look the destination up by whichever field is present rather than
        # assuming an IPv4 CIDR, which raised KeyError for other routes.
        destination = {key: route[key] for key in destination_keys if key in route}
        ec2_client.delete_route(RouteTableId=rt["RouteTableId"], **destination)

Deleting route to igw-0364896bf027f848d in Route Table rtb-0d0d9158741ef4e34...


In [21]:
# # Probably unnecessary.
# network_interface_ids = get_network_interface_ids_for_vpc(vpc_id)
# for network_interface_id in network_interface_ids:
#     response = ec2_client.delete_network_interface(NetworkInterfaceId=network_interface_id)
#     print(response['ResponseMetadata'])

In [22]:
# Describe the VPC's network interfaces once, scoped by a `vpc-id` filter.
# Re-querying by ID list is avoided on purpose: with no interfaces left the ID
# list would be empty and would no longer restrict the query to this VPC.
enis = ec2_client.describe_network_interfaces(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])['NetworkInterfaces']
network_interface_ids = [eni['NetworkInterfaceId'] for eni in enis]

# Interfaces whose description reads 'ELB <name>' name the load balancer in
# the second word. Collect the names in a set so each load balancer is deleted
# once even when several interfaces reference it.
lb_names = {
    eni['Description'].split(' ')[1]
    for eni in enis
    if eni.get('Description', '').startswith('ELB ')
}
for lb_name in sorted(lb_names):
    elb_client.delete_load_balancer(LoadBalancerName=lb_name)


In [23]:
# Detach every internet gateway attached to the VPC.
igw_ids = get_internet_gateway_ids_attached_to_vpc(vpc_id)
for igw_id in igw_ids:
    # Capture the result of this call; previously the print below showed a
    # stale `response` left over from an earlier cell.
    response = ec2_client.detach_internet_gateway(InternetGatewayId=igw_id, VpcId=vpc_id)
    print(response['ResponseMetadata'])


ClientError: An error occurred (DependencyViolation) when calling the DetachInternetGateway operation: Network vpc-0e7e227c9a17a5c81 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.

In [None]:
# List the NAT gateways reported for the VPC and show them as the cell output.
nat_gateways = ec2_client.describe_nat_gateways(Filters=[{"Name": "vpc-id", "Values": [vpc_id]}])["NatGateways"]
nat_gateways

In [None]:
# Delete the internet gateways collected in `igw_ids` by the detach cell.
for igw_id in igw_ids:
    # Capture the result of this call; previously the print below showed a
    # stale `response` left over from an earlier cell.
    response = ec2_client.delete_internet_gateway(InternetGatewayId=igw_id)
    print(response['ResponseMetadata'])


In [None]:
# Delete every subnet that belongs to the VPC, printing the metadata of each response.
subnet_ids = get_subnet_ids_in_vpc(vpc_id)
for subnet_id in subnet_ids:
    response = ec2_client.delete_subnet(SubnetId=subnet_id)
    print(response['ResponseMetadata'])

In [None]:
# Delete the VPC's security groups (the helper leaves out the group named 'default').
security_group_ids_for_vpc = get_security_group_ids_for_vpc(vpc_id)
for security_group_id in security_group_ids_for_vpc:
    response = ec2_client.delete_security_group(GroupId=security_group_id)
    print(response['ResponseMetadata'])

In [None]:
# Delete the VPC itself and show the response metadata as the cell output.
# NOTE(review): the route-table cleanup cell comes after this one in the
# notebook — confirm the intended execution order.
response = ec2_client.delete_vpc(VpcId=vpc_id)
response['ResponseMetadata']


In [None]:
# Delete every route table that belongs to the VPC, printing the metadata of each response.
# NOTE(review): the ID list includes all of the VPC's route tables without
# distinguishing a main table — confirm that deleting each of them is expected to succeed.
route_table_ids = get_route_table_ids_for_vpc(vpc_id)
for route_table_id in route_table_ids:
    # route_table = ec2_client.describe_route_tables(RouteTableIds=[route_table_id])['RouteTables'][0]
    # for route in route_table['Routes']:
    #     if route.get('State') == 'blackhole':
    #         ec2_client.delete_route(RouteTableId=route_table_id, DestinationCidrBlock=route['DestinationCidrBlock'])
    response = ec2_client.delete_route_table(RouteTableId=route_table_id)
    print(response['ResponseMetadata'])

In [None]:
# Probe the cluster endpoint looked up earlier; `-k` skips TLS certificate verification.
!curl -k $endpoint

# Get `VolumeId`

In [None]:
# Look up the EBS volumes matching VOLUME_FILTERS.
response = ec2_client.describe_volumes(
    Filters=VOLUME_FILTERS
)

volumes = response.get('Volumes', [])
# Stop here when nothing matches: the snapshot and delete cells below need at least one volume.
if not volumes:
    raise RuntimeError(f'No volumes found matching the filter: {VOLUME_FILTERS}')
volume_ids = [volume['VolumeId'] for volume in volumes]
# Bare expression: shown as the cell output in the notebook.
volume_ids

# Create Snapshots

In [None]:
# Snapshot every volume in turn, waiting for each snapshot to complete before
# starting the next one. All snapshot IDs are recorded, not just the last one.
snapshot_ids = []
for volume_id in volume_ids:
    response = ec2_client.create_snapshot(
        VolumeId=volume_id,
        Description=f"Snapshot For: {volume_id}. Tags: {TAGS}",
        TagSpecifications=[
            {
                'ResourceType': 'snapshot',
                'Tags': [{'Key': 'purpose', 'Value': 'llm'}]
            }
        ]
    )
    snapshot_ids.append(response['SnapshotId'])
    # Explicit, generous timeout rather than wait_until's 60-second default.
    wait_until(
        ec2_client.describe_snapshots,
        {'SnapshotIds': [response['SnapshotId']]},
        is_snapshot_completed,
        timeout=3600,
    )

In [None]:
# ID of the snapshot held in `response`, i.e. the one created last by the loop above.
snapshot_id = response['SnapshotId']
snapshot_id

# Delete Volumes

In [None]:
# Delete every volume found above, then poll until the filtered listing comes back empty.
for volume_id in volume_ids:
    ec2_client.delete_volume(VolumeId=volume_id)
wait_until(ec2_client.describe_volumes, {'Filters': VOLUME_FILTERS}, is_zero_volumes)