# Import

In [1]:
import os
from time import sleep
from time import time as unixtime
from typing import Callable, List
import random
from math import ceil
import yaml
import string
import json
from urllib.parse import urlparse
from time import sleep

from dotenv import load_dotenv
from IPython.display import display, clear_output

import boto3
import kubernetes
from kubernetes.client.rest import ApiException

import psycopg2

In [2]:
load_dotenv('/.env')

True

# Define

In [3]:
REGION = 'ca-central-1'
CLUSTER_NAME = 'kubyterlab-llm'
TAGS = {'purpose': CLUSTER_NAME, 'cluster': CLUSTER_NAME}  # Do not change the keys, they are hardcoded throughout.
CLUSTER_TAGS = {'cluster': CLUSTER_NAME}
CLUSTER_FILTERS = [{'Name': f'tag:{k}', 'Values': [CLUSTER_TAGS[k]]} for k in CLUSTER_TAGS]
K8S_VERSION = os.environ['K8S_VERSION']  # '1.30'
K8S_VERSION = '.'.join(K8S_VERSION.split('.')[:2]) if len(K8S_VERSION.split('.')) > 2 else K8S_VERSION


In [4]:
def wait_until(check: Callable, kwargs: dict, cond: Callable[[dict], bool], timeout: int=60, wait_interval: int=1):
    start = t = unixtime()
    result = check(**kwargs)
    while not cond(result) or t < start + timeout:
        result = check(**kwargs)
        if cond(result):
            return cond(result)
        sleep(wait_interval)
        t = unixtime()
    return cond(result)

In [5]:
def get_subnet_ids_in_vpc(vpc_id: str) -> List[str]:
    subnets_response = ec2_client.describe_subnets()
    subnet_ids = []
    for subnet in subnets_response['Subnets']:
        if subnet['VpcId'] == vpc_id:
            subnet_ids.append(subnet['SubnetId'])
    return subnet_ids

In [6]:
def get_route_table_ids_for_vpc(vpc_id: str):
    response = ec2_client.describe_route_tables()
    rt_ids = []
    for route_table in response['RouteTables']:
        if route_table['VpcId'] == vpc_id:
            rt_ids.append(route_table['RouteTableId'])
    return rt_ids

In [7]:
def get_internet_gateway_ids_attached_to_vpc(vpc_id: str) -> List[str]:
    response = ec2_client.describe_internet_gateways()
    ids = []
    for ig in response['InternetGateways']:
        for attachment in ig.get('Attachments', []):
            if attachment.get('VpcId', '') == vpc_id:
                ids.append(ig['InternetGatewayId'])
    return ids

In [8]:
def get_network_interface_ids_for_vpc(vpc_id: str):
    response = ec2_client.describe_network_interfaces(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
    network_interface_ids = []
    for network_interface in response['NetworkInterfaces']:
        network_interface_ids.append(network_interface['NetworkInterfaceId'])
    return network_interface_ids

In [9]:
def get_security_group_ids_for_vpc(vpc_id: str) -> str:
    response = ec2_client.describe_security_groups(Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
    ids = []
    for sg in response['SecurityGroups']:
        if sg['GroupName'] != 'default':
            ids.append(sg['GroupId'])
    return ids

In [10]:
# Not used. TODO: Delete
def get_security_group_ids() -> List[str]:
    response = ec2_client.describe_security_groups()
    ids = []
    for sg in response['SecurityGroups']:
        for tag in sg.get('Tags', []):
            if tag['Key'] == 'purpose' and tag['Value'] == TAGS['purpose']:
                ids.append(sg['GroupId'])
    return ids

In [11]:
def is_snapshot_completed(response: dict) -> bool:
    state = response['Snapshots'][0]['State']
    clear_output(wait=True)
    display(state)
    return state.lower() == 'completed'


In [12]:
def is_zero_volumes(response: dict) -> bool:
    return len(response['Volumes']) == 0

# Instantiate

In [13]:
session = boto3.Session(region_name=REGION)
eks_client = session.client('eks')
ec2_client = session.client('ec2')
iam_client = session.client('iam')
elb_client = session.client('elb')

aws_account_id = boto3.client('sts').get_caller_identity().get('Account')

In [14]:
endpoint = eks_client.describe_cluster(name=CLUSTER_NAME)['cluster']['endpoint']
url = urlparse(endpoint)
url.hostname

'3d9547ae31b419307de001e999ddd051.gr7.ca-central-1.eks.amazonaws.com'

# Delete

In [15]:
response = ec2_client.describe_vpcs(Filters=[{'Name': f'tag:cluster', 'Values': [CLUSTER_NAME]}])
vpc_ids = [vpc['VpcId'] for vpc in response['Vpcs']]
assert len(vpc_ids) == 1
vpc_id = vpc_ids[0]


## Delete LB

In [16]:
load_balancers = elb_client.describe_load_balancers()['LoadBalancerDescriptions']

for lb in load_balancers:
    if lb['VPCId'] == vpc_id:
        load_balancer_name = lb['LoadBalancerName']
        print(load_balancer_name)
        response = elb_client.delete_load_balancer(LoadBalancerName=load_balancer_name)
        print(response)
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200
        break


a7089113cc0ab4998af8896a2a6a1de6
{'ResponseMetadata': {'RequestId': 'd217a6a4-65ee-4677-82b3-3c359034cdda', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd217a6a4-65ee-4677-82b3-3c359034cdda', 'content-type': 'text/xml', 'content-length': '262', 'date': 'Mon, 13 Jan 2025 00:53:08 GMT'}, 'RetryAttempts': 0}}


## Delete Node Groups

In [17]:
node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)


In [18]:
node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)
for node_group_name in node_groups['nodegroups']:
    response = eks_client.describe_nodegroup(clusterName=CLUSTER_NAME, nodegroupName=node_group_name)
    status = response['nodegroup']['status']

    eks_client.delete_nodegroup(clusterName=CLUSTER_NAME, nodegroupName=node_group_name)

    wait_until(eks_client.list_nodegroups, {'clusterName': CLUSTER_NAME}, lambda x: node_group_name not in x['nodegroups'])
    node_groups = eks_client.list_nodegroups(clusterName=CLUSTER_NAME)

eks_client.delete_cluster(name=CLUSTER_NAME)

wait_until(eks_client.list_clusters, {}, lambda x: CLUSTER_NAME not in x['clusters'], wait_interval=3)

security_group_ids = get_security_group_ids()
for group_id in security_group_ids:
    ec2_client.delete_security_group(GroupId=group_id)

oidc_providers_response = iam_client.list_open_id_connect_providers()
for oidc_provider in oidc_providers_response['OpenIDConnectProviderList']:
    arn = oidc_provider['Arn']
    oidc_provider_tags_response = iam_client.list_open_id_connect_provider_tags(OpenIDConnectProviderArn=arn)
    for tag in oidc_provider_tags_response['Tags']:
        if tag['Key'] == 'alpha.eksctl.io/cluster-name' and tag['Value'] == CLUSTER_NAME:
            iam_client.delete_open_id_connect_provider(OpenIDConnectProviderArn=arn)


## Delete VPC

In [19]:
# TODO: Move this to the other notebook. This order is correct.

In [20]:
igw_ids = get_internet_gateway_ids_attached_to_vpc(vpc_id)
for igw_id in igw_ids:
    ec2_client.detach_internet_gateway(InternetGatewayId=igw_id, VpcId=vpc_id)
    print(response['ResponseMetadata'])


{'RequestId': '87e249a1-a0f1-43d1-ac4a-1c12c510abe4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Mon, 13 Jan 2025 01:01:20 GMT', 'content-type': 'application/json', 'content-length': '1348', 'connection': 'keep-alive', 'x-amzn-requestid': '87e249a1-a0f1-43d1-ac4a-1c12c510abe4', 'access-control-allow-origin': '*', 'access-control-allow-headers': '*,Authorization,Date,X-Amz-Date,X-Amz-Security-Token,X-Amz-Target,content-type,x-amz-content-sha256,x-amz-user-agent,x-amzn-platform-id,x-amzn-trace-id', 'x-amz-apigw-id': 'ETTbDHxv4osEPjQ=', 'access-control-allow-methods': 'GET,HEAD,PUT,POST,DELETE,OPTIONS', 'access-control-expose-headers': 'x-amzn-errortype,x-amzn-errormessage,x-amzn-trace-id,x-amzn-requestid,x-amz-apigw-id,date', 'x-amzn-trace-id': 'Root=1-678465df-13f964bd1fa142597cc1e020'}, 'RetryAttempts': 0}


In [21]:
# [m for m in dir(ec2_client) if 'internet_gateway' in m]

In [22]:
for igw_id in igw_ids:
    ec2_client.delete_internet_gateway(InternetGatewayId=igw_id)
    print(response['ResponseMetadata'])


{'RequestId': '87e249a1-a0f1-43d1-ac4a-1c12c510abe4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Mon, 13 Jan 2025 01:01:20 GMT', 'content-type': 'application/json', 'content-length': '1348', 'connection': 'keep-alive', 'x-amzn-requestid': '87e249a1-a0f1-43d1-ac4a-1c12c510abe4', 'access-control-allow-origin': '*', 'access-control-allow-headers': '*,Authorization,Date,X-Amz-Date,X-Amz-Security-Token,X-Amz-Target,content-type,x-amz-content-sha256,x-amz-user-agent,x-amzn-platform-id,x-amzn-trace-id', 'x-amz-apigw-id': 'ETTbDHxv4osEPjQ=', 'access-control-allow-methods': 'GET,HEAD,PUT,POST,DELETE,OPTIONS', 'access-control-expose-headers': 'x-amzn-errortype,x-amzn-errormessage,x-amzn-trace-id,x-amzn-requestid,x-amz-apigw-id,date', 'x-amzn-trace-id': 'Root=1-678465df-13f964bd1fa142597cc1e020'}, 'RetryAttempts': 0}


In [23]:
# Probably unnecessary.
network_interface_ids = get_network_interface_ids_for_vpc(vpc_id)
for network_interface_id in network_interface_ids:
    response = ec2_client.delete_network_interface(NetworkInterfaceId=network_interface_id)
    print(response['ResponseMetadata'])

In [24]:
subnet_ids = get_subnet_ids_in_vpc(vpc_id)
for subnet_id in subnet_ids:
    response = ec2_client.delete_subnet(SubnetId=subnet_id)
    print(response['ResponseMetadata'])

{'RequestId': '3c9afb0c-7121-4161-91b2-0bf061184af7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '3c9afb0c-7121-4161-91b2-0bf061184af7', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'text/xml;charset=UTF-8', 'content-length': '213', 'date': 'Mon, 13 Jan 2025 01:10:19 GMT', 'server': 'AmazonEC2'}, 'RetryAttempts': 0}
{'RequestId': 'bbfa3baa-cc71-4f10-8e73-c2a2ab15d129', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'bbfa3baa-cc71-4f10-8e73-c2a2ab15d129', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'text/xml;charset=UTF-8', 'content-length': '213', 'date': 'Mon, 13 Jan 2025 01:10:20 GMT', 'server': 'AmazonEC2'}, 'RetryAttempts': 0}


In [25]:
security_group_ids_for_vpc = get_security_group_ids_for_vpc(vpc_id)
for security_group_id in security_group_ids_for_vpc:
    response = ec2_client.delete_security_group(GroupId=security_group_id)
    print(response['ResponseMetadata'])

{'RequestId': 'c506cf38-f5ef-4701-bb83-896d3d53bfb5', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c506cf38-f5ef-4701-bb83-896d3d53bfb5', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'text/xml;charset=UTF-8', 'content-length': '266', 'date': 'Mon, 13 Jan 2025 01:10:20 GMT', 'server': 'AmazonEC2'}, 'RetryAttempts': 0}
{'RequestId': '304408cd-3b33-4c7e-a88e-9af8bbe0b23b', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '304408cd-3b33-4c7e-a88e-9af8bbe0b23b', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'text/xml;charset=UTF-8', 'content-length': '266', 'date': 'Mon, 13 Jan 2025 01:10:20 GMT', 'server': 'AmazonEC2'}, 'RetryAttempts': 0}


In [26]:
response = ec2_client.delete_vpc(VpcId=vpc_id)
response['ResponseMetadata']


{'RequestId': 'b84da2ec-0a62-43b6-a49c-ebd408c15c6c',
 'HTTPStatusCode': 200,
 'HTTPHeaders': {'x-amzn-requestid': 'b84da2ec-0a62-43b6-a49c-ebd408c15c6c',
  'cache-control': 'no-cache, no-store',
  'strict-transport-security': 'max-age=31536000; includeSubDomains',
  'content-type': 'text/xml;charset=UTF-8',
  'content-length': '207',
  'date': 'Mon, 13 Jan 2025 01:10:21 GMT',
  'server': 'AmazonEC2'},
 'RetryAttempts': 0}

In [27]:
route_table_ids = get_route_table_ids_for_vpc(vpc_id)
for route_table_id in route_table_ids:
    # route_table = ec2_client.describe_route_tables(RouteTableIds=[route_table_id])['RouteTables'][0]
    # for route in route_table['Routes']:
    #     if route.get('State') == 'blackhole':
    #         ec2_client.delete_route(RouteTableId=route_table_id, DestinationCidrBlock=route['DestinationCidrBlock'])
    response = ec2_client.delete_route_table(RouteTableId=route_table_id)
    print(response['ResponseMetadata'])

In [28]:
!curl -k $endpoint

curl: (6) Could not resolve host: 3D9547AE31B419307DE001E999DDD051.gr7.ca-central-1.eks.amazonaws.com


# Get `VolumeId`

In [29]:
response = ec2_client.describe_volumes(
    Filters=CLUSTER_FILTERS
)

volumes = response.get('Volumes', [])
if not volumes:
    raise RuntimeError(f'No volumes found matching the filter: {CLUSTER_FILTERS}')
volume_ids = [volume['VolumeId'] for volume in volumes]
volume_ids

['vol-0d335c3dfa5fc99c6', 'vol-0e0bf7e648784d088']

# Create Snapshots

In [30]:
for volume_id in volume_ids:
    response = ec2_client.create_snapshot(
        VolumeId=volume_id,
        Description=f"Snapshot For: {volume_id}. Tags: {TAGS}",
        TagSpecifications=[
            {
                'ResourceType': 'snapshot',
                'Tags': [{'Key': k, 'Value': TAGS[k]} for k in TAGS]
            }
        ]
    )
    wait_until(ec2_client.describe_snapshots, {'SnapshotIds': [response['SnapshotId']]}, is_snapshot_completed)

'completed'

In [31]:
snapshot_id = response['SnapshotId']
snapshot_id

'snap-03aa9ba3fdc93cce5'

# Delete

In [32]:
for volume_id in volume_ids:
    ec2_client.delete_volume(VolumeId=volume_id)
wait_until(ec2_client.describe_volumes, {'Filters': CLUSTER_FILTERS}, is_zero_volumes)

True