# Load data

In [13]:
from subprocess import CalledProcessError, check_output

import pandas as pd

from kubetools import get_all, delete, top, get_gcloud_node_info

KIND = 'staging'

# Build one pod table: resource usage from `top` outer-joined with the pod
# listing from `get_all`, so a pod present in only one of the two is still kept.
df_top = top('pod')
df_get = get_all('pod')
df = df_top.merge(df_get, how='outer', on='name')

# Express age in hours, then order by age and CPU, largest first
df['age'] = df['age'] / 60.0
df = df.sort_values(by=['age', 'cpu'], ascending=False)

# The pod kind (user/prod/etc) is whatever precedes the first '-' in its name
df['kind'] = df['name'].map(lambda pod_name: pod_name.partition('-')[0])

# Restart counts are needed as numbers
df['restarts'] = pd.to_numeric(df['restarts'])

# Node table: kubectl's listing inner-joined with gcloud's listing.
# The gcloud columns are renamed first so the join keys line up and the two
# status columns do not collide.
nodes = get_all('node')
nodes_gcloud = get_gcloud_node_info()
nodes_gcloud = nodes_gcloud.rename(columns={'external_ip': 'external-ip', 'status': 'status_gcloud'})
nodes = nodes.merge(nodes_gcloud, on=['name', 'external-ip'])
nodes = nodes.loc[:, [
    'name', 'internal_ip', 'external-ip', 'status', 'roles', 'age', 'version',
    'os-image', 'kernel-version', 'container-runtime', 'zone', 'machine_type',
    'status_gcloud',
]]

# Pod management

## Deleting pods by query

In [16]:
# Example selections: to use one, uncomment it and comment out the active `q` below.

# Pods in an error/finished state
# q = 'status in ["Completed", "Error", "Evicted"]'

# Non-user pods on a single node
# node = "gke-prod-a-ssd-pool-32-134a959a-knlb"
# q = 'fullnode == "{}" and kind not in ["jupyter", "build"]'.format(node)

# Only the hub / binder pods on a single node
# q = 'fullnode == "{}" and kind in ["hub", "binder"]'.format(node)

# Active selection: jupyter pods whose age is at least 1
q = "age >= 1 and kind=='jupyter'"
delete_pods = df.query(q)

# Show how many pods matched, then the matches themselves, for review before deleting
print(delete_pods.shape[0])
delete_pods

14


Unnamed: 0,name,cpu,memory,ready,status,restarts,age,ip,node,fullnode,kind
23,jupyter-jupyterlab-2djupyterlab-2ddemo-2dc3hd8glz,1.0,0.068,1/1,Running,0,5.0,10.12.16.155,91w4,gke-prod-a-hm16-e1abb2e9-91w4,jupyter
106,jupyter-annefou-2dmetos-5fpython-2drdqv5tp2,0.0,0.199,1/1,Running,0,5.0,10.12.14.137,0sk3,gke-prod-a-hm16-e1abb2e9-0sk3,jupyter
2,jupyter-jupyterlab-2djupyterlab-2ddemo-2dli1p6iwo,433.0,0.816,1/1,Running,0,4.0,10.12.16.39,91w4,gke-prod-a-hm16-e1abb2e9-91w4,jupyter
25,jupyter-fadhlyemen-2dlilikoi-5ffadhl-2d1puiy7jb,1.0,0.585,1/1,Running,0,2.0,10.12.6.143,x8m7,gke-prod-a-hm16-e1abb2e9-x8m7,jupyter
75,jupyter-jupyterlab-2djupyterlab-2ddemo-2dpjdhx1o3,0.0,0.076,1/1,Running,0,2.0,10.12.6.209,x8m7,gke-prod-a-hm16-e1abb2e9-x8m7,jupyter
107,jupyter-4quantoss-2ddashintro-2dg027ijp2,0.0,0.248,1/1,Running,0,2.0,10.12.3.129,0nc4,gke-prod-a-hm16-e1abb2e9-0nc4,jupyter
42,jupyter-losc-2dtutorial-2dworkspace-2dhznfaev1,0.0,0.054,1/1,Running,0,1.0,10.12.3.198,0nc4,gke-prod-a-hm16-e1abb2e9-0nc4,jupyter
44,jupyter-losc-2dtutorial-2dworkspace-2dfzks5l0d,0.0,0.048,1/1,Running,0,1.0,10.12.16.193,91w4,gke-prod-a-hm16-e1abb2e9-91w4,jupyter
45,jupyter-losc-2dtutorial-2dworkspace-2dflslky6t,0.0,0.041,1/1,Running,0,1.0,10.12.14.220,0sk3,gke-prod-a-hm16-e1abb2e9-0sk3,jupyter
46,jupyter-losc-2dtutorial-2dworkspace-2d5oli05lw,0.0,0.041,1/1,Running,0,1.0,10.12.3.238,0nc4,gke-prod-a-hm16-e1abb2e9-0nc4,jupyter


In [3]:
# Force-delete every pod in `delete_pods`.
# `delete` raises CalledProcessError when the underlying kubectl call fails
# (for example, the pod may no longer exist by the time we get to it). Record
# the failure and keep going so one bad pod does not leave the rest of the
# batch undeleted.
failed_deletes = []
for nm in delete_pods['name'].values:
    try:
        delete(nm, force=True)
    except CalledProcessError as err:
        failed_deletes.append(nm)
        print('Could not delete pod {}: {}'.format(nm, err))

# Make partial failure obvious instead of letting it scroll past in the output
if failed_deletes:
    print('{} of {} deletions failed'.format(len(failed_deletes), len(delete_pods)))

Deleted pod: jupyter-jupyterlab-2djupyterlab-2ddemo-2dq2qjydv2
Deleted pod: jupyter-jupyterlab-2djupyterlab-2ddemo-2dytl0iekm
Deleted pod: jupyter-fadhlyemen-2dlilikoi-5ffadhl-2d03slksdw
Deleted pod: jupyter-jupyterlab-2djupyterlab-2ddemo-2d4fe76i52
Deleted pod: jupyter-jtwhite79-2dsire-2dakqj5oag
Deleted pod: jupyter-ipython-2dipython-2din-2ddepth-2dmxv6krpw
Deleted pod: jupyter-jupyterlab-2djupyterlab-2ddemo-2df46ratie
Deleted pod: jupyter-davidtingley-2dpapers-2dnpb8sdsx


## Delete a specific pod

In [4]:
# Delete a single pod by its exact name (copy the name from the pod table).
# `delete` raises CalledProcessError if the kubectl call fails, so update the
# name below to a pod that currently exists before running this cell.
nm = 'binder-5b8b4479cb-gzkq8'
delete(nm)

CalledProcessError: Command '['kubectl', '--namespace=prod', 'delete', 'pod', 'binder-5b8b4479cb-gzkq8']' returned non-zero exit status 1.

# Node management

## Cordon node

In [7]:
# Display the merged kubectl + gcloud node table to pick a node to operate on
nodes

Unnamed: 0,name,internal_ip,external-ip,status,roles,age,version,os-image,kernel-version,container-runtime,zone,machine_type,status_gcloud
0,gke-prod-a-hm16-e1abb2e9-0nc4,10.128.0.3,35.184.140.95,Ready,<none>,15840.0,v1.10.2-gke.1,Container-Optimized OS from Google,4.14.22+,docker://17.3.2,us-central1-a,n1-highmem-16,RUNNING
1,gke-prod-a-hm16-e1abb2e9-0sk3,10.128.0.8,35.239.113.40,Ready,<none>,420.0,v1.10.2-gke.1,Container-Optimized OS from Google,4.14.22+,docker://17.3.2,us-central1-a,n1-highmem-16,RUNNING
2,gke-prod-a-hm16-e1abb2e9-97fx,10.128.0.5,146.148.69.246,"Ready,SchedulingDisabled",<none>,2880.0,v1.10.2-gke.1,Container-Optimized OS from Google,4.14.22+,docker://17.3.2,us-central1-a,n1-highmem-16,RUNNING
3,gke-prod-a-hm16-e1abb2e9-nrs2,10.128.0.2,35.238.161.82,Ready,<none>,780.0,v1.10.2-gke.1,Container-Optimized OS from Google,4.14.22+,docker://17.3.2,us-central1-a,n1-highmem-16,RUNNING
4,gke-prod-a-hm16-e1abb2e9-x8m7,10.128.0.6,35.238.218.76,Ready,<none>,2880.0,v1.10.2-gke.1,Container-Optimized OS from Google,4.14.22+,docker://17.3.2,us-central1-a,n1-highmem-16,RUNNING


In [None]:
# Mark the node as unschedulable so that no new pods are placed on it;
# pods already running there are left alone.
!kubectl cordon gke-prod-a-hm16-e1abb2e9-97fx

In [None]:
# Make a previously cordoned node schedulable again.
# NOTE(review): this node name comes from a different pool than the node
# cordoned in this notebook; replace it with the node you want to re-enable.
!kubectl uncordon gke-prod-a-ssd-pool-32-134a959a-5mpp

## Drain a node

In [6]:
# Cordon the node and evict its pods. Flags used:
#   --force               also evict pods that are not managed by a controller
#   --delete-local-data   evict pods even if they use emptyDir (local) storage
#   --ignore-daemonsets   skip DaemonSet-managed pods instead of refusing to drain
#   --grace-period=0      do not wait for graceful termination
! kubectl drain --force --delete-local-data --ignore-daemonsets --grace-period=0 gke-prod-a-hm16-e1abb2e9-97fx

node "gke-prod-a-hm16-e1abb2e9-nrs2" cordoned
pod "prod-prometheus-server-7cdbbf6c88-pkr4d" evicted
pod "metrics-server-v0.2.1-7486f5bd67-g4dx8" evicted
pod "kube-dns-autoscaler-79b4b844b9-gvzqp" evicted
pod "kube-dns-788979dc8f-726xn" evicted
pod "build-rigetticomputing-2dpyquil-2dquantum-2ddice-64f259-f4ee1b" evicted
pod "binder-5b9b78879b-prn5k" evicted
pod "jupyter-ipython-2dipython-2din-2ddepth-2dv3o9l1vf" evicted
node "gke-prod-a-hm16-e1abb2e9-nrs2" drained


## Delete node

In [9]:
# Remove the node object from the cluster. Drain the node first so its pods
# are evicted before it disappears.
# NOTE(review): presumably the underlying VM still has to be removed on the
# gcloud side; confirm.
!kubectl delete node gke-prod-a-hm16-e1abb2e9-97fx

node "gke-prod-a-hm16-e1abb2e9-97fx" deleted
