In [1]:
from pathlib import Path

cfg_path = Path.home().resolve() / "config"

if cfg_path.is_file():
    print(f"Found config: {cfg_path}")
else: 
    cfg_dir = input("Enter directory with config: ")
    cfg_path = Path(cfg_dir).resolve() / "config"
    if cfg_path.is_file():
        print(f"Found config: {cfg_path}")
    else:
        print("ERROR: Re-run this cell and give correct directory")

Found config: /home/user1-selab3/Documents/research-shradha/kube/kube1-sklearn/config


In [2]:
from shutil import copy
from pathlib import Path

dest_dir = Path.home().resolve() / ".kube"
dest_dir.mkdir(exist_ok=True)
dest_path = dest_dir / "config"

if dest_path.is_file():
    print("SUCCESS: Config correctly configured")
elif cfg_path.is_file() and dest_dir.is_dir():
    copy(cfg_path, dest_path)
    print("SUCCESS: Copied config")
else:
    print("ERROR: Ensure you have correct config path")

SUCCESS: Config correctly configured


In [3]:
! kubectl config view --minify -o jsonpath='{..namespace}'

gp-engine-unoselab01

In [4]:
from jinja2 import Template

# read in the template
with open('/home/user1-selab3/Documents/research-shradha/kube/kube2/example1-kube/CODE-RajulShakywar/CODE/example/yaml/yaml_templates/pvc_template.yml') as file_:
    template = Template(file_.read())

In [5]:
# replace None 
PVC_NAME = 'pvc-shradha-gp-engine-unoselab01'

pvc_spec = template.render(name=PVC_NAME)

In [6]:
print(pvc_spec)

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-shradha-gp-engine-unoselab01
spec:
  storageClassName: rook-cephfs-central
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 50Gi


In [7]:
with open("/home/user1-selab3/Documents/research-shradha/kube/kube1-sklearn/1-kube-pvc-sklean.yml", "w") as file:
    file.write(pvc_spec)

In [19]:
! kubectl create -f ../kube1-sklearn/1-kube-pvc-sklean.yml

Error from server (AlreadyExists): error when creating "../kube1-sklearn/1-kube-pvc-sklean.yml": persistentvolumeclaims "pvc-shradha-gp-engine-unoselab01" already exists


In [20]:
! kubectl get pvc

NAME                               STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS          AGE
pvc-gp-engine-unoselab01           Bound    pvc-3689372b-bf1f-40b3-b2aa-f2ed83257150   50Gi       RWX            rook-cephfs-central   50d
pvc-shradha-gp-engine-unoselab01   Bound    pvc-d12e42d5-6421-4540-ac64-3beeae853d13   50Gi       RWX            rook-cephfs-central   6d1h


In [14]:
from torchvision.datasets import MNIST
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from tqdm import tqdm
import numpy as np
import os

NUM_TREES = int(os.environ.get("SK_NUM_TREES", "3"))
NUM_JOBS = int(os.environ.get("SK_NUM_JOBS", "1"))

print(f"Running random forest with {NUM_TREES} trees and {NUM_JOBS} jobs")

######
# Download MNIST
######
train_dataset = MNIST(download=True, root="~/data", train=True)
test_dataset = MNIST(download=True, root="~/data", train=False)

##### 
# Generate Train Features
#####
print("Generating Train Features")
train_features = np.empty((len(train_dataset), 108))
train_labels = np.empty(len(train_dataset), np.int32)
for i, (img, label) in tqdm(enumerate(train_dataset), ncols=80, total=len(train_dataset)):
    train_features[i] = hog(np.asarray(img), orientations=12, cells_per_block=(3,3))
    train_labels[i] = label

#####
# Generate Test Features
#####
print("Generating Test Features")
test_features = np.empty((len(test_dataset), 108))
test_labels = np.empty(len(test_dataset), np.int32)
for i, (img, label) in tqdm(enumerate(test_dataset), ncols=80, total=len(test_dataset)):
    test_features[i] = hog(np.asarray(img), orientations=12, cells_per_block=(3,3))
    test_labels[i] = label

######
# Train Model
#######
print("Training the model")
model = RandomForestClassifier(n_estimators=NUM_TREES, n_jobs=NUM_JOBS, verbose=1)
model.fit(train_features, train_labels)

####
# Score Model
#####
print("Evaluating the model")
model_accuracy = model.score(test_features, test_labels)
print(f"Model Accuracy = {model_accuracy*100:.2f}%")

Running random forest with 3 trees and 1 jobs
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to /home/user1-selab3/data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:05<00:00, 1977944.89it/s]


Extracting /home/user1-selab3/data/MNIST/raw/train-images-idx3-ubyte.gz to /home/user1-selab3/data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to /home/user1-selab3/data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 800621.89it/s]


Extracting /home/user1-selab3/data/MNIST/raw/train-labels-idx1-ubyte.gz to /home/user1-selab3/data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to /home/user1-selab3/data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2133067.57it/s]


Extracting /home/user1-selab3/data/MNIST/raw/t10k-images-idx3-ubyte.gz to /home/user1-selab3/data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to /home/user1-selab3/data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1773792.25it/s]


Extracting /home/user1-selab3/data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /home/user1-selab3/data/MNIST/raw

Generating Train Features


100%|███████████████████████████████████| 60000/60000 [00:09<00:00, 6057.29it/s]


Generating Test Features


100%|███████████████████████████████████| 10000/10000 [00:01<00:00, 6066.27it/s]
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training the model
Evaluating the model
Model Accuracy = 82.20%


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s finished


In [16]:
! kubectl create -f ../kube1-sklearn/2-kube-pod-sklean.yml

pod/shradha-gp-engine-unoselab01-pod created


In [17]:
! kubectl get pods

NAME                               READY   STATUS    RESTARTS   AGE
gp-engine-unoselab01-pod1          0/1     Error     0          27h
shradha-gp-engine-unoselab01-pod   1/1     Running   0          30s


In [23]:
! kubectl cp /home/user1-selab3/Documents/research-shradha/kube/kube1-sklearn/RandomForestMNIST.py shradha-gp-engine-unoselab01-pod:/data/RandomForestMNIST.py

In [24]:
! kubectl exec shradha-gp-engine-unoselab01-pod -- cat /data/RandomForestMNIST.py

from torchvision.datasets import MNIST
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from tqdm import tqdm
import numpy as np
import os

NUM_TREES = int(os.environ.get("SK_NUM_TREES", "3"))
NUM_JOBS = int(os.environ.get("SK_NUM_JOBS", "1"))

print(f"Running random forest with {NUM_TREES} trees and {NUM_JOBS} jobs")

######
# Download MNIST
######
train_dataset = MNIST(download=True, root="~/data", train=True)
test_dataset = MNIST(download=True, root="~/data", train=False)

##### 
# Generate Train Features
#####
print("Generating Train Features")
train_features = np.empty((len(train_dataset), 108))
train_labels = np.empty(len(train_dataset), np.int32)
for i, (img, label) in tqdm(enumerate(train_dataset), ncols=80, total=len(train_dataset)):
    train_features[i] = hog(np.asarray(img), orientations=12, cells_per_block=(3,3))
    train_labels[i] = label

#####
# Generate Test Features
#####
print("Generating Test Features")

In [25]:
! ls

1-kube-pvc-create-sklean.yml  3-kube-job-sklean.yml  RandomForestMNIST.py
1-kube-pvc-sklean.yml	      config
2-kube-pod-sklean.yml	      learn.ipynb


In [26]:
from jinja2 import Template

# read in the template
with open('/home/user1-selab3/Documents/research-shradha/kube/kube2/example1-kube/CODE-RajulShakywar/CODE/example/yaml/yaml_templates/sklearn_job_template.yml') as file_:
    template = Template(file_.read())

In [27]:
# render the job spec
job_spec = template.render(
    job_name="job01-sklearn-shradha-gp-engine-unoselab01",
    pvc_name="pvc-shradha-gp-engine-unoselab01",
    num_trees=1,
    num_jobs=1
)

# print the job spec
print(job_spec)

apiVersion: batch/v1
kind: Job
metadata:
  name: job01-sklearn-shradha-gp-engine-unoselab01
spec:
  template:
    spec:
      automountServiceAccountToken: false
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: topology.kubernetes.io/region
                operator: In
                values:
                - us-central 
      containers:
      - name: sklearn-train-container
        image: gitlab-registry.nrp-nautilus.io/gp-engine/jupyter-stacks/bigdata-2023:latest
        workingDir: /data
        env:
            - name: SK_NUM_TREES
              value: "1"
            - name: SK_NUM_JOBS
              value: "1"
        command: ["python3", "/data/RandomForestMNIST.py"]
        volumeMounts:
            - name: pvc-shradha-gp-engine-unoselab01
              mountPath: /data
        resources:
            limits:
              memory: 1Gi
             

In [28]:
with open("/home/user1-selab3/Documents/research-shradha/kube/kube1-sklearn/3-kube-job-sklean.yml", "w") as file:
    file.write(job_spec)

In [29]:
! kubectl create -f ../kube1-sklearn/3-kube-job-sklean.yml

job.batch/job01-sklearn-shradha-gp-engine-unoselab01 created


In [30]:
! kubectl get pods

NAME                                               READY   STATUS              RESTARTS   AGE
gp-engine-unoselab01-pod1                          0/1     Error               0          28h
job01-sklearn-shradha-gp-engine-unoselab01-72mr8   0/1     ContainerCreating   0          14s
shradha-gp-engine-unoselab01-pod                   1/1     Running             0          16m


In [32]:
! kubectl logs --tail=1 job01-sklearn-shradha-gp-engine-unoselab01-72mr8

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [None]:
# ! kubectl delete job job01-sklearn-shradha-gp-engine-unoselab01-72mr8

In [None]:
# ! kubectl delete pod sklearn-gp-engine-unoselab01

In [None]:
# !kubectl get pods