In [36]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
from tensorflow.keras.utils import to_categorical
from io import BytesIO
from tensorflow.python.lib.io import file_io
from tensorflow.keras.layers import (
    Conv3D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling3D,
    Softmax
)

In [37]:
tf.__version__

'2.8.4'

In [38]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  1


In [39]:
# copy file from GCS to this this notebook
# two folders are created:
# 
# /home/jupyter/asl-ml-immersion/notebooks/capstone_project/valid-3d-npy
#/home/jupyter/asl-ml-immersion/notebooks/capstone_project/train-3d-npy

In [40]:
%%bash
ls /home/jupyter/asl-ml-immersion/notebooks/capstone_project/train-3d-npy | cut -d"_" -f5 |sort |uniq -c


    294 Arterial
    132 Late
    152 Non-Contrast
     56 Venous


In [41]:
labels_to_numeric = {
    "Arterial": 0,
    "Late": 1,
    "Non-Contrast": 2,
    "Venous": 3
}

numeric_to_labels = {
    0:   "Arterial",
    1:   "Late",    
    2:   "Non-Contrast",    
    3:  "Venous"
}


#    294 Arterial
#     132 Late
#     152 Non-Contrast
#      56 Venous

In [42]:
def reshape_and_normalize(images):
    
    ### START CODE HERE

    # Reshape the images to add an extra dimension
    images = images.reshape((images.shape[0], images.shape[1], images.shape[2], images.shape[3], 1))
    
    # Normalize pixel values
    max_value = np.max(images)
    images = images/max_value
    
    ### END CODE HERE

    return images, max_value# Reload the images in case you run this cell multiple times

def load_and_format_data_from_gcs(sample_dir):
    # sample_dir="gs://capstone-datasets/train_3d.csv"
    file_list = file_io.read_file_to_string(sample_dir).split("\n")
    images = np.array([np.load(BytesIO(file_io.read_file_to_string(file, binary_mode=True)))
                       for file in file_list if file])
    labels = np.array([os.path.basename(file).split("_")[4] for file in file_list if file])
    labels = np.array([labels_to_numeric[label] for label in labels])
    one_hots = to_categorical(labels)

    images_tranformed, max_value = reshape_and_normalize(images)
    return images_tranformed, one_hots

In [43]:
# Reload the images in case you run this cell multiple times
training_sample_dir = "gs://capstone-datasets/train_3d.csv"
training_images, one_hots = load_and_format_data_from_gcs(training_sample_dir)


print(f"Maximum pixel value after normalization: {np.max(training_images)}\n")
print(f"Shape of training set after reshaping: {training_images.shape}\n")
print(f"Shape of one image after reshaping: {training_images[0].shape}")

Maximum pixel value after normalization: 1.0

Shape of training set after reshaping: (634, 32, 128, 128, 1)

Shape of one image after reshaping: (32, 128, 128, 1)


In [44]:
# Reload the images in case you run this cell multiple times
valid_sample_dir = "gs://capstone-datasets/valid_3d.csv"


# Apply your function
valid_images, one_hots_valid = load_and_format_data_from_gcs(valid_sample_dir)

print(f"Maximum pixel value after normalization: {np.max(valid_images)}\n")
print(f"Shape of training set after reshaping: {valid_images.shape}\n")
print(f"Shape of one image after reshaping: {valid_images[0].shape}")

Maximum pixel value after normalization: 1.0

Shape of training set after reshaping: (90, 32, 128, 128, 1)

Shape of one image after reshaping: (32, 128, 128, 1)


In [45]:
class myCallback(tf.keras.callbacks.Callback):
    # Define the method that checks the accuracy at the end of each epoch
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('accuracy') > 0.995:
            print("Reached 99.5% accuracy so cancelling training!")
            self.model.stop_training = True
        
            
            
callbacks = myCallback()

In [46]:
def convolutional_model():
    ### START CODE HERE

    # Define the model
    model = tf.keras.models.Sequential([
        # hub.KerasLayer("https://tfhub.dev/google/HRNet/scannet-hrnetv2-w48/1", trainable=False),
        # tf.keras.layers.Dropout(rate=0.2)
        tf.keras.layers.Conv3D(16, 3, activation='relu',input_shape=training_images.shape[1:]),
        tf.keras.layers.MaxPooling3D(pool_size=(2, 2,2), strides=(2, 2,2), padding='valid'),
        tf.keras.layers.Conv3D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling3D(pool_size=(2, 2,2), strides=(2, 2,2), padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation=tf.keras.activations.relu, kernel_regularizer=keras.regularizers.l2(l=0.1)),
        tf.keras.layers.Dense(64, activation=tf.keras.activations.relu, kernel_regularizer=keras.regularizers.l2(l=0.1)),
        tf.keras.layers.Dropout(rate=0.20),
        tf.keras.layers.Dense(4),
        tf.keras.layers.Softmax()
        
      
      
      
    ])
    ### END CODE HERE

    # Compile the model
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
        
    return model

In [47]:
model = convolutional_model()

In [48]:
# model.build(input_shape=training_images.shape[1:])

In [49]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_2 (Conv3D)           (None, 30, 126, 126, 16)  448       
                                                                 
 max_pooling3d_2 (MaxPooling  (None, 15, 63, 63, 16)   0         
 3D)                                                             
                                                                 
 conv3d_3 (Conv3D)           (None, 13, 61, 61, 32)    13856     
                                                                 
 max_pooling3d_3 (MaxPooling  (None, 6, 30, 30, 32)    0         
 3D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 172800)            0         
                                                                 
 dense_3 (Dense)             (None, 64)               

In [50]:
callbacks = myCallback()

In [51]:
history = model.fit(x=training_images, y=one_hots, validation_data=(valid_images,one_hots_valid), epochs=40, callbacks=[callbacks])

2023-03-28 18:53:48.458749: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1329594368 exceeds 10% of free system memory.
2023-03-28 18:53:49.244088: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1329594368 exceeds 10% of free system memory.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [59]:
os.chdir("/home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src")
os.getcwd()

'/home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src'

In [60]:
%%bash



BUCKET=capstone-datasets
OUTDIR=phase_contrast_trained
rm -rf ${OUTDIR}
export PYTHONPATH=${PYTHONPATH}:${PWD}/babyweight
python3 -m trainer.task \
    --train_data_path=gs://${BUCKET}/train_3d.csv \
    --eval_data_path=gs://${BUCKET}/valid_3d.csv \
    --output_dir=${OUTDIR} \
    --num_epochs=1 


Maximum pixel value after normalization: 1.0

Shape of training set after reshaping: (634, 32, 128, 128, 1)

Shape of one image after reshaping: (32, 128, 128, 1)
Maximum pixel value after normalization: 1.0

Shape of training set after reshaping: (90, 32, 128, 128, 1)

Shape of one image after reshaping: (32, 128, 128, 1)
Here is our model so far:

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 30, 126, 126, 16)  448       
                                                                 
 max_pooling3d (MaxPooling3D  (None, 15, 63, 63, 16)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 13, 61, 61, 32)    13856     
                                                                 
 max_pooling3d_1 (MaxPooling  (Non

2023-03-28 19:52:27.434116: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-28 19:52:27.563318: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-28 19:52:27.565077: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-28 19:52:27.606814: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

CalledProcessError: Command 'b'\n\n\nBUCKET=capstone-datasets\nOUTDIR=phase_contrast_trained\nrm -rf ${OUTDIR}\nexport PYTHONPATH=${PYTHONPATH}:${PWD}/babyweight\npython3 -m trainer.task \\\n    --train_data_path=gs://${BUCKET}/train_3d.csv \\\n    --eval_data_path=gs://${BUCKET}/valid_3d.csv \\\n    --output_dir=${OUTDIR} \\\n    --num_epochs=1 \n'' returned non-zero exit status 1.

In [62]:
%%bash 
git branch

* containerization_local
  local_container
  main


In [65]:
!git checkout containerization_local

Already on 'containerization_local'
Your branch is up to date with 'origin/feature/containerization_shengbing'.


In [66]:
!git pull

Already up to date.


In [67]:
%%bash
python ./setup.py sdist --formats=gztar


running sdist
running egg_info
creating ct_phase_contrast_trainer.egg-info
writing ct_phase_contrast_trainer.egg-info/PKG-INFO
writing dependency_links to ct_phase_contrast_trainer.egg-info/dependency_links.txt
writing top-level names to ct_phase_contrast_trainer.egg-info/top_level.txt
writing manifest file 'ct_phase_contrast_trainer.egg-info/SOURCES.txt'
reading manifest file 'ct_phase_contrast_trainer.egg-info/SOURCES.txt'
writing manifest file 'ct_phase_contrast_trainer.egg-info/SOURCES.txt'
running check
creating ct_phase_contrast_trainer-0.1
creating ct_phase_contrast_trainer-0.1/ct_phase_contrast_trainer.egg-info
creating ct_phase_contrast_trainer-0.1/trainer
copying files to ct_phase_contrast_trainer-0.1...
copying setup.py -> ct_phase_contrast_trainer-0.1
copying ct_phase_contrast_trainer.egg-info/PKG-INFO -> ct_phase_contrast_trainer-0.1/ct_phase_contrast_trainer.egg-info
copying ct_phase_contrast_trainer.egg-info/SOURCES.txt -> ct_phase_contrast_trainer-0.1/ct_phase_contrast_




In [75]:
%%bash

pwd
ls dist

/home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src
ct_phase_contrast_trainer-0.1.tar.gz


In [81]:
%%bash
BUCKET=capstone-datasets
gsutil cp /home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src/dist/ct_phase_contrast_trainer-0.1.tar.gz gs://${BUCKET}/ct_phase_contrast/

Copying file:///home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src/dist/ct_phase_contrast_trainer-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  3.2 KiB/  3.2 KiB]                                                
Operation completed over 1 objects/3.2 KiB.                                      


In [82]:
!ls /home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src/dist/ct_phase_contrast_trainer-0.1.tar.gz

/home/jupyter/asl-ml-immersion/notebooks/capstone_project/CapStone_Phase_Contrast/src/dist/ct_phase_contrast_trainer-0.1.tar.gz


In [84]:
%%bash
BUCKET=capstone-datasets
gsutil ls gs://${BUCKET}/ct_phase_contrast/

gs://capstone-datasets/ct_phase_contrast/ct_phase_contrast_trainer-0.1.tar.gz


In [85]:
%%bash
cd /home/jupyter/asl-ml-immersion/notebooks/capstone_project
mkdir ct_phase_contrast_trained

train on Vertex AI

In [None]:
%%bash 
cd /home/jupyter/asl-ml-immersion/notebooks/capstone_project/ct_phase_contrast_trained

REGION="us-central1"
BUCKET=capstone-datasets

TIMESTAMP=$(date -u +%Y%m%d_%H%M%S)

OUTDIR=gs://${BUCKET}/ct_phase_contrast/trained_model_$TIMESTAMP
JOB_NAME=ct_phase_contrast_$TIMESTAMP

PYTHON_PACKAGE_URI=gs://${BUCKET}/ct_phase_contrast/ct_phase_contrast_trainer-0.1.tar.gz
PYTHON_PACKAGE_EXECUTOR_IMAGE_URI="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-8:latest"
PYTHON_MODULE=trainer.task

echo > ./config.yaml "workerPoolSpecs:
  machineSpec:
    machineType: n1-standard-4
  replicaCount: 1
  pythonPackageSpec:
    executorImageUri: $PYTHON_PACKAGE_EXECUTOR_IMAGE_URI
    packageUris: $PYTHON_PACKAGE_URI
    pythonModule: $PYTHON_MODULE
    args:
    - --train_data_path=gs://${BUCKET}/train_3d.csv
    - --eval_data_path=gs://${BUCKET}/valid_3d.csv
    - --output_dir=$OUTDIR
    - --num_epochs=40"


gcloud ai custom-jobs create \
  --region=${REGION} \
  --display-name=$JOB_NAME \
  --config=config.yaml