In [1]:
import os

import numpy as np
import tensorflow as tf

In [2]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

In [3]:
os.chdir('..')

In [4]:
from src.vnet_modified import vnet_regression

In [5]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
tf.config.list_physical_devices('GPU')

Num GPUs Available:  1


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [6]:
# Setting seed for reproducibility
os.environ['PYTHONHASHSEED']=str(123)

np.random.seed(123)
tf.random.set_seed(123)

In [7]:
X_train_combined = np.load('./data/X_train_combined.npy')
y_train_combined = np.load('./data/y_train_combined.npy')

In [8]:
X_val = np.load('./data/X_val.npy')
y_val = np.load('./data/y_val.npy')

X_train_combined.shape, y_train_combined.shape, X_val.shape, y_val.shape

((130050, 32, 32, 32, 1),
 (130050, 32, 32, 32, 1),
 (19125, 32, 32, 32, 1),
 (19125, 32, 32, 32, 1))

------
#### Taking a sample of data to make sure the dimensions are fine or not. Later, will try to run it on all samples.

In [9]:
sample_size_train = int(0.9 * X_train_combined.shape[0])
sample_size_val = int(0.9 * X_val.shape[0])

indices_train = np.random.choice(X_train_combined.shape[0], sample_size_train, replace=False)
indices_val = np.random.choice(X_val.shape[0], sample_size_val, replace=False)

X_train_sample = X_train_combined[indices_train]
y_train_sample = y_train_combined[indices_train]
X_val_sample = X_val[indices_val]
y_val_sample = y_val[indices_val]

X_train_sample.shape, y_train_sample.shape, X_val_sample.shape, y_val_sample.shape

((117045, 32, 32, 32, 1),
 (117045, 32, 32, 32, 1),
 (17212, 32, 32, 32, 1),
 (17212, 32, 32, 32, 1))

In [10]:
# Parameters
Ngrid = 32  # grid size

In [11]:
vnet_model = vnet_regression((Ngrid, Ngrid, Ngrid, 1))

2024-02-25 11:39:57.716831: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-25 11:40:02.994238: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38420 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:01:00.0, compute capability: 8.0


In [12]:
# Compile the model
vnet_model.compile(optimizer=Adam(), 
                   loss='mse',
                   metrics=['mae'],
                   run_eagerly=True)

In [13]:
early_stopping = EarlyStopping(monitor='val_loss', 
                               patience=5, 
                               restore_best_weights=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', 
                              factor=0.2, 
                              patience=5, 
                              min_lr=0.001)

checkpoint = ModelCheckpoint('models/vnet/250224_90p_samp_augment.hdf5', 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=True)

In [14]:
history = vnet_model.fit(x=X_train_sample, 
                         y=y_train_sample, 
                         validation_data=(X_val_sample, y_val_sample), 
                         batch_size=32, 
                         epochs=50, 
                         callbacks=[early_stopping, reduce_lr, checkpoint])

Epoch 1/50


2024-02-25 11:40:24.474402: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8200
2024-02-25 11:40:36.483623: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 1: val_loss improved from inf to 0.63100, saving model to models/vnet/250224_90p_samp_augment.hdf5
Epoch 2/50
Epoch 2: val_loss improved from 0.63100 to 0.62696, saving model to models/vnet/250224_90p_samp_augment.hdf5
Epoch 3/50
Epoch 3: val_loss improved from 0.62696 to 0.47678, saving model to models/vnet/250224_90p_samp_augment.hdf5
Epoch 4/50
Epoch 4: val_loss improved from 0.47678 to 0.43821, saving model to models/vnet/250224_90p_samp_augment.hdf5
Epoch 5/50
Epoch 5: val_loss improved from 0.43821 to 0.27645, saving model to models/vnet/250224_90p_samp_augment.hdf5
Epoch 6/50
Epoch 6: val_loss did not improve from 0.27645
Epoch 7/50
Epoch 7: val_loss did not improve from 0.27645
Epoch 8/50
Epoch 8: val_loss did not improve from 0.27645
Epoch 9/50
Epoch 9: val_loss did not improve from 0.27645
Epoch 10/50
Epoch 10: val_loss did not improve from 0.27645


-------------------

The model improved its performance for the first few epochs. Loss decreases, meaning it's learning. 

At epoch 6, loss spikes, possibly due to an issue with the data or learning rate. 

### Model Saved

In [28]:
vnet_model.summary()

Model: "vnet_regression"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 32,  0           []                               
                                 1)]                                                              
                                                                                                  
 conv3d_1 (Conv3D)              (None, 32, 32, 32,   448         ['input_1[0][0]']                
                                16)                                                               
                                                                                                  
 activation (Activation)        (None, 32, 32, 32,   0           ['conv3d_1[0][0]']               
                                16)                                                 


> Find the model in `/models/vnet/` folder with the name `250224_90p_samp_augment.hdf5`

# Script Complete