In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import Adam
import keras
from keras.models import Sequential, Model
from keras.layers import *
from keras.utils import Sequence
from keras.layers import Conv2D, MaxPooling2D
from qkeras import *

from keras.utils import Sequence
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping

import os
import random

import math

# Use the standard-library constant rather than a hand-typed literal that is
# truncated to 12 significant digits.
pi = math.pi

# Upper / lower numeric bounds. Not referenced by any cell visible in this
# notebook -- presumably kept for helper code; TODO confirm they are still needed.
maxval=1e9
minval=1e-9

2024-09-13 03:22:28.726012: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#from dataprep import *
from dataloaders.OptimizedDataGenerator import OptimizedDataGenerator
from loss import *
from models_v2 import *

# Scaling Lists for Different Pixel Pitches:
* 100x25x100 um:  [150.0, 37.5, 10.0, 1.22]
* 50x25x100 um:   [75.0, 37.5, 10.0, 1.22]
* 50x20x100 um:   [75.0, 30.0, 10.0, 1.22]
* 50x15x100 um:   [75.0, 22.5, 10.0, 1.22]
* 50x12.5x100 um: [75.0, 18.75, 10.0, 1.22]
* 50x10x100 um:   [75.0, 15.0, 10.0, 1.22]

In [None]:
# --- Data-loading configuration -------------------------------------------
batch_size = 5000
val_batch_size = 5000
train_file_size = 50
val_file_size = 10
tfrecords_dir_train = "/data/dajiang/smartPixels/tfrecords/tfrecords_20t_train_50x12P5x100_bnorm1"
tfrecords_dir_val = "/data/dajiang/smartPixels/tfrecords/tfrecords_20t_val_50x12P5x100_bnorm1"

# Both generators read from the same parquet directories; they differ only in
# batch size, file count and the TFRecords directory they are given.
parquet_data_dir = "/data/dajiang/smartPixels/dataset_2s/dataset_2s_50x12P5_parquets/unflipped/recon3D/"
parquet_labels_dir = "/data/dajiang/smartPixels/dataset_2s/dataset_2s_50x12P5_parquets/unflipped/labels/"


def make_data_generator(batch_size, file_count, tfrecords_dir):
    """Build an OptimizedDataGenerator with the settings shared by train and validation.

    Args:
        batch_size: forwarded as the generator's ``batch_size``.
        file_count: forwarded as the generator's ``file_count``.
        tfrecords_dir: forwarded as the generator's ``tfrecords_dir``.

    Returns:
        A newly constructed ``OptimizedDataGenerator``. The list arguments are
        fresh literals on every call, so the generators never share list objects.
    """
    return OptimizedDataGenerator(
        data_directory_path = parquet_data_dir,
        labels_directory_path = parquet_labels_dir,
        is_directory_recursive = False,
        file_type = "parquet",
        data_format = "3D",
        batch_size = batch_size,
        file_count = file_count,
        to_standardize= True,
        include_y_local= False,
        labels_list = ['x-midplane','y-midplane','cotAlpha','cotBeta'],
        # Same values as the 50x12.5x100 um entry in the scaling-list table above.
        scaling_list = [75.0, 18.75, 10.0, 1.22],
        input_shape = (20,13,21),
        transpose = (0,2,3,1),
        files_from_end=True,
        shuffle= True,

        tfrecords_dir = tfrecords_dir,
        use_time_stamps = -1, #-1
        max_workers = 1, # Don't make this too large (will use up all RAM)
        seed = 10,
        quantize = True # Quantization ON
    )


# NOTE(review): the recorded output of this cell shows the parquet files being
# processed and re-saved as TFRecords (roughly 20 minutes). Check whether the
# generator can reuse an existing TFRecords directory to keep
# Restart & Run All practical.
training_generator = make_data_generator(
    batch_size = batch_size,
    file_count = train_file_size,
    tfrecords_dir = tfrecords_dir_train,
)

validation_generator = make_data_generator(
    batch_size = val_batch_size,
    file_count = val_file_size,
    tfrecords_dir = tfrecords_dir_val,
)

Directory /data/dajiang/smartPixels/tfrecords/tfrecords_20t_train_50x12P5x100_bnorm1 does not exist and cannot be removed.


Processing Files...: 100%|██████████| 41/41 [07:41<00:00, 11.26s/it]
Saving batches as TFRecords:   0%|          | 0/408 [00:00<?, ?it/s]2024-09-13 03:30:23.997106: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3234 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB MIG 1g.5gb, pci bus id: 0000:01:00.0, compute capability: 8.0
Saving batches as TFRecords:  98%|█████████▊| 400/408 [11:16<00:05,  1.39it/s]

In [None]:
# Build the network and print its layer summary. Compilation (optimizer and
# loss) is done separately by the model.compile call that follows.
# The (13, 21, 20) shape presumably corresponds to the generators'
# input_shape (20, 13, 21) after their transpose (0, 2, 3, 1) -- TODO confirm.
model = CreateModel(
    (13, 21, 20),
    n_filters=5,
    pool_size=3,
)
model.summary()

In [None]:
# Configure the model for training: Adam optimizer with learning rate 0.001 and
# the project's custom_loss as the loss function.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=custom_loss,
)

In [None]:
# training

# Early-stopping callback with a patience of 50 epochs that restores the best
# weights when it triggers.
# NOTE(review): `es` is created here but is not included in the `callbacks`
# list passed to model.fit, so early stopping is currently inactive -- confirm
# whether that is intentional.
es = EarlyStopping(
    patience=50,
    restore_best_weights=True
)

base_dir = "./weights-50x12P5x100_bnorm1-checkpoints"
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError once the directory exists from a previous run.
os.makedirs(base_dir, exist_ok=True)

# The file name embeds the epoch number plus the training and validation loss.
checkpoint_filepath = os.path.join(
    base_dir, 'weights.{epoch:02d}-t{loss:.2f}-v{val_loss:.2f}.hdf5'
)

# Weights-only checkpoints; save_best_only=False means the callback is not
# restricted to the best val_loss.
mcp = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    save_best_only=False,
)

class ScalePrintingCallback(keras.callbacks.Callback):
    """Callback that prints the last layer's ``scale`` at the end of each epoch.

    Assumes the final layer of the attached model exposes a ``scale``
    attribute -- TODO confirm against the model built by CreateModel.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Print the epoch index, the raw ``scale`` and ``softplus(scale)``."""
        last_layer = self.model.layers[-1]
        header = f"scaling layer ({epoch}):"
        print(header, last_layer.scale, tf.math.softplus(last_layer.scale))

print_scale = ScalePrintingCallback()

# Run the training loop for up to 600 epochs, validating on the validation
# generator.
# NOTE(review): only the checkpoint callback `mcp` is active; `es` and
# `print_scale` are instantiated but not passed in -- confirm that is intended.
history = model.fit(
    x=training_generator,
    validation_data=validation_generator,
    epochs=600,
    callbacks=[mcp],
    verbose=1,
    shuffle=False,  # shuffling now occurs within the data-loader
)