In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import Adam
import keras
from keras.models import Sequential, Model
from keras.layers import *
from keras.utils import Sequence
from keras.layers import Conv2D, MaxPooling2D
from qkeras import *

from keras.utils import Sequence
from keras.callbacks import CSVLogger
from keras.callbacks import EarlyStopping

import numpy as np

import os
import glob
import shutil
import random
import psutil
import random

# Notebook-level numeric constants.
pi = 3.14159265359  # pi truncated to 11 decimal places

# Large / small magnitude bounds -- presumably clipping limits; TODO confirm where they are used.
maxval, minval = 1e9, 1e-9

2025-03-29 18:28:13.702102: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from dataloaders.OptimizedDataGenerator import OptimizedDataGenerator
from loss import *
from models.models import *

#### Set random seed for reproducibility

In [3]:
# Seed every random number generator this notebook touches -- Python's
# `random`, NumPy and TensorFlow -- so a fresh "Restart & Run All" starts
# from the same state. `np` is imported explicitly in the import cell instead
# of relying on it leaking in through a `from ... import *` statement.
seed = 1
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

#### Scaling Lists for Different Pixel Pitches (dataset_2s):
* 100x25x100 um:  [150.0, 37.5, 10.0, 1.22]
* 50x25x100 um:   [75.0, 37.5, 10.0, 1.22]
* 50x20x100 um:   [75.0, 30.0, 10.0, 1.22]
* 50x15x100 um:   [75.0, 22.5, 10.0, 1.22]
* 50x12.5x100 um: [75.0, 18.75, 10.0, 1.22]
* 50x10x100 um:   [75.0, 15.0, 10.0, 1.22]

#### Scaling Lists for Different Pixel Pitches (dataset_3sr):
* 100x25x100 um:  [150.0, 37.5, 10.0, 10.0]
* 50x25x100 um:   [75.0, 37.5, 10.0, 10.0]
* 50x20x100 um:   [75.0, 30.0, 10.0, 10.0]
* 50x15x100 um:   [75.0, 22.5, 10.0, 10.0]
* 50x12.5x100 um: [75.0, 18.75, 10.0, 10.0]
* 50x10x100 um:   [75.0, 15.0, 10.0, 10.0]

#### Scaling Lists for Different Pixel Pitches (dataset_3sr, |cot$\beta$| $\leq$ 1.5 preselection):
* 100x25x100 um:  [150.0, 37.5, 10.0, 1.5]
* 50x25x100 um:   [75.0, 37.5, 10.0, 1.5]
* 50x20x100 um:   [75.0, 30.0, 10.0, 1.5]
* 50x15x100 um:   [75.0, 22.5, 10.0, 1.5]
* 50x12.5x100 um: [75.0, 18.75, 10.0, 1.5]
* 50x10x100 um:   [75.0, 15.0, 10.0, 1.5]

In [4]:
# ---- Data-loading configuration ---------------------------------------------
batch_size = 5000
val_batch_size = 5000
train_file_size = 60  # file_count handed to the training generator
val_file_size = 20    # file_count handed to the validation generator

# Parquet directory; the same location is used for both the inputs and the labels.
parquet_dir = '/data/dajiang/smart-pixels/dataset_3sr/dataset_3sr_50x12P5_cotBeta1P5_parquets/unflipped/'

# Specify what directory to save the tfrecords
tfrecords_dir_train = '/data/dajiang/smart-pixels/tfrecords/tfrecords_dataset_3sr_50x12P5_20t_bs5000_cotBeta1P5_train'
tfrecords_dir_val = '/data/dajiang/smart-pixels/tfrecords/tfrecords_dataset_3sr_50x12P5_20t_bs5000_cotBeta1P5_val'


def make_data_generator(n_batch, n_files, from_end, records_dir):
    """Build an OptimizedDataGenerator with the settings shared by both splits.

    Only the four arguments below differ between the training and validation
    generators; everything else is defined once here so the two cannot drift
    apart. Fresh list/tuple literals are created on every call, so the two
    generators never share a mutable argument.

    Args:
        n_batch: value passed as ``batch_size``.
        n_files: value passed as ``file_count``.
        from_end: value passed as ``files_from_end``.
        records_dir: tfrecords directory, passed as both
            ``load_from_tfrecords_dir`` and ``tfrecords_dir``.

    Returns:
        The constructed OptimizedDataGenerator.
    """
    return OptimizedDataGenerator(
        data_directory_path = parquet_dir,
        labels_directory_path = parquet_dir,
        is_directory_recursive = False,
        file_type = 'parquet',
        data_format = '3D',
        batch_size = n_batch,
        file_count = n_files,
        to_standardize = True,
        include_y_local = False,
        labels_list = ['x-midplane','y-midplane','cotAlpha','cotBeta'],
        # 50x12.5x100 um pitch, dataset_3sr with the |cotBeta| <= 1.5 preselection
        scaling_list = [75.0, 18.75, 10.0, 1.5],
        input_shape = (20,13,21),
        transpose = (0,2,3,1),
        files_from_end = from_end,
        shuffle = True,

        load_from_tfrecords_dir = records_dir,
        tfrecords_dir = records_dir,
        use_time_stamps = -1, #-1
        max_workers = 1, # Don't make this too large (will use up all RAM)
        seed = seed,
        quantize = True # Quantization ON
    )


# The two splits use opposite files_from_end settings -- presumably so they
# draw files from opposite ends of the directory listing; TODO confirm.
training_generator = make_data_generator(
    n_batch = batch_size,
    n_files = train_file_size,
    from_end = True,
    records_dir = tfrecords_dir_train,
)

validation_generator = make_data_generator(
    n_batch = val_batch_size,
    n_files = val_file_size,
    from_end = False,
    records_dir = tfrecords_dir_val,
)



In [5]:
# Build the network and print its layer table (compilation is done in a later cell).
model = CreateModel(
    (13, 21, 20),
    n_filters=5,
    pool_size=3,
)
model.summary()

2025-03-29 18:28:43.179010: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3234 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB MIG 1g.5gb, pci bus id: 0000:01:00.0, compute capability: 8.0


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 13, 21, 20)]      0         
                                                                 
 q_separable_conv2d (QSepar  (None, 11, 19, 5)         285       
 ableConv2D)                                                     
                                                                 
 q_activation (QActivation)  (None, 11, 19, 5)         0         
                                                                 
 q_conv2d (QConv2D)          (None, 11, 19, 5)         30        
                                                                 
 q_activation_1 (QActivatio  (None, 11, 19, 5)         0         
 n)                                                              
                                                                 
 average_pooling2d (Average  (None, 3, 6, 5)           0     

In [6]:
# Configure training: Adam optimizer plus `custom_loss` (not defined in this
# notebook; presumably provided by `from loss import *` -- TODO confirm).
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=custom_loss,
)

In [None]:
# training
pitch = '50x12P5'

# Short random tag that gives this run its own checkpoint directory.
# It is drawn from the OS entropy source instead of the global `random`
# generator: that generator is seeded earlier in the notebook, so its output
# is reproducible and the tag could repeat from one run to the next. Using a
# separate source also leaves the seeded generator's state untouched.
fingerprint = '%08x' % random.SystemRandom().randrange(16**8)

base_dir = '/home/dajiang/smart-pixels-ml/weights/weights_7pitches/dataset_3sr_cotBeta1P5_weights/'
weights_dir = os.path.join(
    base_dir,
    'weights-{}-bs{}-{}-checkpoints'.format(pitch, batch_size, fingerprint),
)

# create output directories
# makedirs creates base_dir (and any missing parents) as needed. exist_ok is
# left at its default, so an already-existing weights_dir raises
# FileExistsError rather than mixing checkpoints from two runs.
os.makedirs(weights_dir)

# One weights file per epoch; the file name records the epoch number and the
# training / validation loss.
checkpoint_filepath = os.path.join(
    weights_dir,
    'weights.{epoch:02d}-t{loss:.2f}-v{val_loss:.2f}.hdf5',
)
mcp = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    save_best_only=False,  # keep every epoch, not only the best val_loss
)

print('Model fingerprint: {}'.format(fingerprint))

history = model.fit(x=training_generator,
                    validation_data=validation_generator,
                    callbacks=[mcp],
                    epochs=500,
                    shuffle=False, # shuffling now occurs within the data-loader
                    verbose=1)

Model fingerprint: 414c343c
Epoch 1/500


2025-03-29 18:30:09.354767: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8906
2025-03-29 18:30:09.419017: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2025-03-29 18:30:09.647159: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2025-03-29 18:30:09.653686: I tensorflow/core/util/cuda_solvers.cc:179] Creating GpuSolver handles for stream 0x92bda80
2025-03-29 18:30:09.673591: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fa1ade76ed0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-03-29 18:30:09.673631: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA A100-SXM4-40GB MIG 1g.5gb, Compute Capability 8.0
2025-03-29 18:30:09.677428: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir

