In [1]:
import os
import json

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import utils_CNN as utils

from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score
from sklearn.model_selection import KFold

# Pin the process to GPU index 2 and point XLA at the conda env's CUDA data dir
# (works around "libdevice not found at ./libdevice.10.bc").
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '2',
    'XLA_FLAGS': '--xla_gpu_cuda_data_dir=/home/r10222035/.conda/envs/tf2',
})

2024-06-29 17:12:06.467011: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-29 17:12:06.549399: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# Sampling datasets

In [2]:
def get_sample_size(y):
    """Count, print and return the number of label-1 and label-0 entries.

    Args:
        y: Array-like of labels. A 1-D input is interpreted as class ids
            directly; any higher-rank input is reduced with ``argmax`` along
            axis 1 (e.g. one-hot or per-class score rows) before counting.

    Returns:
        Tuple ``(ns, nb)``: the number of entries whose class id equals 1 and
        the number whose class id equals 0, in that order.
    """
    # np.asarray lets plain lists/tuples through; ndarrays are passed as-is.
    labels = np.asarray(y)

    # Reduce to one class id per entry, then count both classes once.
    class_ids = labels if labels.ndim == 1 else labels.argmax(axis=1)
    ns = (class_ids == 1).sum()
    nb = (class_ids == 0).sum()

    print(ns, nb)
    return ns, nb

In [3]:
# Path of the JSON run configuration that the next cell opens and parses
config_file = 'config_files/origin_25x25_config_01.json'

In [4]:
# Parse the JSON run configuration
with open(config_file, 'r') as f:
    config = json.load(f)

# Pull the individual settings out of the config dict
train_npy_paths, val_npy_paths = config['train_npy_paths'], config['val_npy_paths']
seed, sensitivity, luminosity = config['seed'], config['sensitivity'], config['luminosity']
true_label_path, model_name, sample_type = (
    config['true_label_path'],
    config['model_name'],
    config['sample_type'],
)

# Event counts for the requested sensitivity and luminosity
n_SR_S, n_SR_B, n_SB_S, n_SB_B = utils.compute_nevent_in_SR_SB(
    sensitivity=sensitivity,
    L=luminosity,
)

# The array is ordered (SR_S, SB_S, SR_B, SB_B), which differs from the
# return order above.
# NOTE(review): astype(int) truncates toward zero rather than rounding —
# confirm that is the intended conversion for the event counts.
nevents = np.array([n_SR_S, n_SB_S, n_SR_B, n_SB_B]).astype(int)

# Build the mixed training sample from the .npy inputs
X, y = utils.create_mix_sample_from_npy(train_npy_paths, nevents, seed=seed)

Background cross section, SR: 136.13 fb, SB: 145.57 fb
Background sample size: SR: 18922.4, SB: 20234.0
Signal sample size: SR: 0.0, SB: 0.0
Preparing dataset from ['../Sample/HVmodel/data/origin/25x25']


In [5]:
# Number of folds for k-fold cross-validation
k = 5
# NOTE(review): KFold defaults to shuffle=False, so every fold is a contiguous
# slice of X — confirm utils.create_mix_sample_from_npy already returns the
# events in shuffled order.
kf = KFold(n_splits=k)

In [6]:
# Print (and return) how many sampled events carry label 1 and label 0
get_sample_size(y)

18922 20233


(18922, 20233)

## CNN

In [7]:
class CNN(tf.keras.Model):
    """Two-branch classifier that scores both input halves with one shared CNN.

    The input tensor is split into two equal parts along its last axis. Each
    part goes through its own BatchNormalization layer and then through the
    *same* convolutional sub-network (weights are shared between the two
    branches). The two sigmoid outputs are multiplied, so the final score
    lies in [0, 1].

    Args:
        name: Name given to the Keras model.
    """

    def __init__(self, name='CNN'):
        super().__init__(name=name)

        # Separate normalisation per branch: statistics are not shared.
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bn2 = tf.keras.layers.BatchNormalization()

        # Shared feature extractor + dense head, applied to both branches.
        self.sub_network = tf.keras.Sequential([
            tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'),
            tf.keras.layers.MaxPool2D((2, 2)),
            tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'),
            tf.keras.layers.MaxPool2D((2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
            tf.keras.layers.MaxPool2D((2, 2)),
            tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])

    @tf.function
    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Tensor whose last axis is split into two equal halves
                (assumes a (batch, height, width, 2) image — TODO confirm).
            training: Whether the layers should run in training mode. It is
                forwarded explicitly so BatchNormalization uses batch
                statistics only when training, independent of Keras's
                implicit call context.

        Returns:
            Element-wise product of the two branch outputs.
        """
        # Split the last axis into the two branches
        channel1, channel2 = tf.split(inputs, num_or_size_splits=2, axis=-1)

        # Per-branch normalisation, then the shared sub-network
        channel1 = self.bn1(channel1, training=training)
        channel2 = self.bn2(channel2, training=training)
        output_channel1 = self.sub_network(channel1, training=training)
        output_channel2 = self.sub_network(channel2, training=training)

        # Combine the two branch scores multiplicatively
        return tf.multiply(output_channel1, output_channel2)

In [8]:
# Hyper-parameters shared by every fold of the training loop
train_epochs = 500      # maximum number of epochs passed to model.fit
patience = 3            # EarlyStopping: epochs without val_loss improvement before stopping
min_delta = 0.0         # EarlyStopping: smallest val_loss change counted as an improvement
learning_rate = 1e-4    # step size handed to the Adam optimizer

In [9]:
# Mini-batch size; defined once here because it does not change between folds.
# The evaluation cell further down reuses it.
BATCH_SIZE = 512

# Keras History object of every fold, in fold order. `history` (below) still
# refers to the most recent fold only.
histories = []

for fold, (train_index, val_index) in enumerate(kf.split(X)):
    # Drop Keras global state left behind by the previous fold's model so that
    # memory does not grow with every newly built model.
    tf.keras.backend.clear_session()

    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Build the input pipelines on the CPU device
    with tf.device('CPU'):
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
        # Buffer as large as the fold, i.e. a full reshuffle every epoch
        train_dataset = train_dataset.shuffle(buffer_size=len(y_train)).batch(BATCH_SIZE)

        valid_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
        valid_dataset = valid_dataset.batch(BATCH_SIZE)

    # Directory that receives this fold's best checkpoint (folds are 1-based)
    save_model_name = f'CNN_models/last_model_CNN_fold_{fold+1}/'

    # Fresh model and optimizer for every fold
    model = CNN()
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['accuracy'])

    # Stop once val_loss has not improved for `patience` epochs; only the
    # best-val_loss model is written to disk.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=min_delta, verbose=1, patience=patience)
    check_point = tf.keras.callbacks.ModelCheckpoint(save_model_name, monitor='val_loss', verbose=1, save_best_only=True)

    history = model.fit(train_dataset, validation_data=valid_dataset, epochs=train_epochs,
                        callbacks=[early_stopping, check_point])
    histories.append(history)

2024-06-29 17:12:20.685815: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2024-06-29 17:12:20.685875: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: pheno-2
2024-06-29 17:12:20.685883: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: pheno-2
2024-06-29 17:12:20.686766: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.256.2
2024-06-29 17:12:20.686797: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.239.6
2024-06-29 17:12:20.686803: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 470.239.6 does not match DSO version 470.256.2 -- cannot find working devices in this configuration
2024-06-29 17:12

Epoch 1/500
Epoch 1: val_loss improved from inf to 0.99820, saving model to CNN_models/last_model_CNN_1/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_1/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_1/assets


Epoch 2/500
Epoch 2: val_loss improved from 0.99820 to 0.96957, saving model to CNN_models/last_model_CNN_1/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_1/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_1/assets


Epoch 3/500
Epoch 3: val_loss did not improve from 0.96957
Epoch 4/500
Epoch 4: val_loss did not improve from 0.96957
Epoch 5/500
Epoch 5: val_loss did not improve from 0.96957
Epoch 5: early stopping
Epoch 1/500
Epoch 1: val_loss improved from inf to 0.97338, saving model to CNN_models/last_model_CNN_2/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_2/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_2/assets


Epoch 2/500
Epoch 2: val_loss did not improve from 0.97338
Epoch 3/500
Epoch 3: val_loss did not improve from 0.97338
Epoch 4/500
Epoch 4: val_loss did not improve from 0.97338
Epoch 4: early stopping
Epoch 1/500
Epoch 1: val_loss improved from inf to 0.76668, saving model to CNN_models/last_model_CNN_3/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


Epoch 2/500
Epoch 2: val_loss improved from 0.76668 to 0.71015, saving model to CNN_models/last_model_CNN_3/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


Epoch 3/500
Epoch 3: val_loss did not improve from 0.71015
Epoch 4/500
Epoch 4: val_loss improved from 0.71015 to 0.68634, saving model to CNN_models/last_model_CNN_3/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


Epoch 5/500
Epoch 5: val_loss improved from 0.68634 to 0.68361, saving model to CNN_models/last_model_CNN_3/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_3/assets


Epoch 6/500
Epoch 6: val_loss did not improve from 0.68361
Epoch 7/500
Epoch 7: val_loss did not improve from 0.68361
Epoch 8/500
Epoch 8: val_loss did not improve from 0.68361
Epoch 8: early stopping
Epoch 1/500
Epoch 1: val_loss improved from inf to 1.14575, saving model to CNN_models/last_model_CNN_4/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


Epoch 2/500
Epoch 2: val_loss improved from 1.14575 to 0.99847, saving model to CNN_models/last_model_CNN_4/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


Epoch 3/500
Epoch 3: val_loss improved from 0.99847 to 0.89250, saving model to CNN_models/last_model_CNN_4/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_4/assets


Epoch 4/500
Epoch 4: val_loss did not improve from 0.89250
Epoch 5/500
Epoch 5: val_loss did not improve from 0.89250
Epoch 6/500
Epoch 6: val_loss did not improve from 0.89250
Epoch 6: early stopping
Epoch 1/500
Epoch 1: val_loss improved from inf to 1.18667, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 2/500
Epoch 2: val_loss improved from 1.18667 to 1.03798, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 3/500
Epoch 3: val_loss improved from 1.03798 to 0.96828, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 4/500
Epoch 4: val_loss did not improve from 0.96828
Epoch 5/500
Epoch 5: val_loss did not improve from 0.96828
Epoch 6/500
Epoch 6: val_loss improved from 0.96828 to 0.90969, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 7/500
Epoch 7: val_loss improved from 0.90969 to 0.89765, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 8/500
Epoch 8: val_loss did not improve from 0.89765
Epoch 9/500
Epoch 9: val_loss improved from 0.89765 to 0.86519, saving model to CNN_models/last_model_CNN_5/




INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


INFO:tensorflow:Assets written to: CNN_models/last_model_CNN_5/assets


Epoch 10/500
Epoch 10: val_loss did not improve from 0.86519
Epoch 11/500
Epoch 11: val_loss did not improve from 0.86519
Epoch 12/500
Epoch 12: val_loss did not improve from 0.86519
Epoch 12: early stopping


# Training results

In [10]:
# Path of the mixed test sample; despite the variable name it is a single .npy file
sample_dir = '../Sample/HVmodel/data/split_val/25x25/mix_sample_test.npy'

# Load the test images and labels through the project helper
X_test, y_test = utils.load_samples(sample_dir)

In [12]:
# Evaluate the saved checkpoint of every fold on the test sample.
for fold in range(k):
    # Must be the same directory pattern that the training loop hands to
    # ModelCheckpoint ('..._CNN_fold_<n>/'); otherwise a fresh run would load
    # stale models from an earlier naming scheme or fail to find them.
    model_path = f'CNN_models/last_model_CNN_fold_{fold+1}/'
    loaded_model = tf.keras.models.load_model(model_path)

    # results[0] is the loss and results[1] the accuracy metric the model was
    # compiled with.
    results = loaded_model.evaluate(x=X_test, y=y_test, batch_size=BATCH_SIZE)
    print(f'Testing Loss = {results[0]:.3}, Testing Accuracy = {results[1]:.3}')

Testing Loss = 0.674, Testing Accuracy = 0.517
Testing Loss = 0.68, Testing Accuracy = 0.508
Testing Loss = 0.671, Testing Accuracy = 0.647
Testing Loss = 0.657, Testing Accuracy = 0.498
Testing Loss = 0.7, Testing Accuracy = 0.5
