In [1]:
# Install the warning filters before TensorFlow is imported so that
# DeprecationWarning / FutureWarning messages are ignored from that point on.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import tensorflow as tf
# Limit TensorFlow's Python logger to ERROR-level messages.
tf.get_logger().setLevel('ERROR')
import tensorflow.keras as keras
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split
from glob import glob
import os
from tqdm import tqdm
from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

2024-05-30 22:02:25.454158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-30 22:02:25.553327: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Enable on-demand GPU memory allocation. This has to happen before the
# logical devices are listed below: once the GPUs are initialized TensorFlow
# rejects the change with "Physical devices cannot be modified after being
# initialized".
for physical_gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(physical_gpu, True)
    except RuntimeError as err:
        # Raised when the runtime is already initialized (e.g. this cell is
        # re-run in a live kernel); keep the existing configuration.
        print(err)

# Listing the logical devices initializes the GPUs, so it comes last.
gpus = tf.config.list_logical_devices('GPU')
gpu_names = [gpu.name for gpu in gpus]

# Replicate the model on every visible GPU; later cells build the model
# under `strategy.scope()`.
strategy = tf.distribute.MirroredStrategy(gpu_names)
print('\n\n Running on multiple GPUs ', gpu_names)

2024-05-30 22:02:43.238071: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-30 22:02:44.905354: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14825 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:3b:00.0, compute capability: 7.5
2024-05-30 22:02:44.906503: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14825 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:5e:00.0, compute capability: 7.5
2024-05-30 22:02:44.907451: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:local



 Running on multiple GPUs  ['/device:GPU:0', '/device:GPU:1', '/device:GPU:2', '/device:GPU:3']


In [3]:
# Loading and splitting the data is pure NumPy work, so it does not need to
# run inside `strategy.scope()`.

# Board dimensions; the model-building cell reuses `h` and `w`.
w, h = 19, 19
base_path = os.path.join('dataset2', '*/*.npz')

# glob() returns matches in arbitrary order; sorting keeps the sample order,
# and therefore the seeded train/validation split, reproducible.
file_list = sorted(glob(base_path))
if not file_list:
    raise FileNotFoundError('No .npz files found matching %r' % base_path)

# Collect one float32 array per file and concatenate once at the end instead
# of growing Python lists sample by sample.
x_chunks, y_chunks = [], []
for file_path in tqdm(file_list):
    # The context manager closes each archive as soon as its arrays are read.
    with np.load(file_path) as data:
        x_chunks.append(np.asarray(data['inputs'], dtype=np.float32).reshape((-1, h, w, 1)))
        y_chunks.append(np.asarray(data['outputs'], dtype=np.float32).reshape((-1, h * w)))

x_data = np.concatenate(x_chunks)  # (N, h, w, 1)
y_data = np.concatenate(y_chunks)  # (N, h * w)
del x_chunks, y_chunks

# Hold out 20% of the samples for validation with a fixed seed.
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.2, random_state=2020)

# The full arrays are no longer needed once the split copies exist.
del x_data, y_data

print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)

 23%|████████████▍                                         | 5754/24938 [00:07<00:23, 806.41it/s]


KeyboardInterrupt: 

In [5]:
import tensorflow as tf

# Request on-demand memory allocation for every physical GPU. TensorFlow only
# accepts this before the GPUs have been initialized; otherwise it raises a
# RuntimeError, which is printed here rather than aborting the notebook.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    print(err)

# Report how many physical and logical GPU devices TensorFlow sees.
logical_gpus = tf.config.list_logical_devices('GPU')
print(f"Physical GPUs: {len(gpus)}, Logical GPUs: {len(logical_gpus)}")


Physical devices cannot be modified after being initialized
Physical GPUs: 4, Logical GPUs: 4


In [5]:
# Build and compile inside the strategy scope so the model's variables are
# created under the MirroredStrategy defined earlier.
with strategy.scope():
    kernel_size = 7
    # Filter counts of the hidden convolutions that follow the input layer.
    hidden_filters = (64, 128, 256, 128, 64, 32)

    model = models.Sequential(
        # Input layer: the shape is derived from `h` / `w` so it always agrees
        # with the Reshape below instead of hard-coding 19 in a second place.
        [layers.Conv2D(32, kernel_size, activation='relu', padding='same', input_shape=(h, w, 1))]
        + [
            layers.Conv2D(filters, kernel_size, activation='relu', padding='same')
            for filters in hidden_filters
        ]
        + [
            # A 1x1 convolution reduces the channels to one logit per board cell.
            layers.Conv2D(1, 1, activation=None, padding='same'),
            # Flatten the (h, w, 1) logit map to a vector of h * w values.
            layers.Reshape((h * w,)),
            # Independent per-cell probability, paired with binary cross-entropy.
            layers.Activation('sigmoid'),
        ]
    )

    # NOTE(review): if each target row is a one-hot move, the per-cell 'acc'
    # metric will be dominated by the zero entries -- confirm that this is
    # the metric the checkpoint/LR callbacks should monitor.
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['acc']
    )

    model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 19, 19, 64)        3200      
                                                                 
 conv2d_7 (Conv2D)           (None, 19, 19, 128)       401536    
                                                                 
 conv2d_8 (Conv2D)           (None, 19, 19, 256)       1605888   
                                                                 
 conv2d_9 (Conv2D)           (None, 19, 19, 512)       6423040   
                                                                 
 conv2d_10 (Conv2D)          (None, 19, 19, 256)       6422784   
                                                                 
 conv2d_11 (Conv2D)          (None, 19, 19, 128)       1605760   
                                                                 
 conv2d_12 (Conv2D)          (None, 19, 19, 64)       

In [6]:
# The model was built and compiled under `strategy.scope()`; `fit` itself
# does not need to be called inside the scope.
start_time = datetime.now().strftime('%Y%m%d_%H%M%S')
os.makedirs('models', exist_ok=True)

training_callbacks = [
    # Keep only the weights with the best validation accuracy, saved under a
    # file name stamped with the start time of this run.
    ModelCheckpoint('./models/%s.h5' % (start_time), monitor='val_acc', verbose=1, save_best_only=True, mode='max'),
    # Multiply the learning rate by 0.2 after 5 epochs without improvement
    # in validation accuracy.
    ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=5, verbose=1, mode='max'),
]

# `use_multiprocessing` / `workers` were dropped: they only apply to
# generator / keras.utils.Sequence input and have no effect on in-memory
# NumPy arrays (newer Keras releases reject them outright).
# The batch size is the global batch, split across the strategy's replicas.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=256,
    epochs=10,
    callbacks=training_callbacks,
    validation_data=(x_val, y_val),
)

Epoch 1/10


2024-05-30 20:49:32.239763: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8907
2024-05-30 20:49:33.080178: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8907
2024-05-30 20:49:33.871704: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8907
2024-05-30 20:49:34.172879: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2024-05-30 20:49:34.417423: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8907

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2024-05



2024-05-30 21:14:24.709089: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 238.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-05-30 21:14:24.709394: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 748.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-05-30 21:14:24.709562: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 748.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-05-30 21:14:24.709734: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocato

KeyboardInterrupt: 