In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, mixed_precision
from utils.DenseCL import DenseCL

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Print the version string of the imported TensorFlow package.
print(tf.__version__)

In [None]:
# Runtime setup: reset Keras state, configure GPUs, pick the numeric policy
# and the distribution strategy, then seed TensorFlow's random ops.
SEED = 42

tf.keras.backend.clear_session()

gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPU runtime is
        # initialised; re-running this cell in a live kernel would otherwise
        # abort here. Report it and keep the existing setting.
        print(f"Could not enable memory growth on {gpu.name}: {err}")

# float16 compute is only requested when a GPU is present.
mixed_precision.set_global_policy("mixed_float16" if gpus else "float32")

if len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy()
elif len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy("/GPU:0")
else:
    strategy = tf.distribute.OneDeviceStrategy("/CPU:0")

tf.random.set_seed(SEED)
print("GPUs:", gpus, "replicas:", strategy.num_replicas_in_sync)


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA T550 Laptop GPU, compute capability 7.5
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] replicas: 1


2026-02-07 10:28:25.327322: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-02-07 10:28:25.377041: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-02-07 10:28:25.377080: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-02-07 10:28:25.377814: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2026-02-07 10:28:25.382970: I external/local_xla/xla/stream_executor

In [3]:
# Display the build configuration of the installed TensorFlow binary
# (compiler, CUDA/cuDNN versions, CUDA/ROCm/TensorRT build flags).
tf.sysconfig.get_build_info()

OrderedDict([('cpu_compiler', '/usr/lib/llvm-17/bin/clang'),
             ('cuda_compute_capabilities',
              ['sm_50', 'sm_60', 'sm_70', 'sm_75', 'compute_80']),
             ('cuda_version', '12.2'),
             ('cudnn_version', '8'),
             ('is_cuda_build', True),
             ('is_rocm_build', False),
             ('is_tensorrt_build', True)])

In [4]:
# --- Training hyper-parameters ---
IMAGE_SIZE = 224
BATCH_SIZE = 32 * strategy.num_replicas_in_sync  # Scale batch size with GPUs
EPOCHS = 25
NUM_TRAIN_IMAGES = 8166  # Number of images in the training set
STEPS_PER_EPOCH = NUM_TRAIN_IMAGES // BATCH_SIZE
WARMUP_EPOCHS = 2
PEAK_LEARNING_RATE = 1e-3

with strategy.scope():
    # Model definition must happen inside strategy scope
    backbone = tf.keras.applications.ResNet50(
        include_top=False, weights=None, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)
    )

    # --- Cosine learning-rate schedule with linear warmup ---
    # CosineDecay counts `decay_steps` from the END of the warmup phase, so the
    # whole schedule lasts warmup_steps + decay_steps. Decay over the epochs
    # that remain after warmup so the schedule ends exactly when training does.
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=0.0,  # Start at 0 for warmup
        decay_steps=STEPS_PER_EPOCH * (EPOCHS - WARMUP_EPOCHS),
        warmup_target=PEAK_LEARNING_RATE,  # LR reached at the end of warmup
        warmup_steps=STEPS_PER_EPOCH * WARMUP_EPOCHS,
    )

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    model = DenseCL(backbone)
    model.compile(optimizer=optimizer, run_eagerly=False)

2026-02-07 10:28:45.973182: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2026-02-07 10:28:46.171830: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:30] 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_INVALID_IMAGE'

2026-02-07 10:28:46.171880: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:30] 'cuModuleGetFunction(&function, module, kernel_name)' failed with 'CUDA_ERROR_INVALID_HANDLE'

2026-02-07 10:28:46.171893: W tensorflow/core/framework/op_kernel.cc:1827] INTERNAL: 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, 0, reinterpret_cast<CUstream>(stream), params, nullptr)' failed with 'CUDA_ERROR_INVALID_HANDLE'


InternalError: {{function_node __wrapped__Minimum_device_/job:localhost/replica:0/task:0/device:GPU:0}} 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, 0, reinterpret_cast<CUstream>(stream), params, nullptr)' failed with 'CUDA_ERROR_INVALID_HANDLE' [Op:Minimum] name: 

In [4]:
# Random augmentation stack: horizontal flip, then rotation (factor 0.1),
# then zoom (factor 0.2), applied in that order.
data_augment = tf.keras.Sequential()
data_augment.add(layers.RandomFlip("horizontal"))
data_augment.add(layers.RandomRotation(0.1))
data_augment.add(layers.RandomZoom(0.2))

def make_two_views(image):
    """Return two independently augmented views of ``image``.

    The input is cast to float32 and divided by 255 before ``data_augment``
    is applied twice with ``training=True``.

    Returns:
        A ``(view_1, view_2)`` tuple of augmented tensors.
    """
    # Rescale in float32 so the input-pipeline arithmetic stays in full precision.
    scaled = tf.cast(image, tf.float32) / 255.0
    first_view = data_augment(scaled, training=True)
    second_view = data_augment(scaled, training=True)
    return (first_view, second_view)


In [None]:
def create_tfrecords(source_dir, output_file):
    """Pack every PNG/JPEG file found under ``source_dir`` into one TFRecord file.

    Each record is a ``tf.train.Example`` with a single bytes feature named
    ``"image"`` that holds the file's raw (still encoded) contents.

    Args:
        source_dir: Directory that is walked recursively for image files.
        output_file: Path of the TFRecord file to write.
    """
    image_extensions = (".png", ".jpg", ".jpeg")
    image_paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(source_dir)
        for name in names
        if name.lower().endswith(image_extensions)
    ]
    # os.walk order depends on the filesystem; sort so record order is reproducible.
    image_paths.sort()

    with tf.io.TFRecordWriter(output_file) as writer:
        for path in image_paths:
            # Close each image file right after reading it instead of leaving
            # one open handle per image for the garbage collector.
            with open(path, "rb") as image_file:
                img_raw = image_file.read()
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        "image": tf.train.Feature(
                            bytes_list=tf.train.BytesList(value=[img_raw])
                        )
                    }
                )
            )
            writer.write(example.SerializeToString())


# Serialize the unlabeled figure images into a single TFRecord file.
create_tfrecords(
    source_dir="E:\\figures_no_labeled",
    output_file="figures_data.tfrecords",
)


# --- UPDATED LOADER FOR JUPYTER ---
def decode_fn(record_bytes):
    """Parse one serialized example and return its image at training resolution.

    Reads the ``"image"`` bytes feature, decodes it to 3 channels and resizes
    the result to ``(IMAGE_SIZE, IMAGE_SIZE)``.
    """
    feature_spec = {"image": tf.io.FixedLenFeature([], tf.string)}
    parsed = tf.io.parse_single_example(record_bytes, feature_spec)
    decoded = tf.io.decode_jpeg(parsed["image"], channels=3)
    return tf.image.resize(decoded, (IMAGE_SIZE, IMAGE_SIZE))


# TFRecord input pipeline: shuffle raw records, decode them in parallel,
# batch, build the two augmented views per batch, then prefetch.
train_ds = (
    tf.data.TFRecordDataset("figures_data.tfrecords")
    .shuffle(1000)
    .map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .map(make_two_views)
    .prefetch(tf.data.AUTOTUNE)
)

In [6]:
# Directory input pipeline: load unlabeled, already-batched images, cache the
# decoded batches (needs enough RAM), then augment and prefetch with AUTOTUNE.
# Augmentation runs after cache(), so the random views are recomputed each pass.
# NOTE(review): cache() sits after the loader's shuffle, so later epochs
# presumably replay the cached batch order — confirm this is intended.
train_ds = (
    tf.keras.utils.image_dataset_from_directory(
        "E:\\Figures",
        label_mode=None,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    .cache()
    .map(make_two_views, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

Found 8166 files.


In [7]:
# Report the TensorFlow version and every physical device TensorFlow can see.
print(f"TF version: {tf.__version__}")
print(f"Devices: {tf.config.list_physical_devices()}")

TF version: 2.20.0
Devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [None]:
# With `steps_per_epoch` set, Keras keeps drawing from one continuous iterator
# instead of restarting the dataset every epoch. A finite dataset would run dry
# after the first epoch, so feed an endlessly repeating view of it.
history = model.fit(
    train_ds.repeat(),
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir="logs/densecl_test")],
)

# Make sure the output directory exists before writing the model file.
os.makedirs("models", exist_ok=True)
# Keras 3 (bundled with recent TensorFlow releases) rejects save paths without
# a `.keras` or `.h5` extension, so save in the native `.keras` format.
model.backbone.save("models/densecl_resnet50_optimized.keras")