In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import cv2
import sys

# --- Configuration ----------------------------------------------------------
# Checkpoint of the trained classifier and the video file this cell produces.
MODEL_PATH = './stanford_dogs_model/EfficientNetB0_best_model.keras'
OUTPUT_VIDEO_FILENAME = 'stanford_dogs_predictions.avi'

# Playback timing: frame rate, on-screen time per image (seconds) and the
# overall target duration of the video (seconds).
VIDEO_FPS = 60
SECONDS_PER_IMAGE = 0.3
TARGET_VIDEO_SECONDS = 60

# Every image is repeated for this many consecutive frames; int() truncates
# any fractional frame.
FRAMES_PER_IMAGE = int(VIDEO_FPS * SECONDS_PER_IMAGE)
if not FRAMES_PER_IMAGE > 0:
    # A zero-frame slot would make the division below fail, so stop here.
    print("Error: FRAMES_PER_IMAGE must be greater than 0.")
    sys.exit(1)

# Frame budget for the whole video, and how many images fit into that budget.
TOTAL_VIDEO_FRAMES = VIDEO_FPS * TARGET_VIDEO_SECONDS
NUM_IMAGES_TO_PROCESS = TOTAL_VIDEO_FRAMES // FRAMES_PER_IMAGE

# Restore the trained classifier from the checkpoint on disk.
print(f"Loading model from {MODEL_PATH}...")
model = tf.keras.models.load_model(MODEL_PATH)
print("Model loaded successfully.")

# Elements 1 and 2 of the model's input shape are used as the resize target;
# if either one is unspecified (None), fall back to 224x224.
IMG_SIZE = model.input_shape[1:3]
if any(dim is None for dim in IMG_SIZE):
    IMG_SIZE = (224, 224)

print(f"Model expects input images of size: {IMG_SIZE}")

print("Loading Stanford Dogs test dataset...")
try:
    # as_supervised=True makes the dataset yield (image, label) pairs;
    # with_info=True additionally returns the metadata used below.
    ds_test_dataset, ds_info = tfds.load(
        'stanford_dogs', split='test', with_info=True, as_supervised=True
    )
    class_names = ds_info.features['label'].names
    test_set_size = ds_info.splits['test'].num_examples
    print(f"Dataset loaded. Found {len(class_names)} classes and {test_set_size} test images.")

    if NUM_IMAGES_TO_PROCESS <= test_set_size:
        print(f"Processing {NUM_IMAGES_TO_PROCESS} images to achieve a video length of approximately {TARGET_VIDEO_SECONDS:.2f} seconds.")
    else:
        # The split is too small for the requested duration: use every test
        # image and recompute the frame count and video length to match.
        print(f"Warning: Target video length requires {NUM_IMAGES_TO_PROCESS} images, but test set only has {test_set_size}.")
        NUM_IMAGES_TO_PROCESS = test_set_size
        TOTAL_VIDEO_FRAMES = NUM_IMAGES_TO_PROCESS * FRAMES_PER_IMAGE
        TARGET_VIDEO_SECONDS = TOTAL_VIDEO_FRAMES / VIDEO_FPS
        print(f"Processing all {test_set_size} test images. Video length will be approximately {TARGET_VIDEO_SECONDS:.2f} seconds.")

except Exception as load_error:
    print(f"Error loading dataset: {load_error}")
    print("Please ensure 'stanford_dogs' dataset is available or check your internet connection.")
    sys.exit(1)

# Size the video canvas from the first test image.
# The first element is fetched explicitly (instead of a for/break loop) so an
# empty dataset is detected here; otherwise frame_width/frame_height would be
# left unassigned -- or, on a notebook re-run, would silently keep stale
# values from a previous execution.
try:
    first_example = next(iter(ds_test_dataset.take(1).as_numpy_iterator()), None)
except Exception as e:
    print(f"Error determining frame size from dataset: {e}")
    sys.exit(1)

if first_example is None:
    print("Error determining frame size from dataset: the test split yielded no images.")
    sys.exit(1)

# Each element is an (image, label) pair; only the image is needed here.
first_image_tf = first_example[0]
# The first two axes of the image array are taken as height and width.
frame_height, frame_width = first_image_tf.shape[:2]
print(f"Video frame size determined from first image: {frame_width}x{frame_height}")

# Four-character code selecting the 'MJPG' codec for the output file.
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')

print(f"\nAttempting to initialize VideoWriter for {OUTPUT_VIDEO_FILENAME} using codec 'MJPG'...")

try:
    # The size tuple is passed as (width, height).
    out = cv2.VideoWriter(OUTPUT_VIDEO_FILENAME, fourcc, VIDEO_FPS, (frame_width, frame_height))
    if out.isOpened():
        print(f"Video writer created: {OUTPUT_VIDEO_FILENAME}, Codec: MJPG, FPS: {VIDEO_FPS}, Size: {frame_width}x{frame_height}")
    else:
        # The constructor returned, but the writer reports it is not open.
        print(f"Error: VideoWriter not opened successfully for {OUTPUT_VIDEO_FILENAME}.")
        print("Ensure the 'MJPG' codec is available on your system and check file permissions.")
        sys.exit(1)
except Exception as writer_error:
    print(f"Error creating VideoWriter: {writer_error}")
    sys.exit(1)

def _fit_to_frame(image_bgr, width, height):
    """Return `image_bgr` letterboxed onto a black `width` x `height` canvas.

    The image is scaled uniformly (aspect ratio preserved) so that it fits
    inside the canvas, then centred; uncovered areas stay black. An image that
    already has the requested size is returned unchanged.

    Args:
        image_bgr: 3-channel uint8 image array of shape (rows, cols, 3).
        width: Target canvas width in pixels.
        height: Target canvas height in pixels.

    Returns:
        A uint8 array of shape (height, width, 3).
    """
    src_h, src_w = image_bgr.shape[:2]
    if (src_w, src_h) == (width, height):
        return image_bgr

    scale = min(width / src_w, height / src_h)
    # Clamp so rounding can never exceed the canvas or collapse to zero pixels.
    new_w = max(1, min(width, round(src_w * scale)))
    new_h = max(1, min(height, round(src_h * scale)))
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
    resized = cv2.resize(image_bgr, (new_w, new_h), interpolation=interpolation)

    canvas = np.zeros((height, width, 3), dtype=np.uint8)
    x0 = (width - new_w) // 2
    y0 = (height - new_h) // 2
    canvas[y0:y0 + new_h, x0:x0 + new_w] = resized
    return canvas


print(f"\nStarting prediction and writing to {OUTPUT_VIDEO_FILENAME}...")
processed_images_count = 0

# Overlay styling is the same for every image, so it is defined once up front.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.8
font_thickness = 2
text_color = (0, 255, 0)           # green in BGR: correct prediction / true label
error_color = (0, 0, 255)          # red in BGR: wrong prediction
text_background_color = (0, 0, 0)  # black box behind each caption
padding = 5
pred_text_origin = (10, 30)

writer_was_open = False
try:
    for original_image_np, true_label_np in ds_test_dataset.take(NUM_IMAGES_TO_PROCESS).as_numpy_iterator():
        # Nothing can be written once the writer has closed, so stop before
        # spending time on another prediction.
        if not out.isOpened():
            print("\nError: VideoWriter is no longer open. Stopping writing.")
            break

        # Build a single-image batch at the model's input size.
        image_for_model_tf = tf.convert_to_tensor(original_image_np, dtype=tf.float32)
        image_for_model_tf = tf.image.resize(image_for_model_tf, IMG_SIZE)
        image_for_model_tf = tf.keras.applications.efficientnet.preprocess_input(image_for_model_tf)
        image_for_model_np = np.expand_dims(image_for_model_tf.numpy(), axis=0)

        predictions = model.predict(image_for_model_np, verbose=0)

        predicted_class_index = np.argmax(predictions[0])
        confidence = predictions[0][predicted_class_index]
        predicted_class_name = class_names[predicted_class_index]
        true_class_name = class_names[true_label_np]

        # Convert RGB -> BGR for OpenCV, then fit the image onto the writer's
        # canvas. Test images differ in size, while VideoWriter.write requires
        # every frame to match the size the writer was opened with; frames of
        # any other size would not end up in the video.
        display_img_np = _fit_to_frame(
            cv2.cvtColor(original_image_np, cv2.COLOR_RGB2BGR),
            frame_width,
            frame_height,
        )

        prediction_text = f"Pred: {predicted_class_name} ({confidence:.2f})"
        true_text = f"True: {true_class_name}"

        current_text_color = text_color if predicted_class_name == true_class_name else error_color

        (pred_text_width, pred_text_height), baseline_pred = cv2.getTextSize(prediction_text, font, font_scale, font_thickness)
        (true_text_width, true_text_height), baseline_true = cv2.getTextSize(true_text, font, font_scale, font_thickness)

        # The true-label caption sits one text height plus a 10 px gap below
        # the prediction caption.
        true_text_origin = (10, pred_text_origin[1] + pred_text_height + 10)

        # Filled (thickness -1) background boxes keep the captions legible.
        cv2.rectangle(display_img_np, (pred_text_origin[0] - padding, pred_text_origin[1] - pred_text_height - padding),
                      (pred_text_origin[0] + pred_text_width + padding, pred_text_origin[1] + baseline_pred + padding),
                      text_background_color, -1)

        cv2.rectangle(display_img_np, (true_text_origin[0] - padding, true_text_origin[1] - true_text_height - padding),
                      (true_text_origin[0] + true_text_width + padding, true_text_origin[1] + baseline_true + padding),
                      text_background_color, -1)

        cv2.putText(display_img_np, prediction_text, pred_text_origin, font, font_scale, current_text_color, font_thickness, cv2.LINE_AA)
        cv2.putText(display_img_np, true_text, true_text_origin, font, font_scale, text_color, font_thickness, cv2.LINE_AA)

        # Repeat the annotated frame so the image stays on screen for
        # FRAMES_PER_IMAGE frames.
        for _ in range(FRAMES_PER_IMAGE):
            out.write(display_img_np)

        processed_images_count += 1
        print(f"Processed image {processed_images_count}/{NUM_IMAGES_TO_PROCESS}", end='\r')
finally:
    # Release the writer even when prediction or drawing raised, so the file
    # handle is not left open in the kernel.
    writer_was_open = out.isOpened()
    if writer_was_open:
        out.release()

if writer_was_open:
    print(f"\nFinished writing video to {OUTPUT_VIDEO_FILENAME}.")
else:
    print(f"\nVideo writer was not open at the end. No video file might have been written or it could be corrupt: {OUTPUT_VIDEO_FILENAME}")

2025-05-10 22:37:01.392591: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746896821.408395   64716 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746896821.413310   64716 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746896821.425896   64716 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746896821.425913   64716 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746896821.425915   64716 computation_placer.cc:177] computation placer alr

Loading model from ./stanford_dogs_model/EfficientNetB0_best_model.keras...


I0000 00:00:1746896823.974468   64716 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3005 MB memory:  -> device: 0, name: Quadro P1000, pci bus id: 0000:01:00.0, compute capability: 6.1


Model loaded successfully.
Model expects input images of size: (160, 160)
Loading Stanford Dogs test dataset...
Dataset loaded. Found 120 classes and 8580 test images.
Processing 200 images to achieve a video length of approximately 60.00 seconds.


2025-05-10 22:37:06.312170: I tensorflow/core/kernels/data/tf_record_dataset_op.cc:387] The default buffer size is 262144, which is overridden by the user specified `buffer_size` of 8388608


Video frame size determined from first image: 500x332

Attempting to initialize VideoWriter for stanford_dogs_predictions.avi using codec 'MJPG'...
Video writer created: stanford_dogs_predictions.avi, Codec: MJPG, FPS: 60, Size: 500x332

Starting prediction and writing to stanford_dogs_predictions.avi...


I0000 00:00:1746896828.107684   64801 service.cc:152] XLA service 0x7efb80047660 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746896828.107702   64801 service.cc:160]   StreamExecutor device (0): Quadro P1000, Compute Capability 6.1
2025-05-10 22:37:08.204805: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1746896828.783450   64801 cuda_dnn.cc:529] Loaded cuDNN version 90501
I0000 00:00:1746896835.435547   64801 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Processed image 200/200
Finished writing video to stanford_dogs_predictions.avi.


2025-05-10 22:37:32.615174: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
