In [1]:
import os
# Set between `import os` and the tensorflow import on purpose: the variable is
# already in the environment when TensorFlow is loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

# Enable GPU dynamic memory allocation
# The loop body is skipped when no GPU devices are listed.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# main_path is the process working directory at the time this cell runs. The
# label-map and model paths in a later cell are built from it, so the notebook
# has to be started from the workspace that contains "annotations" and "models".
main_path=os.getcwd()
print(main_path)

C:\Users\METE\Desktop\worksight_tf\workspace\training_demo


In [2]:
# Folder holding the images to annotate. The generated .xml label files are
# written next to the images, so this folder ends up containing both kinds.
IMAGE_DIR = "C:\\Users\\METE\\Desktop\\single_image_trial"
# Only entries with these (case-insensitive) extensions are treated as images.
# Without the filter a second run would pick up the previously written .xml
# files and try to open them as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff', '.webp')

image_paths = []
# sorted() makes the processing order deterministic; os.listdir gives no
# ordering guarantee.
for image_name in sorted(os.listdir(IMAGE_DIR)):
    if not image_name.lower().endswith(IMAGE_EXTENSIONS):
        continue
    image_paths.append(os.path.join(IMAGE_DIR, image_name))
    # Kept for backward compatibility: holds the extension-less name of the
    # last image listed. It is not read by any of the cells shown here.
    image_saving_name = os.path.splitext(image_name)[0]

In [3]:
# Model identity and label-map file name.
MODEL_NAME = 'ws_resnet'
MODEL_DATE = '20230902'
LABEL_FILENAME = 'label_map.pbtxt'

# Every location is derived from main_path (the working directory captured above).
PATH_TO_MODEL_DIR = os.path.join(main_path, "models")
# The checkpoint files and pipeline.config live in the same per-model folder.
PATH_TO_CKPT = os.path.join(PATH_TO_MODEL_DIR, MODEL_NAME)
PATH_TO_CFG = os.path.join(PATH_TO_CKPT, "pipeline.config")
PATH_TO_LABELS = os.path.join(main_path, "annotations", LABEL_FILENAME)

In [4]:
import time
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder


# end='' keeps the cursor on the same line so the "Done!" message printed at
# the end of this cell continues it.
print('Loading model... ', end='')
start_time = time.time()

# Load pipeline config and build a detection model
# Only the 'model' section of the pipeline config is used; is_training=False
# builds the model for inference.
configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)

# Restore checkpoint
# NOTE(review): the checkpoint prefix 'ckpt-8' is hard-coded and has to be
# edited by hand to use a different checkpoint from PATH_TO_CKPT.
# expect_partial() presumably silences complaints about checkpoint values that
# are not matched by the inference model - TODO confirm.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(PATH_TO_CKPT, 'ckpt-8')).expect_partial()

def detect_fn(image):
    """Run the three stages of ``detection_model`` on ``image``.

    The input goes through ``preprocess``, ``predict`` and ``postprocess`` of
    the module-level ``detection_model``; the shapes returned by ``preprocess``
    are forwarded to both later stages.

    Args:
      image: the input handed unchanged to ``detection_model.preprocess``.

    Returns:
      Whatever ``detection_model.postprocess`` returns.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

# start_time was taken before the config load, model build and checkpoint
# restore earlier in this cell, so elapsed_time spans all of those steps.
end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Loading model... Done! Took 0.4941534996032715 seconds


In [5]:
# Mapping built from the label map file; the inference loop looks entries up
# by integer class id and reads their "name" field.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

In [6]:
%matplotlib inline
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import warnings
# No category or module filter is given, so this silences every Python warning
# for the rest of the session, not only Matplotlib's.
warnings.filterwarnings('ignore')

In [7]:
def load_image_into_numpy_array(path):
    """Load an image from file into an RGB numpy array.

    Puts image into numpy array to feed into tensorflow graph.
    The image is converted to RGB before it is turned into an array, so the
    result has three channels regardless of how the file is stored
    (grayscale, palette, RGBA, ...). Without the conversion a grayscale file
    would produce a 2-D array and an RGBA file a 4-channel one.

    Args:
      path: the file path to the image

    Returns:
      numpy array with shape (img_height, img_width, 3)

    Raises:
      Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # The context manager closes the underlying file as soon as the pixel data
    # has been copied into the array.
    with Image.open(path) as image:
        return np.array(image.convert('RGB'))

In [8]:
def process_data(box, detection_class, score, label_id_offset=1):
    """Flatten one detection into a single six-element list.

    Args:
      box: indexable with at least four entries; the inference loop treats
        them as (ymin, xmin, ymax, xmax).
      detection_class: class index of the detection.
      score: confidence score of the detection.
      label_id_offset: value added to ``detection_class``. Defaults to 1, the
        value the notebook uses, so the function no longer depends on a global
        that is only assigned inside the inference loop.

    Returns:
      ``[box[0], box[1], box[2], box[3], detection_class + label_id_offset, score]``
    """
    return [box[0], box[1], box[2], box[3], detection_class + label_id_offset, score]

In [9]:
def create_label(image_path, label_names, all_bbox_coordinates, width, height):
    """Write a Pascal VOC-style XML annotation file next to an image.

    The file gets the image's path with the extension replaced by ``.xml``.
    It is rewritten from scratch on every call, so running the notebook again
    replaces the previous annotation instead of appending a second
    ``<annotation>`` document to it (which would make the file invalid XML).
    Text values are XML-escaped, so names or paths containing ``&``, ``<`` or
    ``>`` still produce a well-formed file.

    Args:
      image_path: path of the annotated image.
      label_names: one label per detected object. If empty, nothing is written
        and any existing file is left untouched.
      all_bbox_coordinates: one ``[ymin, xmin, ymax, xmax]`` sequence per
        label, in the same order as ``label_names``.
      width: image width written to ``<size>``.
      height: image height written to ``<size>``.

    Returns:
      None.
    """
    # Function-scope import so this cell does not rely on another cell's imports.
    from xml.sax.saxutils import escape

    if not label_names:
        return

    path_wo_ext = os.path.splitext(image_path)[0]
    xml_filename = path_wo_ext + ".xml"
    folder_name = os.path.basename(os.path.dirname(image_path))
    filename = os.path.basename(image_path)

    lines = [
        "<annotation>",
        f"    <folder>{escape(str(folder_name))}</folder>",
        f"    <filename>{escape(str(filename))}</filename>",
        f"    <path>{escape(str(image_path))}</path>",
        "    <source>",
        "        <database>Unknown</database>",
        "    </source>",
        "    <size>",
        f"        <width>{width}</width>",
        f"        <height>{height}</height>",
        "        <depth>3</depth>",
        "    </size>",
        "    <segmented>0</segmented>",
    ]
    # zip stops at the shorter of the two sequences, as before.
    for label_name, bb_coordinates in zip(label_names, all_bbox_coordinates):
        lines.extend([
            "    <object>",
            f"        <name>{escape(str(label_name))}</name>",
            "        <pose>Unspecified</pose>",
            "        <truncated>0</truncated>",
            "        <difficult>0</difficult>",
            "        <bndbox>",
            # Boxes arrive as [ymin, xmin, ymax, xmax]; the XML lists x before y.
            f"            <xmin>{bb_coordinates[1]}</xmin>",
            f"            <ymin>{bb_coordinates[0]}</ymin>",
            f"            <xmax>{bb_coordinates[3]}</xmax>",
            f"            <ymax>{bb_coordinates[2]}</ymax>",
            "        </bndbox>",
            "    </object>",
        ])
    lines.append("</annotation>")

    # "w" truncates an existing file; UTF-8 is what XML parsers assume when the
    # document carries no encoding declaration.
    with open(xml_filename, "w", encoding="utf-8") as xml_file:
        xml_file.write("\n".join(lines) + "\n")

In [10]:
# Detections whose score is not above this value are not written to the label file.
MIN_SCORE_THRESHOLD = 0.5
# Added to the model's class indices before the category_index lookup
# (presumably the model emits zero-based indices while the label map starts
# at 1 - TODO confirm). Kept at module level and assigned before the loop
# because process_data may read it as a global.
label_id_offset = 1


def _to_pixel(fraction, size):
    """Scale a normalized coordinate to pixels and clamp it to [0, size]."""
    return min(max(int(fraction * size), 0), size)


for image_path in image_paths:
    print('Running inference for {}... \n'.format(image_path), end='')

    image_np = load_image_into_numpy_array(image_path)

    if image_np.ndim == 2:
        # Grayscale image: replicate the single channel so the model receives
        # an (H, W, 3) array instead of failing on a missing channel axis.
        image_np = np.stack((image_np,) * 3, axis=-1)
    elif image_np.shape[2] == 4:
        # RGBA image: keep only the first three channels (RGB).
        image_np = image_np[:, :, :3]
    img_height, img_width = image_np.shape[0], image_np.shape[1]

    # Add the batch dimension the model expects.
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)

    detections = detect_fn(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    # One [ymin, xmin, ymax, xmax, class_id, score] entry per detection.
    aggregated_list = list(map(process_data,
                               detections['detection_boxes'],
                               detections['detection_classes'],
                               detections['detection_scores']))
    label_names = []
    all_bbox_coordinates = []
    for ymin, xmin, ymax, xmax, class_id, score in aggregated_list:
        if not score > MIN_SCORE_THRESHOLD:
            continue
        category = category_index.get(int(class_id))
        if category is None:
            # A class id missing from the label map used to raise
            # AttributeError on None and abort the whole run.
            print('  Skipping detection with unknown class id {}'.format(int(class_id)))
            continue
        label_names.append(category.get("name"))
        # Normalized box -> pixel box, clamped so rounding or slightly
        # out-of-range values never produce coordinates outside the image.
        all_bbox_coordinates.append([
            _to_pixel(ymin, img_height),
            _to_pixel(xmin, img_width),
            _to_pixel(ymax, img_height),
            _to_pixel(xmax, img_width),
        ])
    create_label(image_path, label_names, all_bbox_coordinates, img_width, img_height)

Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_0.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_10.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_100.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1000.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1001.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1002.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1003.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1004.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1005.jpg... 
Running inference for C:\Users\METE\Desktop\single_image_trial\totaltest_frame_1006.jpg..