In [1]:
"""use notebook to ID all images with defects. Make sure bounding box coordinates exist. 

    then condition and send to API endpoint

"""
import os
import matplotlib.pyplot as plt
from PIL import Image
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import httpx

"""useful functions"""

def read_xml(xml_path):
    """Reads the XML file and extracts bounding box coordinates for all defects."""
    tree = ET.parse(xml_path)
    root = tree.getroot()
    defects = []
    for obj in root.findall("object"):
        name = obj.find("name").text
        xmin = int(obj.find("bndbox/xmin").text)
        ymin = int(obj.find("bndbox/ymin").text)
        xmax = int(obj.find("bndbox/xmax").text)
        ymax = int(obj.find("bndbox/ymax").text)
        defects.append((name, xmin, ymin, xmax, ymax))
    return defects

def plot_defects_on_image(image_path, xml_path):
    """Plots bounding boxes for all defects on the image."""
    image = Image.open(image_path)
    defects = read_xml(xml_path)

    plt.imshow(image)
    
    for name, xmin, ymin, xmax, ymax in defects:
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                            linewidth=1, edgecolor="r", facecolor="none")
        plt.gca().add_patch(rect)
        plt.text(xmin, ymin, name, color="r", fontsize=8, backgroundcolor="white")

    plt.title(f"Defects in {os.path.basename(image_path)}")
    plt.axis("off")  # Hide axes
    plt.show()


def grab_defect_data(image_path, xml_path):

    defect_list = []

    """grabs bounding boxes info for all defects on the image for associated xml file."""
    image = Image.open(image_path)
    defects = read_xml(xml_path)

    h, w, c = np.shape(image)
    
    for name, xmin, ymin, xmax, ymax in defects:

        xmin_n = xmin/w
        xmax_n = xmax/w
        ymin_n = ymin/h
        ymax_n = ymax/h

        img_name = os.path.basename(image_path).split('/')[-1]
        defect_list.append([img_name, h, w, c, name, xmin_n, ymin_n, xmax_n, ymax_n])

    df = pd.DataFrame(data=defect_list, columns=['image_path', 'h', 'w', 'c', 'defect', 'xmin_n', 'ymin_n','xmax_n','ymax_n'])
    return df

In [2]:
#import httpx
#!pip install httpx

#API_ROOT = os.environ["API_ROOT"]


In [3]:
def upload_to_api(file_name, file_stream, file_mimetype):
    file_details_response = httpx.post(
        f"{API_ROOT}/dataset",
        files={"file": (file_name, file_stream, file_mimetype)},
        auth=(os.environ["API_KEY"], os.environ["API_SECRET"]),
        timeout=600.0,
    )

    file_details = file_details_response.json() 

    if file_details_response.status_code != 200:
        print("Something happened", file_details_response.status_code)

    return file_details["dataset_object_id"]


## make sure we dont upload twice!

def get_uploaded_files():
    list_response = httpx.get(
        f"{API_ROOT}/dataset",
        auth=(os.environ["API_KEY"], os.environ["API_SECRET"]),
    )

    json_response = list_response.json()

    return json_response["files"]


In [4]:
import mimetypes
mimetypes.init()
from hashlib import md5


In [5]:
import os

upload_count = 0
combined_folder = r"C:\Users\endle\Desktop\object-detection-pytorch-wandb-coco\data\combined"
df_f = pd.DataFrame(columns=['image_path', 'h', 'w', 'c', 'defect', 'xmin_n', 'ymin_n','xmax_n','ymax_n'])

for filename in os.listdir(combined_folder):
    print(filename)
    if filename.endswith(".bmp"):
        image_path = os.path.join(combined_folder, filename)
        xml_path = os.path.join(combined_folder, filename.replace(".bmp", ".xml"))

        # Check if the associated .xml file exists
        if os.path.exists(xml_path):


            #plot your defects on images here, double check overlay..its good.

            #plot_defects_on_image(image_path, xml_path)

            # defects stored as dataframe here
            df = grab_defect_data(image_path, xml_path)

            # # --- add image to API here -----
            # with open(image_path, "rb") as f:
            #     file_hash = md5(f.read()).hexdigest()

            #     file_mimetype = mimetypes.guess_type(image_path)[0]
            #     print(f"File mimetype: {file_mimetype}")
            #     file_guid = upload_to_api(image_path, f, file_mimetype)
            #     print(f"Uploaded {image_path} as {file_guid}")
            #     image_path.append(file_hash)
            #     upload_count += 1

            ## add json bounding box info

            # combine final dataframe

            df_f = pd.concat([df_f, df],ignore_index=True )



        else:
            print(f"Skipping {filename}: No associated .xml file found.")

print("Defect plots generated successfully!")


0.bmp
Skipping 0.bmp: No associated .xml file found.
1.bmp
Skipping 1.bmp: No associated .xml file found.
10.bmp
Skipping 10.bmp: No associated .xml file found.
100.bmp
Skipping 100.bmp: No associated .xml file found.
1000.bmp
Skipping 1000.bmp: No associated .xml file found.
1001.bmp
1001.xml
1002.bmp
Skipping 1002.bmp: No associated .xml file found.
1003.bmp
Skipping 1003.bmp: No associated .xml file found.
1004.bmp
Skipping 1004.bmp: No associated .xml file found.
1005.bmp
Skipping 1005.bmp: No associated .xml file found.
1006.bmp
Skipping 1006.bmp: No associated .xml file found.
1007.bmp
Skipping 1007.bmp: No associated .xml file found.
1008.bmp
Skipping 1008.bmp: No associated .xml file found.
1009.bmp
1009.xml
101.bmp
101.xml
1010.bmp
Skipping 1010.bmp: No associated .xml file found.
1011.bmp
Skipping 1011.bmp: No associated .xml file found.
1012.bmp
Skipping 1012.bmp: No associated .xml file found.
1013.bmp
Skipping 1013.bmp: No associated .xml file found.
1014.bmp
Skipping 1014

In [6]:
df_f

Unnamed: 0,image_path,h,w,c,defect,xmin_n,ymin_n,xmax_n,ymax_n
0,1001.bmp,256,256,3,scratch,0.003906,0.152344,0.066406,0.242188
1,1009.bmp,256,256,3,scratch,0.003906,0.003906,0.121094,0.316406
2,1009.bmp,256,256,3,scratch,0.121094,0.664062,0.230469,1.000000
3,1009.bmp,256,256,3,scratch,0.062500,0.648438,0.113281,0.746094
4,1009.bmp,256,256,3,scratch,0.003906,0.671875,0.042969,0.750000
...,...,...,...,...,...,...,...,...,...
523,987.bmp,256,256,3,paint,0.148438,0.042969,0.195312,0.101562
524,987.bmp,256,256,3,paint,0.003906,0.031250,0.074219,0.253906
525,988.bmp,256,256,3,paint,0.058594,0.902344,0.285156,1.000000
526,988.bmp,256,256,3,scratch,0.859375,0.761719,1.000000,1.000000


In [13]:
# for each image, grab associated 

for index, row in df_f.iterrows():
    defect = row['defect']
    xmin = row['xmin_n']
    xmax = row['xmax_n']
    ymin = row['ymin_n']
    ymax = row['ymax_n']
    #print(f"Defect: {defect}, Xmin: {xmin}")

    print(type(defect))

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class