In [1]:
import os #
import shutil # 
import cv2
import xml.etree.ElementTree as ET #
import matplotlib.pyplot as plt #
from sklearn.model_selection import train_test_split 

# Data

In [2]:
# Root folder of the dataset and its sub-folders
DATASET_DIR = '../datasets'
ANNOTATIONS_DIR = DATASET_DIR + '/annotations'
IMAGES_DIR = DATASET_DIR + '/images'
LABELS_DIR = DATASET_DIR + '/labels'

# Class name -> class index, and the inverse lookup derived from it
CLASS_INDEXS = dict(without_mask=0, with_mask=1, mask_weared_incorrect=2)
CLASS_NAMES = {index: name for name, index in CLASS_INDEXS.items()}

In [3]:
def xml2YoloBox(bndbox, width, height):
    """Turn a corner-style xml box into a normalised, centre-style YOLO box.

        Args:
            bndbox (list | np.ndarray): Box given as [xmin, ymin, xmax, ymax]
            width (int): Width of the entire image
            height (int): Height of the entire image
        Returns:
            (list): [xcenter, ycenter, boxWidth, boxHeight], each divided by the
                matching image dimension
    """
    left, top, right, bottom = bndbox[0], bndbox[1], bndbox[2], bndbox[3]
    return [
        ((left + right) / 2.) / width,
        ((top + bottom) / 2.) / height,
        (right - left) / width,
        (bottom - top) / height,
    ]

def yolo2XmlBox(bndbox, width, height):
    """Turn a normalised, centre-style YOLO box into a corner-style xml box.

        Args:
            bndbox (list | np.ndarray): Box given as [xcenter, ycenter, boxWidth, boxHeight]
            width (int): Width of the entire image
            height (int): Height of the entire image
        Returns:
            (list): [xmin, ymin, xmax, ymax]; each value is passed through int(),
                so fractional pixels are truncated
    """
    xcenter, ycenter = bndbox[0], bndbox[1]
    halfWidth = bndbox[2] / 2.
    halfHeight = bndbox[3] / 2.
    corners = (
        (xcenter - halfWidth) * width,
        (ycenter - halfHeight) * height,
        (xcenter + halfWidth) * width,
        (ycenter + halfHeight) * height,
    )
    return [int(value) for value in corners]

In [4]:
def xml2YoloFormat(filepath):
    """Convert all objects in xml file to Ultralytics YOLO format.

        Args:
            filepath (Path): Path to xml file
        Returns:
            allObjs (list): One entry per <object> element, each
                [classIdx, xcenter, ycenter, boxWidth, boxHeight]
        Raises:
            KeyError: If an object's class name is not a key of CLASS_INDEXS
    """
    root = ET.parse(filepath).getroot()
    size = root.find('size')
    imageWidth = int(size.find('width').text)
    imageHeight = int(size.find('height').text)

    allObjs = []
    for obj in root.findall('object'):
        classIdx = CLASS_INDEXS[obj.find('name').text]
        bndbox = obj.find('bndbox')
        # Read the corners by tag name rather than by child position, so the result
        # does not depend on the order in which the annotation tool wrote them
        xmlBox = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        yoloBox = xml2YoloBox(xmlBox, imageWidth, imageHeight)
        allObjs.append([classIdx] + yoloBox)
    return allObjs

# Preprocess the dataset 
Skip this section if you have already preprocessed the dataset (continue from **Visualization**).

In [5]:
if os.path.exists(LABELS_DIR):
    # When 'train', 'val' and 'test' are all present under LABELS_DIR the assertion
    # fails with the message below; if any of them is missing the cell passes.
    presentEntries = os.listdir(LABELS_DIR)
    splitMissing = [split not in presentEntries for split in ('train', 'val', 'test')]
    assert any(splitMissing), "The dataset was preprocessed"

In [None]:
# Write one YOLO-format txt label file per xml annotation
os.makedirs(LABELS_DIR, exist_ok=True)
for filename in os.listdir(ANNOTATIONS_DIR):
    stem, ext = os.path.splitext(filename)
    if ext.lower() != '.xml':
        # Skip stray entries (hidden files, checkpoint folders, ...) instead of
        # handing them to the xml parser
        continue
    filepath = os.path.join(ANNOTATIONS_DIR, filename)
    data = xml2YoloFormat(filepath)
    # `with` closes the file even if the write raises
    with open(f'{LABELS_DIR}/{stem}.txt', 'w') as labelFile:
        # One object per line: "classIdx xcenter ycenter boxWidth boxHeight"
        labelFile.write('\n'.join(' '.join(map(str, obj)) for obj in data))

In [7]:
def moveFile(filenames, imgPath, imgDest, labelPath, labelDest, imgExt='.png'):
    """Move image/label pairs into their destination folders.

        Args:
            filenames (list): File names without extension
            imgPath (str): Folder that currently holds the images
            imgDest (str): Folder to move the images into (created if missing)
            labelPath (str): Folder that currently holds the '.txt' label files
            labelDest (str): Folder to move the label files into (created if missing)
            imgExt (str, optional): Image file extension including the dot. Default is '.png'
        Raises:
            FileNotFoundError: If the image or the label of a pair does not exist;
                neither file of that pair is moved in this case
    """
    os.makedirs(imgDest, exist_ok=True)
    os.makedirs(labelDest, exist_ok=True)

    for filename in filenames:
        imgSrc = os.path.join(imgPath, filename + imgExt)
        labelSrc = os.path.join(labelPath, filename + '.txt')
        # Check both halves of the pair up front so an image is never moved
        # without its label (which would leave the dataset out of sync)
        for src in (imgSrc, labelSrc):
            if not os.path.isfile(src):
                raise FileNotFoundError(f'Missing file: {src}')
        shutil.move(imgSrc, imgDest)
        shutil.move(labelSrc, labelDest)

In [8]:
# Collect the image names without extension. Sub-folders and non-image entries are
# skipped so stray files never end up in the split.
allFilenames = [
    os.path.splitext(f)[0]
    for f in os.listdir(IMAGES_DIR)
    if f.lower().endswith('.png') and os.path.isfile(os.path.join(IMAGES_DIR, f))
]

#  Split train-val-test (70% / 15% / 15%)
# NOTE: os.listdir returns entries in arbitrary order, so the fixed random_state only
# reproduces the same split for the same directory listing order.
random_state = 1
train, valTest = train_test_split(allFilenames, test_size=0.3, random_state=random_state, shuffle=True)
val, test = train_test_split(list(valTest), test_size=0.5, random_state=random_state, shuffle=True)

# Move each image/label pair to its corresponding split folder
moveFile(train, IMAGES_DIR, f'{IMAGES_DIR}/train/', LABELS_DIR, f'{LABELS_DIR}/train/')
moveFile(val, IMAGES_DIR, f'{IMAGES_DIR}/val/', LABELS_DIR, f'{LABELS_DIR}/val/')
moveFile(test, IMAGES_DIR, f'{IMAGES_DIR}/test/', LABELS_DIR, f'{LABELS_DIR}/test/')

# Visualization

In [9]:
def drawBoxes(image, bndboxes, withConfScore=False, isRgb=True):
    """Draw the given bounding boxes on a copy of the given image.

        Args:
            image (Image): The original image, an array of shape (height, width, channels).
                It is copied, not modified.
            bndboxes (list): Bounding boxes in normalised YOLO format
                [xcenter, ycenter, w, h, cls] or [xcenter, ycenter, w, h, cls, conf];
                conf is only read when withConfScore is True.
            withConfScore (bool, optional): Show confidence score or not. Default is False.
            isRgb (bool, optional): Whether the given image is RGB (True) or BGR (False).
                Only used to keep the box colour of each class consistent.
        Returns:
            (Image): A copy of the image with drawn bounding boxes.
    """
    # Specific colour for each class, defined in RGB order
    rgbColor = {0: (255, 0, 0), 1: (0, 255, 0), 2: (0, 0, 255)}

    newImg = image.copy()
    h, w, _ = newImg.shape

    for obj in bndboxes:
        xmin, ymin, xmax, ymax = yolo2XmlBox(obj[:4], w, h)
        # The class may arrive as a float (e.g. 1.0); cast it so the lookups use the int keys
        classIdx = int(obj[4])
        className = CLASS_NAMES[classIdx]
        # Reverse the channel order for BGR images so a class keeps the same visible colour
        color = rgbColor[classIdx] if isRgb else rgbColor[classIdx][::-1]
        text = f"{className}({obj[5]})" if withConfScore else f"{className}"

        # putText's origin is the bottom-left corner of the text: put the label just above
        # the box, or just inside its top edge when there is no room above it
        textY = ymin - 5 if ymin - 5 > 10 else ymin + 15

        newImg = cv2.rectangle(newImg, (xmin, ymin), (xmax, ymax), color=color, thickness=2)
        newImg = cv2.putText(newImg, text, (xmin, textY), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                             fontScale=0.5, color=color, thickness=1, lineType=cv2.LINE_AA)
    return newImg

In [10]:
def showImage(imagePath, predictedBoxes=None, labelPath=None):
    """Display an image with optional predicted bounding boxes and true bounding boxes.

        Args:
            imagePath (Path): Path to image
            predictedBoxes (list | np.ndarray, optional): Predicted boxes, each
                [xcenter, ycenter, w, h, cls, conf]. Default is None
            labelPath (str, optional): Path to the label file holding the true bounding
                boxes, one "cls xcenter ycenter w h" line per box. Default is None
        Raises:
            FileNotFoundError: If the image cannot be read
    """
    # Load the image first so a bad path fails before an empty figure is created
    image = cv2.imread(str(imagePath))
    if image is None:
        # cv2.imread reports an unreadable file by returning None instead of raising
        raise FileNotFoundError(f"Unable to read image: {imagePath}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Create a figure with one panel for the original plus one per optional overlay
    fig = plt.figure(figsize=(12, 8))
    numRows = 1
    numCols = 1 + (predictedBoxes is not None) + (labelPath is not None)

    # Display the original image
    imgIdx = 1
    ax1 = plt.subplot(numRows, numCols, imgIdx)
    ax1.imshow(image)
    ax1.set_title('Original image')

    # Display the predicted bounding boxes
    if predictedBoxes is not None:
        imgIdx += 1
        ax2 = plt.subplot(numRows, numCols, imgIdx)
        predictedImg = drawBoxes(image, predictedBoxes, withConfScore=True)
        ax2.imshow(predictedImg)
        ax2.set_title('Prediction')

    # Display the true bounding boxes
    if labelPath is not None:
        imgIdx += 1
        ax3 = plt.subplot(numRows, numCols, imgIdx)

        # Load true bounding boxes from label file
        trueBoxes = []
        with open(labelPath) as labelFile:
            for line in labelFile:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                values = list(map(float, fields))
                # The label file stores the class first; drawBoxes expects it after the box
                trueBoxes.append([values[i] for i in (1, 2, 3, 4, 0)])

        groundtruthImg = drawBoxes(image, trueBoxes, withConfScore=False)
        ax3.imshow(groundtruthImg)
        ax3.set_title('Ground truth')
    fig.tight_layout()
    plt.show()

In [None]:
# Sample from the test split; `filename`, `imagePath` and `labelPath` are reused by later cells
filename = 'maksssksksss129'
imagePath = f'{IMAGES_DIR}/test/{filename}.png'
labelPath = f'{LABELS_DIR}/test/{filename}.txt'
# Show the original image next to its ground-truth boxes
showImage(imagePath, labelPath=labelPath)

# Model

In [12]:
# !wandb disabled

In [13]:
root = os.getcwd()
# Resolve to a clean absolute path instead of leaving '<cwd>/../datasets' in the yaml
datasetPath = os.path.realpath(os.path.join(root, DATASET_DIR))

# Build the class list from CLASS_NAMES so the yaml cannot drift from the label mapping
namesYaml = '\n'.join(f'    {idx}: {name}' for idx, name in sorted(CLASS_NAMES.items()))

# Create data.yaml
yamlText = f"""path: {datasetPath}
train: images/train 
val: images/val/ 
test: images/test/

names:
{namesYaml}"""

with open("data.yaml", 'w') as file:
    file.write(yamlText)

In [14]:
from ultralytics import YOLO

In [None]:
# Start from the pretrained yolov8n.pt checkpoint (recommended for training)
model = YOLO("yolov8n.pt")

# Train on the dataset described by data.yaml
trainSettings = dict(data="data.yaml", epochs=5, imgsz=480)
trainResults = model.train(**trainSettings)

In [None]:
# Load results.png from the training run's output folder (OpenCV reads BGR; convert for matplotlib)
resultsPath = f'{str(trainResults.save_dir)}/results.png'
resultFig = cv2.cvtColor(cv2.imread(resultsPath), cv2.COLOR_BGR2RGB)

# Show it on a bare axes: the picture already carries its own axes and labels
fig, ax = plt.subplots(figsize=(15, 10))
ax.axis('off')
ax.imshow(resultFig)
plt.show()

In [None]:
# Reload the best weights saved by the training run
bestWeightsPath = f'{str(trainResults.save_dir)}/weights/best.pt'
model = YOLO(bestWeightsPath)

# Validate the model; no `split` is given, so the library default is used (presumably 'val')
metrics = model.val(data="data.yaml")

In [None]:
# Evaluate on the test split; this overwrites the `metrics` of the validation run above
metrics = model.val(data="data.yaml", split='test')

In [None]:
%matplotlib inline 

# Run inference on the sample image; save=True asks the library to write the rendered result to disk
pred = model.predict(imagePath, save=True)

# Display saved prediction
# NOTE(review): assumes the rendered image is stored as '<filename>.jpg' inside save_dir —
# confirm against the installed ultralytics version
predictedPath = f'{pred[0].save_dir}/{filename}.jpg'
predictedImg = cv2.imread(predictedPath)
# OpenCV loads images as BGR; convert to RGB for matplotlib
predictedImg = cv2.cvtColor(predictedImg, cv2.COLOR_BGR2RGB)
plt.imshow(predictedImg)

In [20]:
def getPrediction(model, imagePath, conf=0.7):
    """Run the given model on the given image and return its detections.

        Args:
            model (torch.nn.Module): Model used for prediction
            imagePath (Path): Path to the image to be predicted; it is passed unchanged
                as `source` to `model.predict`
            conf (float, optional): Confidence threshold passed to `model.predict`.
                Default is 0.7
        Return:
            prediction (list): One [xcenter, ycenter, w, h, cls, conf] entry per detected
                box, with the box taken from `xywhn` and conf rounded to 2 decimals
    """
    # Only the first result is used: a single source is predicted
    boxes = model.predict(source=imagePath, conf=conf, verbose=False)[0].boxes
    prediction = []
    for i in range(len(boxes.xywhn)):
        classIdx = boxes.cls[i].cpu().item()
        score = round(boxes.conf[i].cpu().item(), 2)
        pred = list(boxes.xywhn[i].cpu().numpy())
        pred.append(classIdx)
        pred.append(score)
        prediction.append(pred)
    return prediction

In [None]:
# Predict on the sample image, then show the original, the prediction and the ground truth side by side
prediction = getPrediction(model, imagePath)
showImage(imagePath, predictedBoxes=prediction, labelPath=labelPath)

# REAL-TIME CAMERA

In [None]:
%matplotlib inline

os.makedirs('camera', exist_ok=True) # Directory to store images
print("Press 'q' to capture image")
print("Press 'w' to predict on real-time")
print("Press 'space' to close camera")

# Open the video capture object for your webcam (index 0)
cap = cv2.VideoCapture(0)

isRealtimeDetection = False
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # Check if frame is captured successfully
        if not ret:
            print("Error! Unable to capture frame")
            break

        # Get keyboard input (masked once; no key pressed gives -1 & 0xFF == 255)
        key = cv2.waitKey(1) & 0xFF

        # Press 'w' to toggle real-time detection
        if key == ord('w'):
            isRealtimeDetection = not isRealtimeDetection

        # Keep `frame` untouched and draw overlays on a separate image, so a capture
        # below never contains boxes that were drawn for display
        displayFrame = frame
        if isRealtimeDetection:
            # The frame is handed to the model directly (BGR array) instead of being
            # written to disk and read back for every frame
            prediction = getPrediction(model, frame)
            displayFrame = drawBoxes(frame, bndboxes=prediction, withConfScore=True, isRgb=False)

        # Press 'q' to capture image and exit
        if key == ord('q'):
            # Save the raw frame as an image
            imgPath = 'camera/captured_image.png'
            cv2.imwrite(imgPath, frame)
            print("Image captured!")
            prediction = getPrediction(model, imgPath)
            showImage(imgPath, predictedBoxes=prediction)
            break

        # Press 'space' to exit without capturing
        if key == ord(' '):
            break

        # Display the resulting frame
        cv2.imshow('Camera', displayFrame)
finally:
    # Release the capture and close all windows, even if prediction or drawing raised
    cap.release()
    cv2.destroyAllWindows()
