<h1 style="padding: 25px 25px; background-color: #F5B041; font-family: Sans-Serif; color:black; text-align: center">
DL Project: Personal Protective Equipment Detection for Construction Workers</h1>

B041, Niyati Mehta

B042, Rutvi Mehta

B057, Vinay Purohit

<a id="data-description"></a>
<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Data description</h2>

- The dataset is taken from Kaggle.com:
https://www.kaggle.com/datasets/snehilsanyal/construction-site-safety-image-dataset-roboflow


- The dataset is a great collection of images, annotated with the following labels: 'Hardhat', 'Mask', 'NO-Hardhat', 'NO-Mask', 'NO-Safety Vest', 'Person', 'Safety Cone', 'Safety Vest', 'machinery', 'vehicle'

    - Number of classes: 10
    - Label Annotation: YOLO format (.txt)
    - Metadata: metadata.csv and count.csv provide information about the dataset and the train/val/test counts.




<a id="import-modules"></a>
<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Import modules</h2>

In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.9-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.9-py3-none-any.whl (882 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m882.2/882.2 kB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.9-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.9 ultralytics-thop-2.0.9


In [None]:
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt
from IPython.core.magic import register_line_cell_magic
import yaml
from PIL import Image
import os
import seaborn as sns
from ultralytics import YOLO
from matplotlib.patches import Rectangle
import glob
import cv2


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


<a id="configuration"></a>
<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Configuration</h2>

In [None]:
warnings.filterwarnings("ignore")

np.random.seed(12345)

%matplotlib inline

!wandb disabled

/bin/bash: line 1: wandb: command not found


In [None]:
# Mount Google Drive at /content/drive; the dataset directory used by this
# notebook (INPUT_DIR) lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Root of the dataset on the mounted Drive; the notebook reads the
# train/valid/test sub-folders from here.
INPUT_DIR = '/content/drive/MyDrive/DL_Project/css-data'
# Working directory where data.yaml is written and where the runs/ outputs are read from.
WORK_DIR = '/content/'

<a id="data-exploration"></a>
<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Data Exploration</h2>

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Create data YAML file</b></h4>

In [None]:
# Class names in class-id order: the name at index i belongs to class id i.
classes = [
    'Hardhat',
    'Mask',
    'NO-Hardhat',
    'NO-Mask',
    'NO-Safety Vest',
    'Person',
    'Safety Cone',
    'Safety Vest',
    'machinery',
    'vehicle',
]
# Number of classes, derived from the list above.
num_classes = len(classes)

In [None]:
# YAML key -> dataset sub-folder (note: the 'val' key points at the 'valid' folder).
split_dirs = {'train': 'train', 'val': 'valid', 'test': 'test'}

# Dataset description: one path per split, then the class count and class names.
dict_file = {key: os.path.join(INPUT_DIR, folder) for key, folder in split_dirs.items()}
dict_file['nc'] = num_classes
dict_file['names'] = classes

# Write the description to data.yaml in the working directory.
yaml_path = os.path.join(WORK_DIR, 'data.yaml')
with open(yaml_path, 'w+') as file:
    yaml.dump(dict_file, file)

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Check class balance</b></h4>

In [None]:
# Map the class id as written in the label files (a string) to its class name.
class_idx = {str(i): classes[i] for i in range(num_classes)}

class_stat = {}  # split -> {class name: number of label files that contain the class}
data_len = {}    # split -> number of label files in the split

for mode in ['train', 'valid', 'test']:

    class_count = {name: 0 for name in classes}

    path = os.path.join(INPUT_DIR, mode, 'labels')
    label_files = os.listdir(path)

    for file in label_files:

        with open(os.path.join(path, file)) as f:
            # The class id is the first whitespace-separated token of each line.
            # Taking only the first character (line[0]) would misread ids >= 10
            # and raise KeyError on blank or indented lines, so split instead
            # and skip lines with no content.
            present = {line.split()[0] for line in f if line.strip()}

        # Count each class at most once per file (presence, not number of boxes).
        for cls in present:
            class_count[class_idx[cls]] += 1

    data_len[mode] = len(label_files)
    class_stat[mode] = class_count

In [None]:
# One panel per split, sharing the y axis so the bars are directly comparable.
fig, ax = plt.subplots(1, 3, figsize=(15, 5), sharey=True)

for i, mode in enumerate(['train', 'valid', 'test']):
    # Turn the per-class file counts into a percentage of the split's label files.
    class_share = pd.DataFrame({mode: class_stat[mode]}).T / data_len[mode] * 100
    sns.barplot(class_share, ax=ax[i])
    ax[i].set_title(mode)
    # Rotate only the class names on the x axis; without axis='x' the
    # y tick labels are rotated as well.
    ax[i].tick_params(axis='x', labelrotation=90)
    ax[i].set_ylabel('Percentage of classes')
plt.show()

- The classes are imbalanced.


- The overall class distribution is similar across the train, validation and test sets.

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Image size</b></h4>

In [None]:
# Print every distinct image size found in each split.
for mode in ['train', 'valid', 'test']:
    print(f'\nImage sizes in {mode} set:\n')
    seen_sizes = set()  # sizes already reported for this split
    for file in glob.glob(os.path.join(INPUT_DIR, mode, 'images', '*')):
        # Use the context manager so each file handle is closed right after
        # the size is read, instead of leaving one open per image.
        with Image.open(file) as image:
            size = image.size
        # Report a size only the first time it appears; comparing against the
        # previous image alone would reprint sizes that alternate.
        if size not in seen_sizes:
            print(f'\t{size}')
            seen_sizes.add(size)

- All images have size **640x640**

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Dataset size</b></h4>

In [None]:
# Report how many files each split's images folder contains.
for mode in ('train', 'valid', 'test'):
    image_glob = os.path.join(INPUT_DIR, mode, 'images', '*')
    files = glob.glob(image_glob)
    print(f'{mode} set size: {len(files)}\n')

<a id="train"></a>

<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Train</h2>

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Load model</b></h4>

In [None]:
# Create the YOLO model from the 'yolov8n.pt' checkpoint
# (presumably the pretrained YOLOv8-nano weights - confirm).
model = YOLO('yolov8n.pt')

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Train</b></h4>

In [None]:
# Training settings gathered in one place: dataset description file, task,
# input image size, number of epochs, batch size, mode and the run name.
train_args = {
    'data': os.path.join(WORK_DIR, 'data.yaml'),
    'task': 'detect',
    'imgsz': 640,
    'epochs': 20,
    'batch': 32,
    'mode': 'train',
    'name': 'yolov8n_v1_train',
}

# Start training; the call's return value is displayed as the cell output.
model.train(**train_args)

<a id="test"></a>

<h2 style="padding: 12px 12px; background-color: #F5B041; font-family: Sans-Serif; color:black">
Test</h2>

In [None]:
# Replace `model` with one loaded from the 'yolov8n_v1_train' run's weights/best.pt.
# The path is relative, so it is resolved against the current working directory.
model = YOLO('runs/detect/yolov8n_v1_train/weights/best.pt')

In [None]:
# Run the model on the test split's images folder and keep the returned results.
# save=True presumably writes the annotated images to disk - a later cell reads
# them from runs/detect/predict.
results = model.predict(source=os.path.join(INPUT_DIR, 'test', 'images'), save=True)

<h4 style="border-bottom: 3px solid #FAD7A0; padding: 12px 12px; font-family: Sans-Serif; color:black">
<b>Show prediction results</b></h4>

In [None]:
%matplotlib inline

predicitions = glob.glob(os.path.join(WORK_DIR, 'runs/detect/predict', '*'))
print(len(predicitions))
n = 10

for i in range(n):

        idx = np.random.randint(0, len(predicitions))
        print(idx)
        image = Image.open(predicitions[idx])
        plt.imshow(image)
        plt.grid(False)
        plt.show()

In [None]:
# List the files under runs/detect/predict and preview two randomly chosen ones.
predicitions = glob.glob(os.path.join(WORK_DIR, 'runs/detect/predict', '*'))
print(len(predicitions))
n = 2

shown = 0
while shown < n:
    # Draw a random position in the file list and echo it before displaying.
    idx = np.random.randint(0, len(predicitions))
    print(idx)
    image = Image.open(predicitions[idx])
    plt.imshow(image)
    plt.grid(False)
    plt.show()
    shown += 1

In [None]:
# Show one specific prediction image, addressed by file name inside the
# prediction output folder. (Joining WORK_DIR with an absolute path made
# os.path.join discard WORK_DIR entirely, so build the path from its parts.)
sample_name = '-1969-_png_jpg.rf.41dd58ed3ae83df95fb2417c679d581f.jpg'
sample_path = os.path.join(WORK_DIR, 'runs/detect/predict', sample_name)

if os.path.exists(sample_path):
    # The context manager closes the file once matplotlib has copied the pixels.
    with Image.open(sample_path) as image:
        plt.imshow(image)
    plt.grid(False)
    plt.show()
else:
    # Report the missing file instead of failing with an IndexError.
    print(f'Prediction image not found: {sample_path}')

**VALIDATION**

In [None]:
# Run the model on the validation split's images folder; this rebinds `results`,
# replacing the earlier test-set results.
results = model.predict(source=os.path.join(INPUT_DIR, 'valid', 'images'), save=True)

In [None]:
%matplotlib inline

predicitions = glob.glob(os.path.join(WORK_DIR, 'runs/detect/predict', '*'))
print(len(predicitions))
n = 10

for i in range(n):
        idx = np.random.randint(0, len(predicitions))
        print(idx)
        image = Image.open(predicitions[idx])
        plt.imshow(image)
        plt.grid(False)
        plt.show()

In [None]:
from PIL import Image
from google.colab.patches import cv2_imshow
import cv2
import numpy as np

# Plots written by the 'yolov8n_v1_train' training run; the cells below display them.
confusion_matrix = "/content/runs/detect/yolov8n_v1_train/confusion_matrix.png"
results_path = '/content/runs/detect/yolov8n_v1_train/results.png'
f1_curve = '/content/runs/detect/yolov8n_v1_train/F1_curve.png'
precision = '/content/runs/detect/yolov8n_v1_train/PR_curve.png'

print('Confusion Matrix')
# cv2_imshow expects BGR channel order. cv2.imread returns exactly that, whereas
# an array built from a PIL image is RGB(A) and would be shown with red and blue swapped.
img_np = cv2.imread(confusion_matrix)
if img_np is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(confusion_matrix)
cv2_imshow(img_np)
# No cv2.waitKey / cv2.destroyAllWindows: cv2_imshow renders inline, no window is opened.

In [None]:
print('Results')
# Read with OpenCV so the array is in the BGR order cv2_imshow expects;
# a PIL-derived RGB array would be displayed with red and blue swapped.
img_np1 = cv2.imread(results_path)
if img_np1 is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(results_path)
cv2_imshow(img_np1)

In [None]:
print('F1 curve')
# Read with OpenCV so the array is in the BGR order cv2_imshow expects;
# a PIL-derived RGB array would be displayed with red and blue swapped.
img_np2 = cv2.imread(f1_curve)
if img_np2 is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(f1_curve)
cv2_imshow(img_np2)

In [None]:
print('Precision Recall curve')
# Read with OpenCV so the array is in the BGR order cv2_imshow expects;
# a PIL-derived RGB array would be displayed with red and blue swapped.
img_np3 = cv2.imread(precision)
if img_np3 is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(precision)
cv2_imshow(img_np3)

In [None]:
# Load the results.csv written by the 'yolov8n_v1_train' run and display it as a table.
results_csv_path = '/content/runs/detect/yolov8n_v1_train/results.csv'
results_csv = pd.read_csv(results_csv_path)
results_csv