In [1]:
# Clone the YOLOv5 repository and install the packages listed in its requirements file.
!git clone https://github.com/ultralytics/yolov5
!pip install -r yolov5/requirements.txt
# NOTE(review): this installs whatever PyPI publishes under the name `utils`, which is
# presumably unrelated to the `utils` package inside the yolov5 checkout — confirm it is needed.
!pip install utils
# Install pylabel (imported below as `from pylabel import importer`); the install log is discarded.
!pip install pylabel > /dev/null

Cloning into 'yolov5'...
remote: Enumerating objects: 15705, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 15705 (delta 9), reused 23 (delta 6), pack-reused 15672[K
Receiving objects: 100% (15705/15705), 14.50 MiB | 9.57 MiB/s, done.
Resolving deltas: 100% (10754/10754), done.
Collecting gitpython>=3.1.30
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralytics>=8.0.100
  Downloading ultralytics-8.0.109-py3-none-any.whl (591 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m591.4/591.4 kB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
Collecting setuptools>=65.5.1
  Downloading setuptools-67.8.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Importing the Necessary Libraries

In [2]:
import torch
import os 
import random
import shutil
import xml.etree.ElementTree as ET
import numpy as np
import matplotlib.pyplot as plt
import logging
import os 
import zipfile
import yaml
import PIL

from IPython.display import Image  # for displaying images
from sklearn.model_selection import train_test_split
from pylabel import importer
from xml.dom import minidom
from tqdm import tqdm
from PIL import Image, ImageDraw

# Raise the root logger's threshold to CRITICAL so lower-severity log messages are not shown.
logging.getLogger().setLevel(logging.CRITICAL)
# Seed Python's `random` module so the random picks made later in the notebook are repeatable.
random.seed(42)

# Convert COCO JSON Annotations to YOLO TXT Files

In [None]:
from pylabel import importer

## Import COCO Annotations

In [3]:
# Copy the Cigarrette_Butts dataset folder from the Kaggle input into the current directory
# Note: This may take some time depending on the size of the folder
!cp -r /kaggle/input/plastic-bag-and-cigratte-butts/Cigarrette_Butts ./

In [None]:
# Copy the `train` folder of the cigarette-butt input dataset into the current directory
# (a later cell reads it as ./train)
!cp -r  /kaggle/input/cigarette-butt/images/train ./

In [None]:
# Copy the single test image (test_image.jpg) into the current directory
!cp -r ../input/plastic-paper-garbage-bag-synthetic-images/ImageClassesCombinedWithCOCOAnnotations/test_image.jpg ./

In [None]:
# Path to the COCO annotation file (coco_instances.json in the current directory)
path_to_annotations = r"./coco_instances.json"
# Path to the images (needed when they are in a different folder than the annotations)
path_to_images = r"./images_raw"

# Import the dataset into the pylabel schema
# NOTE(review): the dataset name "BCCD_coco" looks like a leftover from another dataset — confirm it is intended.
dataset = importer.ImportCoco(path_to_annotations, path_to_images=path_to_images, name="BCCD_coco")
path_to_cigar_anno = r"./train"
for file in os.listdir(path_to_cigar_anno):
    filename = file.split(".")[-1]
    if filename.endswith("xml"):

## Analyze annotations
Pylabel can calculate basic summary statistics about the dataset, such as the number of files and the classes.
The dataset is stored as a pandas DataFrame, so the developer can do additional exploratory analysis on it.

In [None]:
# Summarise the imported dataset: image count, class count, class names,
# per-class counts and the annotation path, printed one after another.
analysis = dataset.analyze
summary_lines = [
    f"Number of images: {analysis.num_images}",
    f"Number of classes: {analysis.num_classes}",
    f"Classes:{analysis.classes}",
    f"Class counts:\n{analysis.class_counts}",
    f"Path to annotations:\n{dataset.path_to_annotations}",
]
print("\n".join(summary_lines))

## Visualize Annotations 
You can render the bounding boxes for your image to inspect them and confirm that they imported correctly.

In [None]:
# Render the bounding boxes for two sample images, once selected by index and
# once by file path. This is a best-effort preview: a failure must not stop the
# notebook, but it is reported instead of being silently discarded.
try:
    display(dataset.visualize.ShowBoundingBoxes(2))
    display(dataset.visualize.ShowBoundingBoxes("./images_raw/00000002.jpg"))
except Exception as exc:
    # `Exception` (not a bare except) so that KeyboardInterrupt still interrupts the cell.
    print(f"Could not render bounding boxes: {exc!r}")

## Export to YOLOv5 Format

In [None]:
# This cell may take some time depending on the size of the dataset.
# Point the dataset's annotation path at "labels" before exporting.
dataset.path_to_annotations = "labels"
# Export in YOLOv5 format to ./text_files (the folder the following cells read from);
# the trailing `;` keeps the call's return value out of the cell output.
dataset.export.ExportToYoloV5(output_path='text_files');

In [None]:
# Shift the class id (first field of every label line) down by one in each
# exported label file, so that ids starting at 1 become 0-based.
#
# All files are read and validated before anything is written. If any class id
# is already below 1, the shift is assumed to have been applied before and every
# file is left untouched, so re-running this cell no longer produces negative ids.
# (Limitation: this check relies on the lowest class id actually occurring in the data.)
path = './text_files' #path of labels
labels = os.listdir(path)

shifted_contents = {}  # label file path -> list of already-shifted, split lines
needs_shift = True
for x in labels:
    if not x.endswith(".txt"):
        continue  # only label files are rewritten; anything else in the folder is ignored
    label_path = os.path.join(path, x)
    with open(label_path, "r") as f:  # read-only: the rewrite happens in a second pass
        lines = [line.split() for line in f if line.strip()]  # blank lines are dropped
    for split_line in lines:
        class_id = int(split_line[0])
        if class_id < 1:
            needs_shift = False  # an id of 0 (or lower) means the ids are already 0-based
        split_line[0] = str(class_id - 1)
    shifted_contents[label_path] = lines

if needs_shift:
    for label_path, lines in shifted_contents.items():
        with open(label_path, 'w') as file:  # rewrite to file
            file.writelines(' '.join(line) + "\n" for line in lines)
else:
    print("Class ids are already 0-based; label files left unchanged.")

# Splitting the Dataset

In [None]:
# Read images and annotations
image_dir = r'./images_raw'
annotation_dir = './text_files'

# Index both folders by file stem so every image is paired with its own label
# file by name, not by its position in two independently sorted listings
# (which mispairs or fails as soon as one side has an extra or missing file).
images_by_stem = {
    os.path.splitext(x)[0]: os.path.join(image_dir, x)
    for x in os.listdir(image_dir)
}
annotations_by_stem = {
    os.path.splitext(x)[0]: os.path.join(annotation_dir, x)
    for x in os.listdir(annotation_dir) if x[-3:] == "txt"
}

paired_stems = images_by_stem.keys() & annotations_by_stem.keys()
unpaired_count = len(images_by_stem.keys() ^ annotations_by_stem.keys())
if unpaired_count:
    print(f"Skipping {unpaired_count} file(s) without a matching image/annotation partner")

# Sort by image path so the order (and therefore the seeded split) matches a
# plain sorted listing whenever every image has a label.
pairs = sorted((images_by_stem[stem], annotations_by_stem[stem]) for stem in paired_stems)
images = [image_path for image_path, _ in pairs]
annotations = [annotation_path for _, annotation_path in pairs]

# Split the dataset into train-valid-test splits: 80% train, then the remaining
# 20% is halved into validation and test.
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)

In [None]:
# Sanity check: show the number of training images next to the number of training annotations.
len(train_images),len(train_annotations)

# Format Directories

In [None]:
# Create images/{train,val,test} and annotations/{train,val,test}.
# os.makedirs with exist_ok=True creates the parent folders as needed and does
# not fail when the cell is run again.
for top_level in ("images", "annotations"):
    for split in ("train", "val", "test"):
        os.makedirs(os.path.join(top_level, split), exist_ok=True)

In [None]:
#Utility function to move images 
def move_files_to_folder(list_of_files, destination_folder):
    """Move every path in ``list_of_files`` into ``destination_folder``.

    Args:
        list_of_files: iterable of file paths to move.
        destination_folder: directory that receives the files. It is created
            (including parents) if missing, so a file is never renamed *to*
            the destination path by ``shutil.move``.

    Raises:
        Exception: whatever ``shutil.move`` raised for the first file that
            could not be moved; the offending path is printed first.
    """
    os.makedirs(destination_folder, exist_ok=True)
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except Exception:
            # Show which file failed, then surface the real error, not a
            # bare assertion that would hide the cause.
            print(f)
            raise

# Send each split's images and annotations to its folder, in the order
# train / val / test for images, then the same for annotations.
split_destinations = [
    (train_images, 'images/train'),
    (val_images, 'images/val/'),
    (test_images, 'images/test/'),
    (train_annotations, 'annotations/train/'),
    (val_annotations, 'annotations/val/'),
    (test_annotations, 'annotations/test/'),
]
for split_files, split_folder in split_destinations:
    move_files_to_folder(split_files, split_folder)

In [None]:
# Rename the `annotations` folder to `labels`.
# NOTE(review): presumably because YOLOv5 looks for label files in a `labels` folder next to `images` — confirm.
# If a `labels` folder already exists, `mv` places `annotations` inside it instead of renaming.
!mv annotations labels

Move images folder inside the yolov5 directory

In [None]:
# "./yolov5" is expected to exist already as a directory (cloned in the first cell),
# in which case the folder ends up at ./yolov5/images.
shutil.move("./images", "./yolov5")

Move labels folder inside the yolov5 directory

In [None]:
# "./yolov5" is expected to exist already as a directory (cloned in the first cell),
# in which case the folder ends up at ./yolov5/labels.
shutil.move("./labels", "./yolov5")

# Adjusting the dataset.yaml file

* `path`: images
* `train`: train
* `val`: val
* `test`: test

In [8]:
# Load the dataset's original data.yaml and print it before any adjustment.
# NOTE(review): yaml.load with FullLoader is kept as in the original; if this file
# could ever come from an untrusted source, consider yaml.safe_load.
yaml_params = {}
with open(r'/kaggle/working/Cigarrette_Butts/data.yaml') as file:
    # FullLoader turns the YAML document into plain Python objects (a dict here).
    yaml_file_list = yaml.load(file, Loader=yaml.FullLoader)

# Keep the parsed dict under `yaml_params` as well; later cells modify and save it.
yaml_params = yaml_file_list
print(yaml_file_list)

{'train': 'Cigarrette_Butts/train/images', 'val': 'Cigarrette_Butts/val/images', 'test': 'Cigarrette_Butts/test/images', 'nc': 4, 'names': {0: 'CigaretteButt', 1: 'Garbage Bag', 2: 'Paper Bag', 3: 'Plastic Bag'}}


In [19]:
# Adjusting the parameters of the yaml file: replace the relative split paths
# with absolute ones under the dataset root.
dataset_root = '/kaggle/working/Cigarrette_Butts'
yaml_params['train'] = f'{dataset_root}/train/images'
yaml_params['val'] = f'{dataset_root}/val/images'
yaml_params['test'] = f'{dataset_root}/test/images'
# The recorded output of this cell contains a `path` entry that no visible cell
# sets; set it here so a fresh Restart & Run All reproduces the same parameters.
yaml_params['path'] = dataset_root
yaml_params

{'train': '/kaggle/working/Cigarrette_Butts/train/images',
 'val': '/kaggle/working/Cigarrette_Butts/val/images',
 'test': '/kaggle/working/Cigarrette_Butts/test/images',
 'nc': 4,
 'names': {0: 'CigaretteButt',
  1: 'Garbage Bag',
  2: 'Paper Bag',
  3: 'Plastic Bag'},
 'path': '/kaggle/working/Cigarrette_Butts'}

In [20]:
# Write the adjusted parameters to dataset.yaml in the current working directory
# (the training command further down reads /kaggle/working/dataset.yaml).
with open(r'dataset.yaml', 'w') as file:
    # NOTE(review): with a stream argument yaml.dump presumably returns None,
    # so `documents` carries no data — confirm and drop the name if unused.
    documents = yaml.dump(yaml_params, file)

# Training the Custom Dataset using YOLOv5

Now, we train the network. We use various flags to set options regarding training.

In [16]:
# Change the working directory: `%cd` is called without a target here, and the recorded
# output shows the notebook ending up in /kaggle/working (not inside the yolov5 folder).
%cd 

/kaggle/working


## Training using YOLOv5

In [None]:
# Train YOLOv5-m on the custom dataset: 640 px images, batch size 32, 100 epochs, starting
# from the yolov5m.pt weights and using /kaggle/working/dataset.yaml. The run is named
# `yolo_finetune`; the recorded log below reports project=yolov5/runs/train and freeze=[1],
# and shows the W&B prompt timing out after 30 seconds before training starts.
!python /kaggle/working/yolov5/train.py --img 640 --cfg /kaggle/working/yolov5/models/yolov5m.yaml --hyp /kaggle/working/yolov5/data/hyps/hyp.scratch-med.yaml --batch 32 --epochs 100 --data /kaggle/working/dataset.yaml --weights yolov5m.pt --workers 24 --name yolo_finetune --freeze 1

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5m.pt, cfg=/kaggle/working/yolov5/models/yolov5m.yaml, data=/kaggle/working/dataset.yaml, hyp=/kaggle/working/yolov5/data/hyps/hyp.scratch-med.yaml, epochs=100, batch_size=32, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=24, project=yolov5/runs/train, name=yolo_finetune, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[1], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with

# Running Inference on the Test Data

There are many ways to run inference using the detect.py file.

The source flag defines the source of our detector, which can be:

* A single image
* A folder of images
* Video
* Webcam

...and various other formats. We want to run it over our test images, so we set the source flag to the folder that contains them (for example `yolov5/images/test`).

The weights flag defines the path of the model which we want to run our detector with.
conf flag is the thresholding objectness confidence.
The name flag defines where the detections are stored. We set this flag to yolo_bag_det; therefore, the detections will be stored in runs/detect/yolo_bag_det/.
With all options decided, let us run inference over our test dataset.

In [None]:
!python detect.py --source 0 --weights runs/train/yolo_bag_det/weights/best.pt --conf 0.25 --name yolo_bag_det

In [None]:
# Show up to 16 randomly chosen detection results in a 4x4 grid.
detections_dir = "runs/detect/yolo_bag_det/"
image_extensions = (".jpg", ".jpeg", ".png", ".bmp")
# Only image files are candidates: the folder may also contain sub-folders or
# other files that PIL cannot open. Sorting makes the seeded pick repeatable.
detection_images = [
    os.path.join(detections_dir, x)
    for x in sorted(os.listdir(detections_dir))
    if x.lower().endswith(image_extensions)
]

fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(15, 15),
                        subplot_kw={'xticks': [], 'yticks': []})
grid_axes = list(axes.flat)

# Sample without replacement so no image is shown twice, and never ask for more
# images than exist (an empty folder simply yields an empty grid).
chosen_images = random.sample(detection_images, min(len(detection_images), len(grid_axes)))
for ax, image_path in zip(grid_axes, chosen_images):
    with PIL.Image.open(image_path) as detection_image:  # close the file once it is drawn
        ax.imshow(detection_image)

# Hide the frames of any grid cells that received no image.
for ax in grid_axes[len(chosen_images):]:
    ax.axis("off")

# Predict on our own Test Image

In [None]:

!python detect.py --source 0 --weights runs/train/yolo_bag_det/weights/best.pt --conf 0.25 --name yolo_bag_det

In [None]:
# Display one randomly selected result from the second detection run at large size.
detections_dir = "runs/detect/yolo_bag_det2/"
detection_images = list(
    map(lambda name: os.path.join(detections_dir, name), os.listdir(detections_dir))
)
random_detection_image = PIL.Image.open(random.choice(detection_images))

plt.figure(figsize=(30,30));
plt.imshow(random_detection_image)
# Remove the tick marks on both axes; the trailing `;` keeps the return value out of the output.
plt.gca().set(xticks=[], yticks=[]);