## Download and prepare the dataset

In [14]:
import numpy as np
import shutil
from zipfile import ZipFile
import matplotlib.pyplot as plt
import os
from PIL import Image
import cv2
import glob
import random
from sklearn.model_selection import train_test_split

In [2]:
! gdown --id 1KWnX3eMPJrzhsegi0LmyGUbUV5pqKw_R
!apt-get install -y unzip
!unzip ss_dataset.zip

Downloading...
From: https://drive.google.com/uc?id=1KWnX3eMPJrzhsegi0LmyGUbUV5pqKw_R
To: /content/ss_dataset.zip
100% 5.59G/5.59G [00:34<00:00, 161MB/s]
Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest version (6.0-25ubuntu1.1).
0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.
Archive:  ss_dataset.zip
   creating: ss_dataset/
   creating: ss_dataset/3/
  inflating: ss_dataset/3/210.bmp    
  inflating: ss_dataset/3/65_label.bmp  
  inflating: ss_dataset/3/495.bmp    
  inflating: ss_dataset/3/266.bmp    
  inflating: ss_dataset/3/423_label.bmp  
  inflating: ss_dataset/3/518_label.bmp  
  inflating: ss_dataset/3/540_label.bmp  
  inflating: ss_dataset/3/423.bmp    
  inflating: ss_dataset/3/221.bmp    
  inflating: ss_dataset/3/391.bmp    
  inflating: ss_dataset/3/309_label.bmp  
  inflating: ss_dataset/3/70.bmp     
  inflating: ss_dataset/3/231.bmp    
  inflating: ss_dataset/3/90_label.bmp  


In [3]:
root_dir = '/content/ss_dataset'

data_dir = os.path.join(root_dir)

image_root = '/content/train'
label_root = '/content/train_masks'
# exist_ok makes the cell safe to re-run.
os.makedirs(image_root, exist_ok=True)
os.makedirs(label_root, exist_ok=True)

images = list()
labels = list()

'''
Iterate over the whole dataset and collect the files as PNG images in the train and
train_masks directories.
'''

################################################
############## YOUR CODES GO HERE ##############

TARGET_SIZE = (256, 256)


def convert_to_png(paths, out_dir, resample=None):
    """Resize each image in `paths` to TARGET_SIZE and save it as PNG in `out_dir`.

    Args:
        paths: iterable of source image file paths.
        out_dir: existing directory that receives `<source stem>.png`.
        resample: PIL resampling filter; None keeps PIL's default filter.
    """
    for path in paths:
        stem = os.path.splitext(os.path.basename(path))[0]
        # The context manager closes the source file handle once it has been resized.
        with Image.open(path) as source:
            if resample is None:
                resized = source.resize(TARGET_SIZE)
            else:
                resized = source.resize(TARGET_SIZE, resample)
        # NOTE(review): the output name uses only the file's base name, so files with
        # the same name in different sub-folders overwrite each other -- confirm that
        # names are unique across sub-folders.
        resized.save(os.path.join(out_dir, f"{stem}.png"), 'PNG')


# Collect the file paths: '<n>_label.bmp' files are masks, other '.bmp' files are images.
for sub_name in sorted(os.listdir(data_dir)):
    sub_dir = os.path.join(data_dir, sub_name)
    if not os.path.isdir(sub_dir):
        # Stray files next to the sub-folders would make os.listdir(sub_dir) fail.
        continue
    for file_name in sorted(os.listdir(sub_dir)):
        if not file_name.endswith('.bmp'):
            continue
        if '_label' in file_name:
            labels.append(os.path.join(sub_dir, file_name))
        else:
            images.append(os.path.join(sub_dir, file_name))

# Images keep PIL's default resampling filter.
convert_to_png(images, image_root)

# Masks must use nearest-neighbour resampling: an interpolating filter blends
# neighbouring label values and creates pixel values that are not in the original mask.
convert_to_png(labels, label_root, Image.NEAREST)

################################################

In [4]:
# Report how many entries each output directory holds.
for caption, folder in (("Train set:  ", "/content/train"),
                        ("Train masks:", "/content/train_masks")):
    entries = os.listdir(folder)
    print(caption, len(entries))

Train set:   859
Train masks: 859


In [5]:
'''
Create YOLO-format labels for each image.
Follow the routine described in the question.
Don't forget to split the dataset into train and val.
Hint:
  https://www.geeksforgeeks.org/find-and-draw-contours-using-opencv-python/
  https://docs.opencv.org/4.x/da/d0c/tutorial_bounding_rects_circles.html
'''

################################################
############## YOUR CODES GO HERE ##############

def yolo_based_label(contours, image, out_path=None):
    """Write one YOLO label line per contour to a text file.

    Each line is `0 x_center y_center width height`: the bounding rectangle of the
    contour, normalised by the image width/height, with the class id fixed to 0.

    Args:
        contours: contours as returned by cv2.findContours.
        image: array whose shape starts with (height, width).
        out_path: label file to (over)write. When omitted, the module-level
            `label_path` is used, which is what earlier callers relied on.
    """
    if out_path is None:
        out_path = label_path
    img_h, img_w = image.shape[:2]

    with open(out_path, 'w') as f:
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            # Box centre and size, normalised to the 0..1 range.
            x_normalized = (x + w / 2) / img_w
            y_normalized = (y + h / 2) / img_h
            w_normalized = w / img_w
            h_normalized = h / img_h
            f.write(f"{0} {x_normalized:.6f} {y_normalized:.6f} {w_normalized:.6f} {h_normalized:.6f}" + "\n")


labels_written = 0
for image_file in sorted(os.listdir(image_root)):
    # After the first run this folder also contains the generated .txt labels;
    # cv2.imread returns None for them, so only image files are processed.
    if not image_file.lower().endswith('.png'):
        continue
    image_path = os.path.join(image_root, image_file)
    label_path = os.path.join(image_root, os.path.splitext(image_file)[0] + '.txt')

    # Read the image, convert it to grayscale and then to a binary image.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # NOTE(review): the contours are taken from the thresholded image itself, not from
    # the matching mask in label_root, and RETR_TREE also returns nested contours, each
    # of which becomes its own box -- confirm this is the routine the question asks for.
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    yolo_based_label(contours, image, label_path)

    # Re-save the decoded image in the train folder. image_path is used instead of a
    # relative 'train' path so the result does not depend on the working directory.
    cv2.imwrite(image_path, image)
    labels_written += 1

# Number of label files written by this cell.
print(labels_written)
################################################

859


In [6]:
# Count the .png files in train/ (the path is relative to the current working directory).
!ls train/*.png | wc -l

859


In [7]:
# Count the .txt files in train/ (the path is relative to the current working directory).
!ls train/*.txt | wc -l

859


In [11]:
def yolo_lines_to_rects(lines, img_w, img_h):
    """Convert YOLO label lines to pixel rectangles.

    Args:
        lines: iterable of strings of the form `class x_center y_center width height`,
            with the four box values normalised to the 0..1 range.
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        List of (x, y, w, h) integer tuples, where (x, y) is the top-left corner.
        Lines with fewer than five fields (e.g. blank lines) are skipped.
    """
    rects = []
    for line in lines:
        fields = line.split()
        if len(fields) < 5:
            continue
        x_center, y_center, box_w, box_h = (float(value) for value in fields[1:5])
        w = box_w * img_w
        h = box_h * img_h
        x = x_center * img_w - w / 2
        y = y_center * img_h - h / 2
        rects.append((int(round(x)), int(round(y)), int(round(w)), int(round(h))))
    return rects


def plot_bbox_on_img(rect, img):
    '''
    Draw the rects around objects and show the result.

    rect: either the path of a YOLO label file (normalised boxes, converted to pixels
          here) or an iterable of pixel (x, y, w, h) rectangles.
    img:  path of the image file to draw on.
    '''
    ################################################
    ############## YOUR CODES GO HERE ##############
    imgfile = cv2.imread(img)
    if imgfile is None:
        raise FileNotFoundError(f"Could not read image: {img}")
    if isinstance(rect, (str, os.PathLike)):
        # A label-file path was passed: parse it instead of iterating its characters.
        img_h, img_w = imgfile.shape[:2]
        with open(rect) as label_file:
            rect = yolo_lines_to_rects(label_file, img_w, img_h)
    for r in rect:
        x, y, w, h = map(int, r)
        cv2.rectangle(imgfile, (x, y), (x + w, y + h), (0, 255, 0), 2)
    ################################################
    # cv2.imread returns BGR channel order; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(imgfile, cv2.COLOR_BGR2RGB))
    plt.show()

# Absolute paths: the relative 'content/train' only resolves when the working
# directory is '/', so the glob below found no label files.
base_path = '/content/train'
txtsPath = '/content/train'

txts = sorted(glob.glob(os.path.join(txtsPath, '*.txt')))

for txt in txts[:10]:
    stem = os.path.splitext(os.path.basename(txt))[0]
    plot_bbox_on_img(txt, os.path.join(base_path, '{}.png'.format(stem)))

## Train the model

In [None]:
# Clone the YOLOv7 repository into the current working directory.
!git clone https://github.com/WongKinYiu/yolov7.git

In [13]:
#### Clone the code from the repo: https://github.com/WongKinYiu/yolov7.git
#### Read the README file

################################################
############## YOUR CODES GO HERE ##############

# Install the packages listed in the cloned repository's requirements file (-q: quiet output).
!pip install -qr /content/yolov7/requirements.txt

################################################

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
data_path = '/content/'
image_path = '/content/train'
label_path = '/content/train_masks'

train_file = os.path.join(data_path, 'train.txt')
val_file = os.path.join(data_path, 'val.txt')

# NOTE(review): two classes are declared here, but the label-generation cell only ever
# writes class id 0 -- confirm that two classes are intended.
class_names = ['0', '1']
# Sorted so the seeded split below is reproducible; os.listdir order is arbitrary.
image_files = sorted(f for f in os.listdir(image_path) if f.endswith('.jpg') or f.endswith('.png'))

# Make the train and validation datasets.
train, validation = train_test_split(image_files, test_size=0.2, random_state=42)


def write_split_file(list_file, names, image_dir):
    """Write the image paths of one split to `list_file`, one path per line.

    Only images that have a `<stem>.txt` label file next to them in `image_dir` are
    listed. Label paths are not written: the list file is a list of images.

    Args:
        list_file: path of the text file to (over)write.
        names: image file names belonging to this split.
        image_dir: directory that holds the images and their label files.

    Returns:
        Number of image paths written.
    """
    written = 0
    with open(list_file, 'w') as f:
        for name in names:
            # Build per-file paths from image_dir each time; reassigning the directory
            # variable inside the loop would make the paths accumulate.
            img_file = os.path.join(image_dir, name)
            txt_file = os.path.splitext(img_file)[0] + '.txt'
            if os.path.isfile(txt_file):
                f.write(f"{img_file}\n")
                written += 1
    return written


n_train = write_split_file(train_file, train, image_path)
n_val = write_split_file(val_file, validation, image_path)
# Quick sanity check: an empty split means the label files were not found.
print(f"train: {n_train} images, val: {n_val} images")


# Make the YAML file.
yaml = os.path.join(data_path, 'customdata.yaml')

with open(yaml, 'w') as f:
    f.write("# Dataset\n\n")
    f.write(f"train: {train_file}\n")
    f.write(f"val: {val_file}\n")
    f.write("\n")
    f.write("# Number of classes\n")
    f.write(f"nc: {len(class_names)}\n")
    f.write("\n")
    f.write("# Class names\n")
    f.write("names: " + str(class_names) + "\n")

In [None]:
!python train_aux.py --workers 8 --device 0 --batch-size 16 --data /content/customdata.yaml --img 256 256 --cfg cfg/training/yolov7-w6.yaml --weights '' --name yolov7-w6 --hyp data/hyp.scratch.p6.yaml
!python train.py --workers 8 --device 0 --batch-size 32 --data /content/customdata.yaml --img 256 256 --cfg cfg/training/yolov7.yaml --weights '' --name yolov7 --hyp data/hyp.scratch.p5.yaml

## Run inference with the trained model

In [None]:
### Show the results


################################################
############## YOUR CODES GO HERE ##############

################################################