# Data Generation Class


In [1]:
import json
import numpy as np
import cv2 as cv
import math
import os
import random


class GenerateData:
    """Synthetic wafer-image generator with labelme-style box annotations.

    The canvas is a square ``uint8`` array. ``create_img`` paints three
    concentric filled circles (stage, wafer, grid area) and a line grid on
    it; each ``*Defect`` method then draws one defect and records a
    rectangle for it in ``boundary_box``. ``save_img`` / ``save_json``
    write the image and the collected annotations to disk.
    """

    def __init__(self, img_size):
        """Allocate a black square canvas and derive the drawing geometry.

        Args:
            img_size: Side length of the square image in pixels.
        """
        self.img_size = img_size
        self.imgarr = np.zeros((img_size, img_size), dtype=np.uint8)
        # shape[0] is the row count and shape[1] the column count; the
        # canvas is square, so both values are equal here.
        self.dim_x = self.imgarr.shape[0]
        self.dim_y = self.imgarr.shape[1]
        half_size = self.img_size / 2
        self.img_margin = 0.2 * half_size
        self.stage_margin = 0.15 * half_size
        self.center_x = math.floor(self.dim_x / 2)
        self.center_y = math.floor(self.dim_y / 2)
        self.stage_radius = half_size - self.stage_margin
        self.wafer_radius = half_size - self.img_margin
        self.notch_radius = round(1.25 * (self.wafer_radius / 100))
        self.grid_radius = int(0.95 * self.wafer_radius)
        self.boundary_box = []

    def GenerateJson(self, name, topr_x, topr_y, botr_x, botr_y):
        """Append one rectangle annotation to ``boundary_box``.

        Args:
            name: Label stored for the shape.
            topr_x, topr_y: First corner of the rectangle.
            botr_x, botr_y: Opposite corner of the rectangle.

        Coordinates are converted with ``int()`` so the entry is JSON
        serialisable even when NumPy scalars are passed in.
        """
        self.boundary_box.append({
            "label": f"{name}",
            "points": [[int(topr_x), int(topr_y)], [int(botr_x), int(botr_y)]],
            "group_id": None,
            "shape_type": "rectangle",
            "flags": {}
        })

    @staticmethod
    def _pad_to_min_size(low, high, min_size):
        """Widen the interval ``[low, high]`` so it spans at least ``min_size``."""
        deficit = min_size - abs(high - low)
        if deficit <= 0:
            return low, high
        # Split the deficit as floor/ceil so an odd deficit still reaches
        # min_size (padding floor(deficit / 2) on both sides falls 1 short).
        before = deficit // 2
        return low - before, high + (deficit - before)

    def DrawBoundary(self, min_x, min_y, max_x, max_y, name, min_box_size=20):
        """Record the bounding box of a defect.

        Args:
            min_x, min_y: Top-left corner of the tight box.
            max_x, max_y: Bottom-right corner of the tight box.
            name: Defect label. ``'cloudy'`` boxes are grown by 2 px on every
                side; all other boxes are padded up to ``min_box_size``.
            min_box_size: Minimum width and height for non-cloudy boxes.
        """
        if name == 'cloudy':
            min_x, min_y = min_x - 2, min_y - 2
            max_x, max_y = max_x + 2, max_y + 2
        else:
            min_x, max_x = self._pad_to_min_size(min_x, max_x, min_box_size)
            min_y, max_y = self._pad_to_min_size(min_y, max_y, min_box_size)

        self.GenerateJson(name=name, topr_x=min_x, topr_y=min_y, botr_x=max_x, botr_y=max_y)
        print(f"Finish Boundary for {name}")

    def Circle1(self, color):
        """Fill the stage disc (radius ``stage_radius``) with ``color``."""
        cv.circle(self.imgarr, (self.center_x, self.center_y), int(self.stage_radius), color, -1)

    def Circle2(self, color):
        """Fill the wafer disc (radius ``wafer_radius``) with ``color``."""
        cv.circle(self.imgarr, (self.center_x, self.center_y), int(self.wafer_radius), color, -1)

    def Circle3(self, color):
        """Fill the grid-area disc (radius ``grid_radius``) with ``color``."""
        cv.circle(self.imgarr, (self.center_x, self.center_y), int(self.grid_radius), color, -1)

    def create_img(self):
        """Paint the background with random gray levels and reset the annotations."""
        self.Circle1(color=random.randint(2, 15))
        self.Circle2(color=random.randint(30, 75))
        self.Circle3(color=random.randint(80, 95))
        self.DrawGrid(color=random.randint(100, 150), spacing=random.randint(10, 15))
        self.boundary_box = []

    def DrawGrid(self, color=180, spacing=10):
        """Draw horizontal and vertical grid lines clipped to the grid circle.

        Args:
            color: Gray level of the lines.
            spacing: Distance in pixels between neighbouring lines.
        """
        num_lines = int(2 * self.grid_radius / spacing)

        for i in range(-num_lines // 2, num_lines // 2 + 1):
            offset = i * spacing
            if abs(offset) > self.grid_radius:
                continue  # this line would lie entirely outside the circle

            # Half the chord length at this distance from the centre; the
            # same value bounds both the horizontal and the vertical line.
            half_chord = math.sqrt(self.grid_radius**2 - offset**2)

            # Horizontal line
            y = self.center_y + offset
            cv.line(self.imgarr,
                    (int(self.center_x - half_chord), y),
                    (int(self.center_x + half_chord), y),
                    color, 1)

            # Vertical line
            x = self.center_x + offset
            cv.line(self.imgarr,
                    (x, int(self.center_y - half_chord)),
                    (x, int(self.center_y + half_chord)),
                    color, 1)

    def point_in_circle(self, center_x, center_y, radius):
        """Return a random integer point inside the given circle.

        Candidates are drawn from the circle's bounding square and rejected
        until one lies inside the circle (border included).

        Returns:
            ``(x, y)`` as plain Python ints.
        """
        while True:
            x = np.random.randint(center_x - radius, center_x + radius + 1)
            y = np.random.randint(center_y - radius, center_y + radius + 1)
            if (x - center_x)**2 + (y - center_y)**2 <= radius**2:
                # Plain ints rather than NumPy scalars, so the values can be
                # handed to OpenCV and json without further conversion.
                return int(x), int(y)

    def ScratchDefect(self, color=40, length=10):
        """Draw a straight 1 px scratch at a random position and angle.

        Args:
            color: Gray level of the scratch.
            length: Length of the scratch in pixels.
        """
        start_x, start_y = self.point_in_circle(self.center_x, self.center_y, self.grid_radius)
        angle = np.random.uniform(0, 2 * np.pi)

        end_x = int(start_x + length * np.cos(angle))
        end_y = int(start_y + length * np.sin(angle))

        cv.line(self.imgarr, (start_x, start_y), (end_x, end_y), color, 1)

        min_x, max_x = min(start_x, end_x), max(start_x, end_x)
        min_y, max_y = min(start_y, end_y), max(start_y, end_y)

        self.DrawBoundary(min_x=min_x, min_y=min_y, max_x=max_x, max_y=max_y, name="scratch")

    def SpotDefect(self, color=50, radius=5):
        """Draw a filled disc at a random position inside the grid circle.

        Args:
            color: Gray level of the spot.
            radius: Radius of the spot in pixels.
        """
        spot_x, spot_y = self.point_in_circle(self.center_x, self.center_y, self.grid_radius)

        cv.circle(self.imgarr, (spot_x, spot_y), radius, color, -1)

        self.DrawBoundary(spot_x - radius, spot_y - radius,
                          spot_x + radius, spot_y + radius, 'spot')

    def RadialDefect(self, color=50, num_sides=3, min_size=3, max_size=5):
        """Draw the outline of a regular polygon at a random position.

        Args:
            color: Gray level of the outline.
            num_sides: Number of polygon vertices (must be at least 1).
            min_size, max_size: Inclusive range from which the vertex
                distance to the polygon centre is drawn.
        """
        center_x, center_y = self.point_in_circle(self.center_x, self.center_y, self.grid_radius)

        size = np.random.randint(min_size, max_size + 1)

        angle_increment = 2 * math.pi / num_sides
        vertices = [
            (int(center_x + size * math.cos(i * angle_increment)),
             int(center_y + size * math.sin(i * angle_increment)))
            for i in range(num_sides)
        ]

        for i in range(num_sides):
            cv.line(self.imgarr, vertices[i], vertices[(i + 1) % num_sides], color, 1)

        # Take the bounds from the vertices themselves instead of seeding
        # the maxima with 0.
        xs = [vx for vx, _ in vertices]
        ys = [vy for _, vy in vertices]
        self.DrawBoundary(min(xs), min(ys), max(xs), max(ys), 'radial')

    def CloudyDefect(self, length=15):
        """Gaussian-blur a ``length`` x ``length`` square at a random position.

        The square's top-left corner is a random point inside the grid
        circle. Only the bottom-right corner is tested against the circle;
        when it falls outside, nothing is drawn and no box is recorded.

        Args:
            length: Side length of the blurred square in pixels.
        """
        x1, y1 = self.point_in_circle(self.center_x, self.center_y, self.grid_radius)
        x2 = int(x1 + length)
        y2 = int(y1 + length)
        if (x2 - self.center_x)**2 + (y2 - self.center_y)**2 <= self.grid_radius**2:
            rect_region = self.imgarr[y1:y2, x1:x2]
            blurred_region = cv.GaussianBlur(rect_region, (5, 5), 0)
            self.imgarr[y1:y2, x1:x2] = blurred_region
            self.DrawBoundary(x1, y1, x2, y2, 'cloudy')

    def save_img(self, name):
        """Write the canvas to the image file ``name``.

        The parent directory is created when missing.

        Raises:
            OSError: If OpenCV reports that the image was not written.
        """
        parent = os.path.dirname(name)
        if parent:
            os.makedirs(parent, exist_ok=True)
        if not cv.imwrite(name, self.imgarr):
            raise OSError(f"Could not write image to {name!r}")

    def save_json(self, filename, out_dir='data/'):
        """Write the collected annotations as a labelme-style JSON file.

        Args:
            filename: Image file name; stored as ``imagePath`` and used,
                with its extension replaced by ``.json``, as the name of
                the annotation file.
            out_dir: Directory the JSON file is written to (created when
                missing).
        """
        data = {
            "version": "5.0.1",
            "flags": {},
            "shapes": self.boundary_box,
            "imagePath": filename,
            "imageData": None,
            "imageHeight": self.img_size,
            "imageWidth": self.img_size
        }
        # splitext strips only the final extension, so names containing
        # additional dots keep their full stem.
        stem = os.path.splitext(os.path.basename(filename))[0]
        os.makedirs(out_dir, exist_ok=True)
        json_path = os.path.join(out_dir, stem + '.json')
        with open(json_path, 'w') as handle:
            json.dump(data, handle)

# Run Data Generation

In [5]:
import os
import random

num_img = 500

# Every image and annotation below is written into data/. The JSON writer
# opens its target file directly, so the folder has to exist before the
# first save.
os.makedirs('data', exist_ok=True)

# Generate num_img images, each with 2-5 randomly chosen defects, and store
# the image together with its annotation file.
for i in range(num_img):

    gr = GenerateData(img_size=601)

    gr.create_img()

    random_defects = random.randint(2, 5)

    for j in range(random_defects):

        # 1 = scratch, 2 = radial, 3 = cloudy, 4 = spot
        defects = random.randint(1, 4)

        if defects == 1:
            gr.ScratchDefect(color=random.randint(40, 60), length=random.randint(5, 10))
        elif defects == 2:
            gr.RadialDefect(color=random.randint(50, 70), min_size=random.randint(2, 5), max_size=random.randint(5, 10))
        elif defects == 3:
            gr.CloudyDefect(length=random.randint(10, 20))
        elif defects == 4:
            gr.SpotDefect(color=random.randint(50, 100), radius=random.randint(2, 4))

    gr.save_img(name=f'data/test{i+1}.jpg')

    gr.save_json(filename=f'test{i+1}.jpg')
    



    


Finish Boundary for cloudy
Finish Boundary for spot
Finish Boundary for cloudy
Finish Boundary for spot
Finish Boundary for spot
Finish Boundary for scratch
Finish Boundary for cloudy
Finish Boundary for cloudy
Finish Boundary for radial
Finish Boundary for cloudy
Finish Boundary for scratch
Finish Boundary for scratch
Finish Boundary for spot
Finish Boundary for scratch
Finish Boundary for spot
Finish Boundary for cloudy
Finish Boundary for scratch
Finish Boundary for spot
Finish Boundary for spot
Finish Boundary for radial
Finish Boundary for scratch
Finish Boundary for radial
Finish Boundary for radial
Finish Boundary for radial
Finish Boundary for scratch
Finish Boundary for scratch
Finish Boundary for scratch
Finish Boundary for scratch
Finish Boundary for cloudy
Finish Boundary for scratch
Finish Boundary for radial
Finish Boundary for spot
Finish Boundary for cloudy
Finish Boundary for scratch
Finish Boundary for scratch
Finish Boundary for radial
Finish Boundary for spot
Finish

# Split Data

In [None]:
# !pip install -U labelme2coco

In [6]:
import os
import shutil
import random
from labelme2coco import get_coco_from_labelme_folder, save_json

def clean_data(folder1, train_folder, test_folder , validation_folder):
    """Split the image/annotation pairs in ``folder1`` into three folders.

    All ``.jpg`` files in ``folder1`` are shuffled; 70 % go to
    ``train_folder``, 20 % to ``test_folder`` and the remainder to
    ``validation_folder``. Each image is moved together with the ``.json``
    file that shares its stem. The number of images found is printed.

    Args:
        folder1: Folder holding the generated ``.jpg`` / ``.json`` pairs.
        train_folder: Destination of the training split.
        test_folder: Destination of the test split.
        validation_folder: Destination of the validation split.

    Raises:
        FileNotFoundError: If an image has no matching ``.json`` file. The
            check happens before that image is moved, so the pair is never
            split up.
    """
    # makedirs also creates missing parent folders and is a no-op when the
    # folder already exists.
    for folder in (folder1, train_folder, test_folder, validation_folder):
        os.makedirs(folder, exist_ok=True)

    all_jpg_files = [f for f in os.listdir(folder1) if f.endswith('.jpg')]
    print(len(all_jpg_files))
    train_count = int(len(all_jpg_files) * 0.7)
    test_count = int(len(all_jpg_files) * 0.2)
    random.shuffle(all_jpg_files)

    splits = (
        (all_jpg_files[:train_count], train_folder),
        (all_jpg_files[train_count:train_count + test_count], test_folder),
        (all_jpg_files[train_count + test_count:], validation_folder),
    )

    for images, destination in splits:
        for img in images:
            # Swap only the final extension for '.json'.
            annotation_path = os.path.join(folder1, os.path.splitext(img)[0] + '.json')
            if not os.path.isfile(annotation_path):
                raise FileNotFoundError(f"Missing annotation file: {annotation_path}")
            shutil.move(os.path.join(folder1, img), destination)
            shutil.move(annotation_path, destination)

def convert_coco(foldername , name):
    """Convert the labelme annotations in ``foldername`` to one COCO JSON file.

    The result is written into ``foldername`` itself, as ``test.json`` when
    ``name`` is ``'test'``, ``train.json`` when it is ``'train'``, and
    ``validate.json`` for any other value.

    Args:
        foldername: Folder containing the labelme annotation files.
        name: Split name selecting the output file name.
    """
    if name in ('test', 'train'):
        output_name = name + '.json'
    else:
        output_name = 'validate.json'

    coco = get_coco_from_labelme_folder(foldername)
    # os.path.join gives the same path as plain concatenation when
    # foldername ends with a separator, and still works when it does not.
    save_json(coco.json, os.path.join(foldername, output_name))

In [7]:
############ Clean Data #############
# Folder layout of the data set. The trailing slashes are kept on purpose:
# convert_coco receives these strings as its output folder.
data_dir = 'data/'
train_folder = data_dir + 'train/'
test_folder = data_dir + 'test/'
validate_folder = data_dir + 'validation/'

if __name__ == '__main__':
    # exist_ok=True makes this a no-op when the folder is already there,
    # while real failures (e.g. missing permissions) surface here instead
    # of being swallowed.
    os.makedirs(data_dir, exist_ok=True)

    # Split the generated pairs, then build one COCO file per split.
    clean_data(data_dir , train_folder , test_folder , validate_folder)
    convert_coco(train_folder , name= 'train')
    convert_coco(test_folder , name='test')
    convert_coco(validate_folder , name='validate')

500
There are 350 listed files in folder data/train/.


Converting labelme annotations to COCO format: 100%|██████████| 350/350 [00:01<00:00, 217.83it/s]


There are 100 listed files in folder data/test/.


Converting labelme annotations to COCO format: 100%|██████████| 100/100 [00:00<00:00, 226.94it/s]


There are 50 listed files in folder data/validation/.


Converting labelme annotations to COCO format: 100%|██████████| 50/50 [00:00<00:00, 210.31it/s]
