In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import cv2
import matplotlib.pyplot as plt
import glob
from tqdm import tqdm
# from utils.dataloaders import create_dataloader
# from utils.dataloaders import LoadImagesAndLabels
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Dataloaders and dataset utils
"""

import contextlib
import glob
import hashlib
import json
import math
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import Pool, ThreadPool
from pathlib import Path
from threading import Thread
from urllib.parse import urlparse

import numpy as np
import psutil
import torch
import torch.nn.functional as F
import torchvision
import yaml
from PIL import ExifTags, Image, ImageOps
from torch.utils.data import DataLoader, Dataset, dataloader, distributed
from tqdm import tqdm

from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
                                 letterbox, mixup, random_perspective)
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, check_dataset, check_requirements,
                           check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy,
                           xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
from utils.torch_utils import torch_distributed_zero_first

# Parameters
HELP_URL = 'See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data'

# File suffixes (lower-case, without the dot) treated as images / videos
IMG_FORMATS = ('bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm')
VID_FORMATS = ('asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv')

# Rank values read from the environment; both fall back to -1 when the variable is unset
# (see https://pytorch.org/docs/stable/elastic/run.html)
LOCAL_RANK = int(os.environ.get('LOCAL_RANK', -1))
RANK = int(os.environ.get('RANK', -1))

# Global pin_memory switch for dataloaders: True only when the (case-insensitive) value is "true",
# which is also the result when the variable is unset because the default stringifies to "True"
PIN_MEMORY = str(os.environ.get('PIN_MEMORY', True)).lower() == 'true'


In [None]:

class CustomDataset(Dataset):
    """Map-style dataset that synthesises two-object detection samples.

    For every requested index the dataset
      1. loads that image and one other randomly chosen image from ``path``,
      2. locates the template in each of them with ``cv2.matchTemplate``,
      3. crops the matched region, resizes both crops to a fixed patch size and
         pastes them at random positions on a white canvas, and
      4. returns the letterboxed canvas together with the two boxes in
         normalised ``xywh`` form.

    Attributes set by ``__init__`` and read by the other methods:
        im_files: list of image paths (re-ordered by aspect ratio when ``rect``).
        shapes: ``(n, 2)`` array of original image sizes as ``(height, width)``.
        batch: batch index of every image.
        batch_shapes: per-batch letterbox shape ``(height, width)``; only set when ``rect``.
        labels: placeholder labels, one ``(1, 5)`` array per image (never updated by
            ``__getitem__``).
        template: BGR template image used for matching.
    """

    # Template used when ``template_path`` is not supplied (kept for backward compatibility)
    DEFAULT_TEMPLATE_PATH = '/home/somusan/somusan/soumyadip/interview/lens_assignment/1_3_crop.tif'

    def __init__(self,
                 path,
                 img_size=640,
                 batch_size=16,
                 augment=False,
                 hyp=None,
                 rect=False,
                 image_weights=False,
                 cache_images=False,
                 single_cls=False,
                 stride=32,
                 pad=0.0,
                 min_items=0,
                 prefix='',
                 template_path=None,
                 patch_size=100):
        """Index the images under ``path`` and prepare batch bookkeeping.

        Args:
            path: directory whose regular files are the dataset images.
            img_size: target size of the longest image side and of the final canvas.
            batch_size: used only to assign images to batches (``self.batch``).
            augment: enables ``random_perspective`` and letterbox up-scaling.
            hyp: hyper-parameter mapping; must provide 'degrees', 'translate', 'scale',
                'shear' and 'perspective' when ``augment`` is True.
            rect: sort images by aspect ratio and compute one letterbox shape per batch.
            image_weights, cache_images, single_cls, min_items: accepted so the signature
                matches the YOLOv5 loader; not used by this class.
            stride: batch shapes are rounded up to a multiple of this value.
            pad: padding (in stride units) added to the batch shapes.
            prefix: string prepended to error messages.
            template_path: template image to search for; defaults to ``DEFAULT_TEMPLATE_PATH``.
            patch_size: side length in pixels of the square patches pasted on the canvas.

        Raises:
            FileNotFoundError: ``path`` contains no files, or the template cannot be read.
            ValueError: a file in ``path`` cannot be decoded as an image, or ``patch_size`` < 1.
        """
        if patch_size < 1:
            raise ValueError(f'{prefix}patch_size must be >= 1, got {patch_size}')

        self.path = path
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.rect = rect
        self.patch_size = patch_size
        # self.batch_size = batch_size

        # os.listdir returns entries in arbitrary order, so sort for a reproducible
        # index -> file mapping; sub-directories are skipped because they are not images
        candidates = (os.path.join(self.path, filename) for filename in os.listdir(self.path))
        self.im_files = sorted(f for f in candidates if os.path.isfile(f))
        n = len(self.im_files)
        if n == 0:
            raise FileNotFoundError(f'{prefix}No image files found in {path}')

        self.ims = [None] * n
        self.im_hw0 = [None] * n
        self.im_hw = [None] * n
        self.npy_files = []

        # Original (height, width) of every image; fail loudly on undecodable files
        # instead of crashing on ``None.shape``
        shapes_hw = []
        for img_fn in self.im_files:
            im = cv2.imread(img_fn)
            if im is None:
                raise ValueError(f'{prefix}Cannot read image {img_fn}')
            shapes_hw.append(im.shape[:2])
        self.shapes = np.array(shapes_hw)

        if template_path is None:
            template_path = self.DEFAULT_TEMPLATE_PATH
        self.template = cv2.imread(str(template_path))
        if self.template is None:
            raise FileNotFoundError(f'{prefix}Template image not found or unreadable: {template_path}')

        self.albumentations = Albumentations(size=img_size) if augment else None

        # Create indices
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Placeholder labels (class 0 + random xywh); each image gets its own array
        dummy_labels = np.random.rand(1, 5)
        dummy_labels[:, 0] = [0.0]
        self.labels = [dummy_labels.copy() for _ in range(n)]

        # Rectangular Training
        if self.rect:
            irect, self.batch_shapes = self._rect_batch_shapes(self.shapes, bi, img_size, stride, pad)
            self.im_files = [self.im_files[i] for i in irect]
            self.shapes = self.shapes[irect]  # hw, sorted by aspect ratio

    @staticmethod
    def _rect_batch_shapes(shapes_hw, batch_index, img_size, stride, pad):
        """Return the aspect-ratio sort order and the per-batch letterbox shapes (h, w).

        ``shapes_hw`` holds ``(height, width)`` rows, so the aspect ratio is ``h / w``:
        a batch of only wide images (all ratios < 1) gets a shape that is shorter than
        wide, a batch of only tall images gets one that is narrower than tall, and mixed
        batches stay square.
        """
        ar = shapes_hw[:, 0] / shapes_hw[:, 1]  # h / w
        irect = ar.argsort()
        ar = ar[irect]

        nb = batch_index[-1] + 1  # number of batches
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[batch_index == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
        return irect, batch_shapes

    @staticmethod
    def _boxes_to_letterbox(boxes, ratio, pad):
        """Map ``[cls, x1, y1, x2, y2]`` pixel boxes through a letterbox transform.

        ``ratio`` is ``(width_ratio, height_ratio)`` and ``pad`` is ``(pad_w, pad_h)``;
        a new float array is returned and ``boxes`` is left untouched.
        """
        mapped = np.array(boxes, dtype=np.float64)
        mapped[:, [1, 3]] = mapped[:, [1, 3]] * ratio[0] + pad[0]
        mapped[:, [2, 4]] = mapped[:, [2, 4]] * ratio[1] + pad[1]
        return mapped

    def _pick_partner(self, index):
        """Pick a random position different from ``index``; a 1-image dataset pairs with itself."""
        n = len(self.im_files)
        if n == 1:
            return index
        other = random.randrange(n - 1)  # uniform over the n - 1 remaining positions
        return other + 1 if other >= index else other

    def get_img_cropped(self, center_x, center_y, width, height, img):
        """Return the ``width`` x ``height`` window of ``img`` centred on ``(center_x, center_y)``.

        The window is cut off at the top/left image border; NumPy slicing already cuts it
        off at the bottom/right border.
        """
        x1 = int(center_x - width // 2)
        y1 = int(center_y - height // 2)

        x2 = int(x1 + width)
        y2 = int(y1 + height)

        # A negative start would be read as a from-the-end index and yield an empty crop
        return img[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]

    def load_image(self, i):
        """Load image ``i`` and resize it so that its longest side equals ``img_size``.

        Returns:
            ``(image, (h_original, w_original), (h_resized, w_resized))``. When
            ``self.ims[i]`` is populated the cached entries are returned instead; this
            class itself never fills the cache.
        """
        im, f = self.ims[i], self.im_files[i]
        if im is None:  # not cached in RAM
            im = cv2.imread(f)  # BGR
            assert im is not None, f'Image Not Found {f}'
            h0, w0 = im.shape[:2]  # orig hw
            r = self.img_size / max(h0, w0)  # ratio
            if r != 1:  # if sizes are not equal
                interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
                im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
            return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
        return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized

    def get_labels(self, main_image, template):
        """Locate ``template`` in ``main_image`` and return the best match as one label row.

        Both images are converted from BGR to grayscale and matched with
        ``cv2.TM_CCORR_NORMED``; the location of the maximum response is used.

        Returns:
            ``(1, 5)`` float array ``[[0.0, center_x, center_y, w, h]]`` in pixels of
            ``main_image``, where ``w``/``h`` are the template's width/height.

        Raises:
            ValueError: the template is larger than ``main_image`` in either dimension.
        """
        main_gray = cv2.cvtColor(main_image, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

        template_height, template_width = template_gray.shape
        if template_height > main_gray.shape[0] or template_width > main_gray.shape[1]:
            raise ValueError(f'Template {template_width}x{template_height} is larger than the image '
                             f'{main_gray.shape[1]}x{main_gray.shape[0]} it should be matched against')

        result = cv2.matchTemplate(main_gray, template_gray, cv2.TM_CCORR_NORMED)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        left, top = max_loc  # top-left corner of the best match
        center_x = (left + left + template_width) // 2
        center_y = (top + top + template_height) // 2

        return np.array([[0.0, center_x, center_y, template_width, template_height]])

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.im_files)

    def __getitem__(self, index):
        """Build one synthetic sample from image ``index`` and a random partner image.

        Returns:
            ``(image, labels_out, path, shapes)`` where
              * ``image`` is a uint8 tensor, channels first, RGB order;
              * ``labels_out`` is ``(nl, 6)``: column 0 is left at zero, column 1 is the
                class and columns 2-5 are ``xywh`` normalised by the returned image size;
              * ``path`` is the file of image ``index``;
              * ``shapes`` is ``(canvas_hw, ((h_ratio, w_ratio), pad))`` for the letterbox
                applied to the canvas, in the layout of the YOLOv5 loader.

        Raises:
            IndexError: ``index`` is outside ``[-len(self), len(self))``.
        """
        n = len(self.im_files)
        if not -n <= index < n:
            # IndexError (not ValueError) is what lets ``for sample in dataset`` terminate
            raise IndexError(f'index {index} is out of range for a dataset of {n} images')
        if index < 0:
            index += n
        rand_idx = self._pick_partner(index)

        index = self.indices[index]  # linear, shuffled, or image_weights
        index1 = self.indices[rand_idx]  # linear, shuffled, or image_weights

        hyp = self.hyp

        # Load images
        img, _, _ = self.load_image(index)
        img1, _, _ = self.load_image(index1)

        # Letterbox each source image to its own final shape
        # NOTE(review): letterbox is assumed to return (image, (w_ratio, h_ratio), (pad_w, pad_h))
        # as in YOLOv5's utils.augmentations - confirm against the installed version
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size
        img, _, _ = letterbox(img, shape, auto=False, scaleup=self.augment)

        shape1 = self.batch_shapes[self.batch[index1]] if self.rect else self.img_size
        img1, _, _ = letterbox(img1, shape1, auto=False, scaleup=self.augment)

        # Template match on the letterboxed images, then cut out the matched regions
        labels = self.get_labels(img, self.template)
        labels1 = self.get_labels(img1, self.template)
        cropped_img = self.get_img_cropped(*labels[0][1:], img)
        cropped_img1 = self.get_img_cropped(*labels1[0][1:], img1)

        # White canvas with the size of the first letterboxed image
        ht, wd = img.shape[:2]
        combined_image = np.full((ht, wd, 3), 255, dtype=np.uint8)

        # Patches never exceed the canvas, so the random paste ranges below stay valid
        new_width = min(self.patch_size, wd)
        new_height = min(self.patch_size, ht)
        resized_image1 = cv2.resize(cropped_img, (new_width, new_height))
        resized_image2 = cv2.resize(cropped_img1, (new_width, new_height))

        paste_x1 = random.randint(0, wd - new_width)
        paste_y1 = random.randint(0, ht - new_height)
        paste_x2 = random.randint(0, wd - new_width)
        paste_y2 = random.randint(0, ht - new_height)

        # The second patch is pasted last and may overlap the first one
        combined_image[paste_y1:paste_y1 + new_height, paste_x1:paste_x1 + new_width] = resized_image1
        combined_image[paste_y2:paste_y2 + new_height, paste_x2:paste_x2 + new_width] = resized_image2

        # Pixel xyxy boxes of the two patches on the canvas (class, x1, y1, x2, y2)
        canvas_boxes = [[labels[0][0], paste_x1, paste_y1, paste_x1 + new_width, paste_y1 + new_height],
                        [labels1[0][0], paste_x2, paste_y2, paste_x2 + new_width, paste_y2 + new_height]]

        # Letterbox the canvas and carry the boxes through the same scale + padding
        combined_image, combo_ratio, combo_pad = letterbox(combined_image,
                                                           self.img_size,
                                                           auto=False,
                                                           scaleup=self.augment)
        combined_image = combined_image.astype(np.uint8)
        labels_main = self._boxes_to_letterbox(canvas_boxes, combo_ratio, combo_pad)

        # The canvas is the "original" image of this sample, so the rescaling info describes it
        combo_shapes = (ht, wd), ((combo_ratio[1], combo_ratio[0]), combo_pad)  # for COCO mAP rescaling

        if self.augment:
            combined_image, labels_main = random_perspective(combined_image,
                                                             labels_main,
                                                             degrees=hyp['degrees'],
                                                             translate=hyp['translate'],
                                                             scale=hyp['scale'],
                                                             shear=hyp['shear'],
                                                             perspective=hyp['perspective'])

        nl = len(labels_main)  # number of labels
        if nl:
            # pixel xyxy -> xywh normalised by the final image size
            labels_main[:, 1:5] = xyxy2xywhn(labels_main[:, 1:5],
                                             w=combined_image.shape[1],
                                             h=combined_image.shape[0],
                                             clip=True,
                                             eps=1E-3)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels_main)

        # Convert
        combined_image = combined_image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        combined_image = np.ascontiguousarray(combined_image)

        return torch.from_numpy(combined_image), labels_out, self.im_files[index], combo_shapes



In [None]:
# Build the dataset in rectangular mode and pull a single sample for a visual sanity check
dataset_custom = CustomDataset(
    "/home/somusan/somusan/soumyadip/interview/lens_assignment/finger_data/images/train/",
    img_size=640,
    batch_size=2,
    rect=True,
)
dataloader_data = dataset_custom[3]  # (image tensor, labels, path, shapes)
dataloader_labels = dataloader_data[1]
dataloader_img = dataloader_data[0].permute(1, 2, 0).numpy()  # channels-first -> channels-last for matplotlib
print(dataloader_labels)
plt.imshow(dataloader_img)