In [62]:
from pycococreatortools import pycococreatortools
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np


class Utils(object):
    '''
    Convert a folder of images and binary masks into a COCO annotation file.

    Under ROOT_DIR there must be a folder named "Images" (JPEG images) and a
    folder named "Masks" (PNG masks). A mask belongs to an image when its
    file name is "<image stem>.png" or "<image stem>_<anything>.png", e.g.
    "12_Rail_road.png" for "12.jpg".
    '''

    def __init__(self, ROOT_DIR):
        '''
        ROOT_DIR: Directory containing the "Images" and "Masks" folders.

        Only builds paths and the static COCO header sections; nothing is
        read from disk here.
        '''

        self.ROOT_DIR = ROOT_DIR
        self.IMAGE_DIR = os.path.join(self.ROOT_DIR, "Images")
        self.ANNOTATION_DIR = os.path.join(self.ROOT_DIR, "Masks")

        # Same naive-UTC timestamp text as datetime.utcnow() produced, without
        # relying on that deprecated call.
        created = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

        self.INFO = {
            "description": "Example Dataset",
            "url": "https://github.com/waspinator/pycococreator",
            "version": "0.1.0",
            "year": 2018,
            "contributor": "waspinator",
            "date_created": created.isoformat(' ')
        }

        self.LICENSES = [
            {
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike License",
                "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
            }
        ]

        self.CATEGORIES = [
            {
                'id': 1,
                'name': 'Rail_road',
                'supercategory': 'shape',
            }
        ]

    def check_non_img_files(self, directory_path):
        '''
        Return the name of the first entry in directory_path (in sorted
        order) whose extension is not .jpg/.png/.jpeg, or None when every
        entry looks like an image. The extension test ignores case, and
        sub-directories count as non-image entries.
        '''
        image_exts = ('.jpg', '.png', '.jpeg')
        # Sorted so the reported entry does not depend on os.listdir order.
        for name in sorted(os.listdir(directory_path)):
            if os.path.splitext(name)[1].lower() not in image_exts:
                return name
        return None

    def filter_for_jpeg(self, root, files):
        '''
        Return os.path.join(root, f) for every name in files that ends in
        .jpg or .jpeg (case-insensitive), keeping the input order.
        '''
        jpeg_exts = ('.jpeg', '.jpg')
        return [os.path.join(root, f) for f in files
                if f.lower().endswith(jpeg_exts)]

    def filter_for_annotations(self, root, files, image_filename):
        '''
        Return the joined paths of the PNG files in files that belong to
        image_filename.

        A mask belongs to the image when its stem equals the image stem or
        starts with "<image stem>_". The separator is required so that image
        "1" does not also claim the masks of images "10", "11", ...; the
        stem is compared literally, never interpreted as a regex.
        '''
        image_stem = os.path.splitext(os.path.basename(image_filename))[0]
        stem_with_separator = image_stem + '_'

        matches = []
        for f in files:
            if not f.lower().endswith('.png'):
                continue
            mask_stem = os.path.splitext(f)[0]
            if mask_stem == image_stem or mask_stem.startswith(stem_with_separator):
                matches.append(os.path.join(root, f))
        return matches

    def add_class_name_to_files_names(self, class_name='Rail_road'):
        '''
        Rename every mask file in ANNOTATION_DIR to
        "<stem>_<class_name>.png" unless its name already contains
        "_<class_name>".

        class_name: Category name to append (defaults to 'Rail_road').

        Sub-directories are never renamed: empty ones are removed, non-empty
        ones are left in place. Hidden entries (names starting with '.') are
        skipped. Only the last extension is stripped, so "a.b.png" becomes
        "a.b_<class_name>.png".
        '''
        suffix = '_' + class_name

        for name in sorted(os.listdir(self.ANNOTATION_DIR)):
            path = os.path.join(self.ANNOTATION_DIR, name)

            # Handle directories before any rename so the check is made on a
            # path that still exists.
            if os.path.isdir(path):
                try:
                    os.rmdir(path)
                except OSError:
                    # Non-empty (or otherwise not removable): leave it alone.
                    pass
                continue

            if name.startswith('.') or suffix in name:
                continue

            stem = os.path.splitext(name)[0]
            # The extension is forced to .png because only *.png files are
            # picked up as annotations.
            os.rename(path, os.path.join(self.ANNOTATION_DIR, stem + suffix + '.png'))

    def _class_id_for(self, annotation_filename):
        '''Return the id of the first category whose name occurs in the mask's file name.'''
        mask_name = os.path.basename(annotation_filename)
        # Prefer the file name; fall back to the full path so masks that were
        # only matched through a directory name keep working.
        for candidate in (mask_name, annotation_filename):
            for category in self.CATEGORIES:
                if category['name'] in candidate:
                    return category['id']
        raise IndexError(
            "no category name from CATEGORIES occurs in mask file %r" % annotation_filename)

    def export_to_COCO(self, dataset_type='train'):
        '''
        Build the COCO structure for all JPEG images under IMAGE_DIR and
        their PNG masks under ANNOTATION_DIR, and write it to
        "<ROOT_DIR>/<dataset_type>.json".

        dataset_type: Base name of the output JSON file.

        Images and masks are visited in sorted order so ids are reproducible.
        Raises IndexError when a matched mask file name contains no category
        name from CATEGORIES.
        '''

        coco_output = {
            "info": self.INFO,
            "licenses": self.LICENSES,
            "categories": self.CATEGORIES,
            "images": [],
            "annotations": []
        }

        image_id = 1
        segmentation_id = 1

        # Walk the mask tree once instead of once per image.
        annotation_tree = []
        for mask_root, mask_dirs, mask_names in os.walk(self.ANNOTATION_DIR):
            mask_dirs.sort()
            annotation_tree.append((mask_root, sorted(mask_names)))

        for image_root, image_dirs, image_names in os.walk(self.IMAGE_DIR):
            image_dirs.sort()

            # go through each jpeg image
            for image_filename in self.filter_for_jpeg(image_root, sorted(image_names)):
                # Only the size is needed; close the file handle right away.
                with Image.open(image_filename) as image:
                    image_size = image.size

                image_info = pycococreatortools.create_image_info(
                    image_id, os.path.basename(image_filename), image_size)
                coco_output["images"].append(image_info)

                # Decided from the file name only, so a directory whose name
                # happens to contain "crowd" does not flag every image.
                is_crowd = 'crowd' in os.path.basename(image_filename)

                # go through each associated png annotation
                for mask_root, mask_names in annotation_tree:
                    for annotation_filename in self.filter_for_annotations(
                            mask_root, mask_names, image_filename):

                        category_info = {
                            'id': self._class_id_for(annotation_filename),
                            'is_crowd': is_crowd,
                        }

                        with Image.open(annotation_filename) as mask_image:
                            binary_mask = np.asarray(
                                mask_image.convert('1')).astype(np.uint8)

                        annotation_info = pycococreatortools.create_annotation_info(
                            segmentation_id, image_id, category_info, binary_mask,
                            image_size, tolerance=2)

                        if annotation_info is not None:
                            coco_output["annotations"].append(annotation_info)

                        # Ids advance even for skipped masks, as before.
                        segmentation_id = segmentation_id + 1

                image_id = image_id + 1

        with open(os.path.join(self.ROOT_DIR, dataset_type + '.json'), 'w') as output_json_file:
            json.dump(coco_output, output_json_file)



# Build the converter for the dataset rooted at Data/Test_Real_Time; its
# Images/ and Masks/ sub-folders are resolved inside Utils.__init__.
b = Utils('Data/Test_Real_Time')


In [25]:
# 70% / 25% / 5% of 2000 items.
# NOTE(review): presumably the train/test/validation split sizes - confirm.
2000*0.7 , 2000*0.25, 2000*0.05

(1400.0, 500.0, 100.0)

In [63]:
# Look for entries that are not .jpg/.png/.jpeg in the Masks and Images folders.
# Each call returns one offending file name, or None when the folder is clean.
# NOTE(review): the return values are not printed or stored here - confirm the
# Masks result is not being silently dropped by the notebook.
b.check_non_img_files(b.ANNOTATION_DIR)
b.check_non_img_files(b.IMAGE_DIR)

In [65]:
# Delete the stray .ipynb_checkpoints entry from the Images folder.
# In the recorded run below the entry did not exist, so rm reported an error.
!rm Data/Test_Real_Time/Images/.ipynb_checkpoints

/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
rm: cannot remove 'Data/Test_Real_Time/Images/.ipynb_checkpoints': No such file or directory


In [None]:
# Recursively delete the .ipynb_checkpoints entry under Rail_Segmentation/Masks.
# NOTE(review): this path differs from the Data/Test_Real_Time root used in the
# other cells - confirm it points at the intended dataset before running.
!rm -r Rail_Segmentation/Masks/.ipynb_checkpoints

In [66]:
# b.add_class_name_to_files_names()
# b.export_to_COCO(dataset_type = 'Test_Real_Time')

In [24]:
import shutil
import os


class Divide_dataset(object):
    '''
    Helper for dividing a dataset into Train, Test and Validation sets and
    for verifying that a divided set is complete.
    '''

    def __init__(self, main_dir, destination, thress, move=False):
        '''
        Select the first `thress` entries of main_dir (sorted by name) and,
        only when move=True, move them into destination.

        main_dir : Directory containing all files Eg: /Images OR /Masks
        destination: Directory path to move files.
        thress: Number of files to select; values below 0 select nothing.
        move: When False (the default) nothing on disk is changed and the
              selection is only recorded, so existing calls stay side-effect
              free. When True, destination is created if needed and exactly
              `thress` files are moved.

        The selection is available as self.selected_files and the names
        actually moved as self.moved_files.
        '''
        self.main_dir = main_dir
        self.destination = destination
        self.thress = thress

        # int() so that a computed size such as 2000 * 0.05 (a float) works.
        limit = max(int(thress), 0)
        # Slicing yields exactly `limit` names; the former counter loop would
        # have touched one file too many before reaching its break.
        self.selected_files = sorted(os.listdir(main_dir))[:limit]
        self.moved_files = []

        if move:
            # Without an existing destination directory shutil.move would
            # rename the first file to `destination` instead of moving into it.
            os.makedirs(destination, exist_ok=True)
            for name in self.selected_files:
                shutil.move(os.path.join(main_dir, name), destination)
                self.moved_files.append(name)

    def check(self, path_to_set):
        '''
        Check whether divided set contains required no of imgs and masks accordingly.
        path_to_set : Path of directory containing Images and Masks folder.

        Prints the number of entries in both folders, then reports whether
        every image has a mask. A mask covers an image when the mask stem is
        the image stem, or the image stem followed by "_<anything>" (e.g.
        "img_1_Rail_road.png" covers "img_1.jpg").

        Returns True when every image is covered, otherwise False.
        '''
        imgs_dir = os.path.join(path_to_set, 'Images')
        masks_dir = os.path.join(path_to_set, 'Masks')

        image_names = os.listdir(imgs_dir)
        mask_names = os.listdir(masks_dir)

        print("No of Images = >", len(image_names))
        print("No of Masks = >", len(mask_names))

        # Every image stem a mask could belong to: the mask stem itself and
        # each prefix that ends right before an underscore. This keeps image
        # names that contain underscores working.
        covered = set()
        for name in mask_names:
            stem = os.path.splitext(name)[0]
            covered.add(stem)
            cut = stem.find('_')
            while cut != -1:
                covered.add(stem[:cut])
                cut = stem.find('_', cut + 1)

        missing = sorted(
            stem for stem in (os.path.splitext(name)[0] for name in image_names)
            if stem not in covered)

        if not missing:
            print("All files are successfully Moved!")
            return True

        print("Images without a matching mask (%d):" % len(missing), missing[:10])
        return False

 
# Constructing Divide_dataset here does not move any files (the move is
# disabled); check() then prints the image/mask counts for Data/Train and a
# success message when every image has a corresponding mask.
d= Divide_dataset("Data/Train/Images", "Data/Images",100)
d.check("Data/Train")

No of Images = > 1397
No of Masks = > 1397
All files are successfully Moved!
