In [34]:
# imports
import os
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from glob import glob
from tqdm.notebook import tqdm
# globals
data_path  =os.path.join(os.getcwd(),'data')
train_path =os.path.join(data_path,'Train')
test_path  =os.path.join(data_path,'Test')

# data labeling and formattimg 
'''
    for each folder map the image from 0 to upperlimit
'''
dlist=os.listdir(train_path)
dlist=[int(d) for d in dlist]
dlist.sort()
lower_thresh=dlist[0]
upper_thresh=dlist[-1]
nb_classes=upper_thresh-lower_thresh+1
print("Number of classes:",nb_classes)
dim=64

def preprocessing(_path):
    '''
        preprocesses an image and label
        preprocess ops:
            * data shuffle
        image ops:
            * read
            * resize
            * thresh
            * image to tensor
            * convert to float (special case)
        label ops:
            * extract label from path
            * convert to float (special case)
        
        args:
            _path = the path of test dir / train dir where all the images are separated with corresponding labels
    '''
    X=[]
    y=[]
    img_paths=glob(os.path.join(_path,'*/*.bmp'))
    # shuffle
    random.shuffle(img_paths)
    # iterate
    for img_path in tqdm(img_paths[:5]):
        # extract int label from full path
        label=int(os.path.basename(os.path.dirname(img_path)))-lower_thresh
        # encode label
        encoded_label=np.zeros(nb_classes,dtype=np.float)
        encoded_label[label]=1
        # update
        y.append(encoded_label)

        # image ops
        # read
        img=cv2.imread(img_path,0)
        # resize
        img=cv2.resize(img,(dim,dim))
        # Otsu's thresholding after Gaussian filtering
        blur = cv2.GaussianBlur(img,(5,5),0)
        _,img = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        
        # image to tensor
        x=np.expand_dims(img,axis=-1)
        x=np.expand_dims(x,axis=0)
        # float conversion
        x=x/255.0
        x=1-x
        # debug
        #plt.imshow(np.squeeze(x))
        #plt.show()
        # update
        X.append(x)
        
    # stacking
    X=np.vstack(X)
    y=np.vstack(y)
    return X,y
x_test,y_test=preprocessing(test_path)
print(x_test.shape,y_test.shape)

Number of classes: 50


  0%|          | 0/5 [00:00<?, ?it/s]

(5, 64, 64, 1) (5, 50)


In [1]:
import tensorflow as tf
tf.__version__

'2.4.1'

In [4]:
import cv2
import os 
import random
from tqdm.notebook import tqdm
from glob import glob
import numpy as np 
import matplotlib.pyplot as plt
%matplotlib inline


data_path="/home/nadim/Desktop/ml/data/"
train_path =os.path.join(data_path,'Train')
test_path  =os.path.join(data_path,'Test')
DATA_NUM=512
IMG_DIM =64
IDEN_START=172
img_paths=glob(os.path.join(train_path,'*/*.bmp'))
# shuffle
random.shuffle(img_paths)
# iterate
for img_path in tqdm(img_paths):
    with(open(img_path,'rb')) as fid:
        image_bmp_bytes=fid.read()
    print(image_bmp_bytes)
    # image ops
    # read
    img=cv2.imread(img_path,0)
    # resize
    img=cv2.resize(img,(IMG_DIM,IMG_DIM))
    # Otsu's thresholding after Gaussian filtering
    blur = cv2.GaussianBlur(img,(5,5),0)
    _,img = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    # Png encoded data
    _,img_coded = cv2.imencode('.png',img)
    # Byte conversion
    image_png_bytes = img_coded.tobytes()
    print(image_png_bytes)
    break



  0%|          | 0/12000 [00:00<?, ?it/s]

b'BM\xb6\x1f\x00\x00\x00\x00\x00\x006\x00\x00\x00(\x00\x00\x008\x00\x00\x000\x00\x00\x00\x01\x00\x18\x00\x00\x00\x00\x00\x80\x1f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f

In [None]:
def to_tfrecord(img_paths,save_dir,mode,r_num):
    '''
      Creates tfrecords from Provided Image Paths
    '''
    tfrecord_name=f'{r_num}.tfrecord'
    tfrecord_path=os.path.join(save_dir,mode,tfrecord_name) 
    
    with tf.io.TFRecordWriter(tfrecord_path) as writer:    
        
        for img_path in tqdm(img_paths):
            
            #label
            label=get_label(img_path)
            # image ops
            # read
            img=cv2.imread(img_path,0)
            # resize
            img=cv2.resize(img,(IMG_DIM,IMG_DIM))
            # Otsu's thresholding after Gaussian filtering
            blur = cv2.GaussianBlur(img,(5,5),0)
            _,img = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
            # Png encoded data
            _,img_coded = cv2.imencode('.png',img)
            # Byte conversion
            image_png_bytes = img_coded.tobytes()

            # feature desc
            data ={ 'image':_bytes_feature(image_png_bytes),
                    'label':_int64_feature(label)
            }
            
            features=tf.train.Features(feature=data)
            example= tf.train.Example(features=features)
            serialized=example.SerializeToString()
            writer.write(serialized)  


In [5]:
for idx in range(0,10000,DATA_NUM):
    print(idx)

0
512
1024
1536
2048
2560
3072
3584
4096
4608
5120
5632
6144
6656
7168
7680
8192
8704
9216
9728


In [6]:
img_paths=glob(os.path.join(train_path,'*/*.bmp'))
len(img_paths)

12000

In [7]:
img_paths=img_paths[512:1024]
print(len(img_paths))

512


In [9]:
225//57

3.9473684210526314