In [1]:
import os
import glob
import pandas as pd
import io
import argparse
import tensorflow.compat.v1 as tf
from PIL import Image
from object_detection.utils import dataset_util, label_map_util
from collections import namedtuple

In [2]:
def split(df, group):
    """Partition ``df`` by the ``group`` column into per-key records.

    Returns a list of ``data(filename, object)`` namedtuples, one per distinct
    value of the grouping column: ``filename`` is the group key and ``object``
    is the sub-frame holding the rows that carry that key.
    """
    record = namedtuple('data', ['filename', 'object'])
    by_key = df.groupby(group)
    records = []
    for key in by_key.groups:
        records.append(record(key, by_key.get_group(key)))
    return records

In [3]:
def create_tf_example(group, path):
    """
    Build one ``tf.train.Example`` describing an image and its labelled boxes.

    Args:
        group: record with a ``filename`` attribute (image name without the
            ``.jpg`` extension) and an ``object`` DataFrame that provides the
            ``xmin``, ``xmax``, ``ymin``, ``ymax`` and ``class`` columns.
        path: directory in which ``<filename>.jpg`` is looked up.

    Returns:
        A ``tf.train.Example`` (not yet serialized) holding the raw JPEG
        bytes, the image dimensions, the box corners divided by the image
        width/height, and the class names with their integer ids.
    """
    image_path = os.path.join(path, '{}.jpg'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The image is opened only to learn its dimensions; the context manager
    # lets PIL release it as soon as the size has been read.
    with Image.open(io.BytesIO(encoded_jpg)) as image:
        width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    boxes = group.object
    # Column-wise division replaces the per-row loop: pixel coordinates
    # become fractions of the image width/height.
    xmins = (boxes['xmin'] / width).tolist()
    xmaxs = (boxes['xmax'] / width).tolist()
    ymins = (boxes['ymin'] / height).tolist()
    ymaxs = (boxes['ymax'] / height).tolist()
    class_names = boxes['class'].tolist()
    classes_text = [name.encode('utf8') for name in class_names]
    classes = [class_text_to_int(name) for name in class_names]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

In [4]:
def class_text_to_int(row_label):
    """Look up the integer class id for ``row_label`` in ``label_map_dict``.

    Raises ``KeyError`` when the label is not a key of the label map.
    """
    class_id = label_map_dict[row_label]
    return class_id


In [5]:
# Location of the label map file (a relative path).
LABELMAP_LOCATION="../workspace/training_demo/annotations/labelmap.pbtxt"

# NOTE(review): repeats the import already made in the notebook's first cell.
from object_detection.utils import dataset_util, label_map_util
# Load the label map and derive the dict that class_text_to_int indexes.
label_map = label_map_util.load_labelmap(LABELMAP_LOCATION)
label_map_dict = label_map_util.get_label_map_dict(label_map)

# Show the resulting mapping as the cell output.
label_map_dict

{'fruit_healthy': 1, 'fruit_brownspot': 2, 'fruit_woodiness': 3}

In [6]:
from os import listdir
from os.path import isfile, join

DATA_LOCATION = "/data/fruit_disease_detection/OUT_dir"
TRAIN_IMAGES = os.path.join(DATA_LOCATION, "train")
TRAIN_CSV_LOCATION = os.path.join(DATA_LOCATION, "Train.csv")
TFR_LOCATION = os.path.join(DATA_LOCATION, "train_tfrecords")

# Read and prepare the annotations before the output file is opened, so a
# missing or invalid CSV does not leave an empty record file behind.
train_df = pd.read_csv(TRAIN_CSV_LOCATION)
# Far box corners are derived from the origin (xmin/ymin) plus the box size.
train_df['xmax'] = train_df['width'] + train_df['xmin']
train_df['ymax'] = train_df['height'] + train_df['ymin']

grouped = split(train_df, "Image_ID")

path = TRAIN_IMAGES
onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
# Set copy of the listing for constant-time membership tests in the loop.
train_file_names = set(onlyfiles)

# The context manager closes the writer even if building an example fails.
with tf.python_io.TFRecordWriter(TFR_LOCATION) as writer:
    for group in grouped:
        # Only annotations whose image is present in the train folder are written.
        if "{}.jpg".format(group.filename) in train_file_names:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
            print("WROTE ", group.filename)
        else:
            print("skipped ", group.filename)

print("Successfully created TFRecord file: {}".format(TFR_LOCATION))


WROTE  ID_007FAIEI
WROTE  ID_00G8K1V3
WROTE  ID_00WROUT9
WROTE  ID_00ZJEEK3
WROTE  ID_018UIENR
skipped  ID_01QZDXVQ
WROTE  ID_02PX7GI8
WROTE  ID_03WE3EVQ
WROTE  ID_052XYWUM
WROTE  ID_055U1LPM
WROTE  ID_058OQ45J
skipped  ID_05A7I1ZP
WROTE  ID_05G9F711
WROTE  ID_05R05ZLJ
WROTE  ID_05V8XX4L
WROTE  ID_06EXUMWA
skipped  ID_06GLI7SR
WROTE  ID_06KI6V1N
WROTE  ID_06N0H7F1
WROTE  ID_07HGJOFB
WROTE  ID_07MFGPW3
WROTE  ID_08A3G5RD
WROTE  ID_08K42EBS
WROTE  ID_08PV7SQB
WROTE  ID_09C4OIQT
skipped  ID_09WS71T3
WROTE  ID_0A1VFEP3
WROTE  ID_0ACABJ4D
WROTE  ID_0APCI9O1
WROTE  ID_0B07EJ8B
WROTE  ID_0BFNU714
WROTE  ID_0C374A9P
WROTE  ID_0CISYX8S
skipped  ID_0CXLC2N5
WROTE  ID_0CY1QTWS
WROTE  ID_0DMO6F68
WROTE  ID_0EPVFVTF
WROTE  ID_0F7W3N7K
WROTE  ID_0FNZJF6B
WROTE  ID_0FP6JEZQ
WROTE  ID_0GBCSTHR
WROTE  ID_0H3KHKRV
WROTE  ID_0H68C2ZJ
WROTE  ID_0HUIQ8FI
WROTE  ID_0HWBQ5O5
WROTE  ID_0J2DOODF
WROTE  ID_0JAK8QSJ
WROTE  ID_0JOUSB83
WROTE  ID_0KYI1FGP
WROTE  ID_0LMJ61A2
WROTE  ID_0LO14LKW
WROTE  ID_0LZMACHS
WR

In [7]:
EVAL_TFR_LOCATION = os.path.join(DATA_LOCATION, "val_tfrecords")
EVAL_IMAGES = os.path.join(DATA_LOCATION, "val")
path = EVAL_IMAGES
filesInEval = [f for f in listdir(path) if isfile(join(path, f))]
# Set copy of the listing for constant-time membership tests in the loop.
eval_file_names = set(filesInEval)

# edf is the same frame object as train_df (no copy); it is expected to
# already carry the xmax/ymax columns that create_tf_example reads.
edf = train_df

egrouped = split(edf, "Image_ID")

# The context manager closes the writer even if building an example fails.
with tf.python_io.TFRecordWriter(EVAL_TFR_LOCATION) as ewriter:
    for group in egrouped:
        # Only annotations whose image is present in the val folder are written.
        if "{}.jpg".format(group.filename) in eval_file_names:
            tf_example = create_tf_example(group, path)
            ewriter.write(tf_example.SerializeToString())
            print("WROTE ", group.filename)
        else:
            print("skipped ", group.filename)

print("Successfully created TFRecord file: {}".format(EVAL_TFR_LOCATION))

skipped  ID_007FAIEI
skipped  ID_00G8K1V3
skipped  ID_00WROUT9
skipped  ID_00ZJEEK3
skipped  ID_018UIENR
skipped  ID_01QZDXVQ
skipped  ID_02PX7GI8
skipped  ID_03WE3EVQ
skipped  ID_052XYWUM
skipped  ID_055U1LPM
skipped  ID_058OQ45J
WROTE  ID_05A7I1ZP
skipped  ID_05G9F711
skipped  ID_05R05ZLJ
skipped  ID_05V8XX4L
skipped  ID_06EXUMWA
WROTE  ID_06GLI7SR
skipped  ID_06KI6V1N
skipped  ID_06N0H7F1
skipped  ID_07HGJOFB
skipped  ID_07MFGPW3
skipped  ID_08A3G5RD
skipped  ID_08K42EBS
skipped  ID_08PV7SQB
skipped  ID_09C4OIQT
WROTE  ID_09WS71T3
skipped  ID_0A1VFEP3
skipped  ID_0ACABJ4D
skipped  ID_0APCI9O1
skipped  ID_0B07EJ8B
skipped  ID_0BFNU714
skipped  ID_0C374A9P
skipped  ID_0CISYX8S
WROTE  ID_0CXLC2N5
skipped  ID_0CY1QTWS
skipped  ID_0DMO6F68
skipped  ID_0EPVFVTF
skipped  ID_0F7W3N7K
skipped  ID_0FNZJF6B
skipped  ID_0FP6JEZQ
skipped  ID_0GBCSTHR
skipped  ID_0H3KHKRV
skipped  ID_0H68C2ZJ
skipped  ID_0HUIQ8FI
skipped  ID_0HWBQ5O5
skipped  ID_0J2DOODF
skipped  ID_0JAK8QSJ
skipped  ID_0JOUSB83


In [8]:
TEST_TFR_LOCATION = os.path.join(DATA_LOCATION, "test_tfrecords")
TEST_IMAGES = os.path.join(DATA_LOCATION, "test")
path = TEST_IMAGES
# NOTE(review): filesInEval / ewriter / edf / egrouped keep their eval-era
# names but hold the test-split values from here on.
filesInEval = [f for f in listdir(path) if isfile(join(path, f))]
# Set copy of the listing for constant-time membership tests in the loop.
test_file_names = set(filesInEval)

# edf is the same frame object as train_df (no copy); it is expected to
# already carry the xmax/ymax columns that create_tf_example reads.
edf = train_df

egrouped = split(edf, "Image_ID")

# The context manager closes the writer even if building an example fails.
with tf.python_io.TFRecordWriter(TEST_TFR_LOCATION) as ewriter:
    for group in egrouped:
        # Only annotations whose image is present in the test folder are written.
        if "{}.jpg".format(group.filename) in test_file_names:
            tf_example = create_tf_example(group, path)
            ewriter.write(tf_example.SerializeToString())
            print("WROTE ", group.filename)
        else:
            print("skipped ", group.filename)

print("Successfully created TFRecord file: {}".format(TEST_TFR_LOCATION))

skipped  ID_007FAIEI
skipped  ID_00G8K1V3
skipped  ID_00WROUT9
skipped  ID_00ZJEEK3
skipped  ID_018UIENR
WROTE  ID_01QZDXVQ
skipped  ID_02PX7GI8
skipped  ID_03WE3EVQ
skipped  ID_052XYWUM
skipped  ID_055U1LPM
skipped  ID_058OQ45J
skipped  ID_05A7I1ZP
skipped  ID_05G9F711
skipped  ID_05R05ZLJ
skipped  ID_05V8XX4L
skipped  ID_06EXUMWA
skipped  ID_06GLI7SR
skipped  ID_06KI6V1N
skipped  ID_06N0H7F1
skipped  ID_07HGJOFB
skipped  ID_07MFGPW3
skipped  ID_08A3G5RD
skipped  ID_08K42EBS
skipped  ID_08PV7SQB
skipped  ID_09C4OIQT
skipped  ID_09WS71T3
skipped  ID_0A1VFEP3
skipped  ID_0ACABJ4D
skipped  ID_0APCI9O1
skipped  ID_0B07EJ8B
skipped  ID_0BFNU714
skipped  ID_0C374A9P
skipped  ID_0CISYX8S
skipped  ID_0CXLC2N5
skipped  ID_0CY1QTWS
skipped  ID_0DMO6F68
skipped  ID_0EPVFVTF
skipped  ID_0F7W3N7K
skipped  ID_0FNZJF6B
skipped  ID_0FP6JEZQ
skipped  ID_0GBCSTHR
skipped  ID_0H3KHKRV
skipped  ID_0H68C2ZJ
skipped  ID_0HUIQ8FI
skipped  ID_0HWBQ5O5
skipped  ID_0J2DOODF
skipped  ID_0JAK8QSJ
skipped  ID_0JO

In [4]:

# NOTE(review): this cell rebinds DATA_LOCATION, TRAIN_IMAGES, TRAIN_CSV_LOCATION,
# TEST_IMAGES and train_df; the TFRecord-writing cells define the same names
# from a different base directory, so results depend on execution order.
DATA_LOCATION="/progs/finiteloop/tfod/workspace/training_demo/images/"
TRAIN_IMAGES=os.path.join(DATA_LOCATION,"train")
TRAIN_CSV_LOCATION=os.path.join(DATA_LOCATION,"Train.csv")
TEST_IMAGES=os.path.join(DATA_LOCATION,"test")
TEST_CSV_LOCATION=os.path.join(DATA_LOCATION,"Test.csv")
# Reload the annotation table and preview its first rows as the cell output.
train_df=pd.read_csv(TRAIN_CSV_LOCATION)
train_df.head()

Unnamed: 0,Image_ID,class,xmin,ymin,width,height
0,ID_007FAIEI,fruit_woodiness,87.0,87.5,228.0,311.0
1,ID_00G8K1V3,fruit_brownspot,97.5,17.5,245.0,354.5
2,ID_00WROUT9,fruit_brownspot,156.5,209.5,248.0,302.5
3,ID_00ZJEEK3,fruit_healthy,125.0,193.0,254.5,217.0
4,ID_018UIENR,fruit_brownspot,79.5,232.5,233.5,182.0


In [5]:
# Add the far box corners: xmax = xmin + width, ymax = ymin + height.
# The columns are written into train_df in place; re-running recomputes the
# same values because the inputs are not modified.
train_df['xmax']=train_df['width']+train_df['xmin']
train_df['ymax']=train_df['height']+train_df['ymin']
# Preview the frame with the two new columns.
train_df.head()

Unnamed: 0,Image_ID,class,xmin,ymin,width,height,xmax,ymax
0,ID_007FAIEI,fruit_woodiness,87.0,87.5,228.0,311.0,315.0,398.5
1,ID_00G8K1V3,fruit_brownspot,97.5,17.5,245.0,354.5,342.5,372.0
2,ID_00WROUT9,fruit_brownspot,156.5,209.5,248.0,302.5,404.5,512.0
3,ID_00ZJEEK3,fruit_healthy,125.0,193.0,254.5,217.0,379.5,410.0
4,ID_018UIENR,fruit_brownspot,79.5,232.5,233.5,182.0,313.0,414.5


In [6]:
# One (filename, object) record per distinct Image_ID in train_df.
grouped=split(train_df,"Image_ID")


In [7]:
# Inspect the key of the first group (an Image_ID without file extension).
grouped[0].filename

'ID_007FAIEI'

In [13]:
# Spot-check the conversion on a single group (index 90 is an arbitrary pick).
# The displayed Example includes the raw JPEG bytes, so the output is very large.
create_tf_example(grouped[90],TRAIN_IMAGES)

features {
  feature {
    key: "image/encoded"
    value {
      bytes_list {
        value: "\377\330\377\340\000\020JFIF\000\001\001\000\000\001\000\001\000\000\377\333\000C\000\002\001\001\001\001\001\002\001\001\001\002\002\002\002\002\004\003\002\002\002\002\005\004\004\003\004\006\005\006\006\006\005\006\006\006\007\t\010\006\007\t\007\006\006\010\013\010\t\n\n\n\n\n\006\010\013\014\013\n\014\t\n\n\n\377\333\000C\001\002\002\002\002\002\002\005\003\003\005\n\007\006\007\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\377\300\000\021\010\002\000\002\000\003\001\"\000\002\021\001\003\021\001\377\304\000\037\000\000\001\005\001\001\001\001\001\001\000\000\000\000\000\000\000\000\001\002\003\004\005\006\007\010\t\n\013\377\304\000\265\020\000\002\001\003\003\002\004\003\005\005\004\004\000\000\001}\001\002\003\000\004\021\005\022!1A\006\023Qa\007\"q\0242\201\221\241\010#B\261\301\025R\321\360$3br\202\t\n\026\027\030\031\032%&\'()*4

Create TFRecords for training data

Create test TFRecords