In [1]:
import tensorflow as tf
import shutil
import numpy as np
import pandas as pd
import gzip
import cv2
from PIL import Image
import IPython.display as display
%matplotlib inline 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [2]:
class TFRecordExtractor:
    """Dump the images and labels of one TFRecord file into ``.npz`` archives.

    Every record is expected to carry three features (see ``_extract_fn``):
    ``image`` (raw bytes decoded as uint8 and reshaped to 299x299x1) plus the
    int64 scalars ``label`` and ``label_normal``.
    """

    # Side length, in pixels, of the square single-channel images.
    IMAGE_SIZE = 299

    def __init__(self, tfrecord_file):
        """Remember which TFRecord file to extract.

        Args:
            tfrecord_file: Path (str) of the TFRecord file. The output
                archives are written into the same directory.
        """
        self.tfrecord_file = tfrecord_file
        self.count = 0  # not read anywhere in this class; kept for existing callers

    def _extract_fn(self, tfrecord):
        """Parse one serialized example into ``(image, label, label_normal)``.

        Args:
            tfrecord: Scalar string tensor holding one serialized example.

        Returns:
            tuple: the uint8 image tensor of shape ``[299, 299, 1]`` followed
            by the int64 ``label`` and ``label_normal`` scalar tensors.
        """
        # Feature keys and dtypes must match the ones used when the file was written.
        feature_spec = {'label': tf.io.FixedLenFeature([], tf.int64),
                        'label_normal': tf.io.FixedLenFeature([], tf.int64),
                        'image': tf.io.FixedLenFeature([], tf.string)}

        parsed = tf.io.parse_single_example(tfrecord, features=feature_spec)

        # The image is stored as raw bytes, so it is decoded byte-by-byte and
        # then given its 2-D layout back.
        pixels = tf.io.decode_raw(parsed['image'], tf.uint8)
        image = tf.reshape(pixels, [self.IMAGE_SIZE, self.IMAGE_SIZE, 1])

        # A tuple is the documented way to return several components from a
        # Dataset.map function (a top-level list is converted to one anyway).
        return image, parsed['label'], parsed['label_normal']

    def _output_prefix(self):
        """Return the path prefix (directory + file stem) for output archives.

        Only the file name is cut at its first dot, so dots elsewhere in the
        path (``./x.tfrecords``, ``../data/x.tfrecords``, ``run.v1/x.tfrecords``)
        no longer truncate the prefix. A file name without any dot is used
        whole instead of raising ``ValueError``.
        """
        import os  # local import keeps the class independent of the notebook's import cell

        directory, filename = os.path.split(self.tfrecord_file)
        stem = filename.partition('.')[0] or filename
        return os.path.join(directory, stem)

    def post_process_images(self, chunk_size=700):
        """Write all images and labels of the TFRecord file to ``.npz`` files.

        Images are stacked along the last axis in groups of ``chunk_size`` and
        each group is saved as ``<stem>_<chunk index>_count: <n>.npz``. The
        ``label`` values are saved together as ``<stem>_class_arr.npz``.
        ``label_normal`` is not exported. Progress is reported with ``print``:
        the record count, then the number of images in every saved chunk.

        Args:
            chunk_size: Maximum number of images per archive (default 700).

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")

        records = self.get_images()
        print(len(records))

        print("Begin extracting in post processing")
        images = [record[0] for record in records]
        labels = [record[1] for record in records]
        print("Done extracting in post processing")

        print("Starting to stack")
        prefix = self._output_prefix()
        side = self.IMAGE_SIZE
        # Stepping through the start offsets never yields an empty chunk, so no
        # "count: 0" archive is written when the record count is a multiple of
        # chunk_size (or zero).
        for chunk_index, start in enumerate(range(0, len(images), chunk_size)):
            chunk = images[start:start + chunk_size]
            # Default float64 buffer: keeps the dtype of the saved arrays the
            # same as in archives produced by earlier runs.
            stacked = np.empty((side, side, len(chunk)))
            for slot, image in enumerate(chunk):
                stacked[:, :, slot] = image.numpy().reshape([side, side])
            print(len(chunk))
            np.savez_compressed(
                prefix + '_' + str(chunk_index) + '_' + 'count: ' + str(len(chunk)) + '.npz',
                stacked)

        np.savez_compressed(prefix + '_' + 'class_arr.npz', np.array(labels))

    def get_images(self):
        """Read every record of the TFRecord file into memory.

        Returns:
            list: one ``(image, label, label_normal)`` entry per record, in
            file order, as produced by ``_extract_fn``.
        """
        dataset = tf.data.TFRecordDataset([self.tfrecord_file])
        dataset = dataset.map(self._extract_fn)
        return [record for record in dataset]

In [4]:
# Export the training10_3 shard to compressed .npz archives (image chunks + labels).
# Printed output: total record count first, then the image count of each saved chunk.
t = TFRecordExtractor('training10_3.tfrecords') 
t.post_process_images()

11177
Begin extracting in post processing
Done extracting in post processing
Starting to stack
700
700
700
700
700
700
700
700
700
700
700
700
700
700
700
677


In [5]:
# Export the training10_2 shard to compressed .npz archives (image chunks + labels).
# Printed output: total record count first, then the image count of each saved chunk.
t = TFRecordExtractor('training10_2.tfrecords') 
t.post_process_images()

11177
Begin extracting in post processing
Done extracting in post processing
Starting to stack
700
700
700
700
700
700
700
700
700
700
700
700
700
700
700
677


In [6]:
# Export the training10_1 shard to compressed .npz archives (image chunks + labels).
# Printed output: total record count first, then the image count of each saved chunk.
t = TFRecordExtractor('training10_1.tfrecords') 
t.post_process_images()

11177
Begin extracting in post processing
Done extracting in post processing
Starting to stack
700
700
700
700
700
700
700
700
700
700
700
700
700
700
700
677


In [7]:
# Export the training10_0 shard to compressed .npz archives (image chunks + labels).
# Printed output: total record count first, then the image count of each saved chunk.
t = TFRecordExtractor('training10_0.tfrecords') 
t.post_process_images()

11177
Begin extracting in post processing
Done extracting in post processing
Starting to stack
700
700
700
700
700
700
700
700
700
700
700
700
700
700
700
677
