In [248]:
import pathlib
import os

import matplotlib.pyplot as plt
import time
import numpy as np
import pandas as pd
import scipy.ndimage as ndimage
import tensorflow as tf
from tensorflow.keras.layers import Dense,GlobalMaxPooling2D,Dropout,Flatten,BatchNormalization,Activation
from tensorflow.keras.applications import NASNetLarge,VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.applications.nasnet import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array,load_img,ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

In [249]:
# Display the TensorFlow version in use so results can be tied to a specific release.
tf.__version__

'2.2.0'

In [250]:
# --- Notebook configuration --------------------------------------------------
# Dataset root. GCS_PATTERN is, despite its name, a plain directory prefix
# (trailing slash, no wildcard) that file names are appended to.
folder = './datasets/AGGDATA'
GCS_PATTERN = f'{folder}/img/'

# Two-element image size, both entries 331 (presumably height and width —
# matches the 331x331 resize applied when images are loaded).
IMAGE_SIZE = [331, 331]

# Mini-batch size and the fraction of files held out for validation.
batch_size1 = 128
VALIDATION_SPLIT = 0.3

In [9]:
# List every entry under the image directory. GCS_PATTERN is a bare directory
# prefix (it ends in '/' and contains no wildcard), so '*' is appended here;
# globbing the prefix alone can match at most the directory itself, which
# would leave both splits and both step counts at zero.
filenames = tf.io.gfile.glob(GCS_PATTERN + '*')

# The first VALIDATION_SPLIT fraction of the listing is held out for
# validation; the remainder is used for training.
split = int(len(filenames) * VALIDATION_SPLIT)
training_filenames = filenames[split:]
validation_filenames = filenames[:split]
print("Pattern matches {} data files. Splitting dataset into {} training files and {} validation files".format(len(filenames), len(training_filenames), len(validation_filenames)))

# Whole batches only: a trailing partial batch is not counted.
validation_steps = len(validation_filenames) // batch_size1
steps_per_epoch = len(training_filenames) // batch_size1
print("With a batch size of {}, there will be {} batches per training epoch and {} batch(es) per validation run.".format(batch_size1, steps_per_epoch, validation_steps))

Pattern matches 15771 data files. Splitting dataset into 11040 training files and 4731 validation files
With a batch size of 128, there will be 86 batches per training epoch and 36 batch(es) per validation run.


In [16]:
# The ratings CSV has no header row, so the two columns are named here:
# 'id' (later used as an image file name) and 'score'.
train_label_df = pd.read_csv(
    folder + '/All_Ratings.csv',
    delimiter=',',
    header=None,
    names=['id', 'score'],
)

In [93]:
# Targets: the 'score' column converted to a float32 tensor.
score_tensor = tf.convert_to_tensor(train_label_df['score'].astype('float32'))
# Display the tensor that was just built rather than converting the column a
# second time only to show it.
score_tensor

<tf.Tensor: shape=(7816,), dtype=float32, numpy=array([5.133, 5.55 , 5.178, ..., 6.622, 3.87 , 5.04 ], dtype=float32)>

In [18]:
# Eagerly load, augment and resize every labelled image into a Python list.
# The random flip and rotation are drawn once per image here and are unseeded,
# so each run of this cell yields a different (but from then on fixed) set.
# NOTE(review): every image is kept in memory as float32 at IMAGE_SIZE
# resolution — confirm this fits in RAM for the full ratings file.
img_tensor = []
for image_id in train_label_df['id']:
    # Use `img`, not `image`: the latter would overwrite the
    # tensorflow.keras.preprocessing `image` module imported at the top.
    img = tf.io.read_file(GCS_PATTERN + image_id)
    # Force 3 channels so grayscale JPEGs decode to the same shape as colour
    # ones; otherwise the list cannot later be stacked into a single tensor.
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.random_flip_left_right(img)
    # Random rotation by an angle drawn from [-15, 15) degrees; reshape=False
    # keeps the array the same shape as the input.
    img = ndimage.rotate(img, np.random.uniform(-15, 15), reshape=False)
    # Same 331x331 target as before, now taken from the config constant.
    img = tf.image.resize_with_pad(img, IMAGE_SIZE[0], IMAGE_SIZE[1], method='lanczos3')
    img_tensor.append(img)

In [None]:
# Convert the list of per-image tensors into a single tensor and display it.
# Despite its name, id_tensor holds image data, not ids.
# NOTE(review): every element of img_tensor must have the same shape for this
# conversion to succeed — confirm all images decode with the same channel count.
id_tensor = tf.convert_to_tensor(img_tensor)
id_tensor

In [80]:
# Build a tf.data dataset from the images and their scores, paired element-wise.
# Note that the Python list img_tensor is passed here, not id_tensor.
dataset = tf.data.Dataset.from_tensor_slices((img_tensor, score_tensor))

In [251]:
# Lookup table from the 'id' column (image file name) to the 'score' column.
# If the same id occurs in several rows, the last row wins.
dic = {
    train_label_df['id'][row]: train_label_df['score'][row]
    for row in range(len(train_label_df))
}

In [252]:
def _bytes_feature(value):
  """Wrap a single string / bytes value in a tf.train.Feature (bytes_list).

  If `value` is an eager tensor it is first converted with `.numpy()`,
  because BytesList won't unpack a string from an EagerTensor.
  """
  eager_tensor_type = type(tf.constant(0))
  raw = value.numpy() if isinstance(value, eager_tensor_type) else value
  bytes_list = tf.train.BytesList(value=[raw])
  return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Wrap a single float / double in a tf.train.Feature (float_list)."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
  """Wrap a single bool / enum / int / uint in a tf.train.Feature (int64_list)."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

In [253]:
def image_example(image_string, label):
  """Build a tf.train.Example for one encoded JPEG and its numeric label.

  Args:
    image_string: encoded JPEG bytes; stored unchanged under 'image_raw'.
    label: value stored as a float feature under the key 'id'. Despite the
      key name, this is the label passed in, not an identifier.

  Returns:
    A tf.train.Example with the features 'height', 'width', 'depth', 'id'
    and 'image_raw'.
  """
  # The image is decoded only to read its dimensions; the encoded bytes are
  # what gets stored in the record.
  decoded_shape = tf.image.decode_jpeg(image_string).shape
  height = decoded_shape[0]
  width = decoded_shape[1]
  depth = decoded_shape[2]

  features = tf.train.Features(feature={
      'height': _int64_feature(height),
      'width': _int64_feature(width),
      'depth': _int64_feature(depth),
      'id': _float_feature(label),
      'image_raw': _bytes_feature(image_string),
  })
  return tf.train.Example(features=features)

In [254]:
# Path of the TFRecord file this notebook writes (relative to the working directory).
record_file = 'behold.tfrecords'

In [255]:
# Serialize every (file name, score) pair in `dic` into one TFRecord file.
with tf.io.TFRecordWriter(record_file) as writer:
  for filename, label in dic.items():
    # Read the encoded bytes inside a `with` block so each file handle is
    # closed immediately instead of leaking one open handle per image.
    with open(GCS_PATTERN + filename, 'rb') as image_file:
      image_string = image_file.read()
    tf_example = image_example(image_string, label)
    writer.write(tf_example.SerializeToString())

In [149]:
# Sanity check: reading an image file in binary mode yields `bytes`.
# The file is opened in a `with` block so the handle is closed afterwards.
with open(GCS_PATTERN + 'AM829.jpg', 'rb') as _sample_file:
    _sample_type = type(_sample_file.read())
_sample_type

bytes

In [256]:
# Inspect the last tf.train.Example built by the writer loop (the loop variable
# tf_example keeps the value from its final iteration).
tf_example

features {
  feature {
    key: "depth"
    value {
      int64_list {
        value: 3
      }
    }
  }
  feature {
    key: "height"
    value {
      int64_list {
        value: 350
      }
    }
  }
  feature {
    key: "id"
    value {
      float_list {
        value: 5.039999961853027
      }
    }
  }
  feature {
    key: "image_raw"
    value {
      bytes_list {
        value: "\377\330\377\340\000\020JFIF\000\001\001\000\000\001\000\001\000\000\377\333\000C\000\002\001\001\001\001\001\002\001\001\001\002\002\002\002\002\004\003\002\002\002\002\005\004\004\003\004\006\005\006\006\006\005\006\006\006\007\t\010\006\007\t\007\006\006\010\013\010\t\n\n\n\n\n\006\010\013\014\013\n\014\t\n\n\n\377\333\000C\001\002\002\002\002\002\002\005\003\003\005\n\007\006\007\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\377\300\000\021\010\001^\001^\003\001\"\000\002\021\001\003\021\001\377\304\000\037\000\000\001\005\001\001\001\001\001\0