#### Used Dataset 
- ../datasets/attribute_set/custom_attr.csv  

In [6]:
from helpers import read_csv_with_dtypes 
import pandas as pd 

# Load the attribute table through the project helper. According to the
# `data.info()` output it contains one object column (`paths`) followed by
# 1000 int8 attribute columns.
data = read_csv_with_dtypes("../datasets/attribute_set/custom_attr.csv")

In [7]:
# Summarise the frame: row count, column count, dtypes and memory usage.
data.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289222 entries, 0 to 289221
Columns: 1001 entries, paths to zippered
dtypes: int8(1000), object(1)
memory usage: 278.0+ MB


In [9]:
# Preview the first rows: the image path followed by the attribute columns.
data.head() 

Unnamed: 0,paths,a-line,abstract,abstract chevron,abstract chevron print,abstract diamond,abstract floral,abstract floral print,abstract geo,abstract geo print,...,zeppelin,zig,zigzag,zip,zip-front,zip-pocket,zip-up,zipped,zipper,zippered
0,img/Sheer_Pleated-Front_Blouse/img_00000001.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,img/Sheer_Pleated-Front_Blouse/img_00000002.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,img/Sheer_Pleated-Front_Blouse/img_00000003.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,img/Sheer_Pleated-Front_Blouse/img_00000004.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,img/Sheer_Pleated-Front_Blouse/img_00000005.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [104]:
# Attribute names: every column after the leading `paths` column.
LABELS = data.columns[1:].tolist()
LABELS[:5]

['a-line',
 'abstract',
 'abstract chevron',
 'abstract chevron print',
 'abstract diamond']

In [105]:
# Number of attribute labels; reused later as the width of the output layer.
len(LABELS)

1000

In [51]:
# Directory prefix that is prepended to the relative image paths in `data.paths`.
base = "../datasets/big_ds/img-001/" 
base

'../datasets/big_ds/img-001/'

In [62]:
# Prefix every relative image path with the dataset root.
# Rows that already start with `base` are left untouched, so re-running this
# cell is a no-op instead of prepending the prefix a second time.
already_prefixed = data.paths.str.startswith(base, na=False)
data.paths = data.paths.where(already_prefixed, base + data.paths)
data.head()

Unnamed: 0,paths,a-line,abstract,abstract chevron,abstract chevron print,abstract diamond,abstract floral,abstract floral print,abstract geo,abstract geo print,...,zeppelin,zig,zigzag,zip,zip-front,zip-pocket,zip-up,zipped,zipper,zippered
0,../datasets/big_ds/img-001/img/Sheer_Pleated-F...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,../datasets/big_ds/img-001/img/Sheer_Pleated-F...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,../datasets/big_ds/img-001/img/Sheer_Pleated-F...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,../datasets/big_ds/img-001/img/Sheer_Pleated-F...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,../datasets/big_ds/img-001/img/Sheer_Pleated-F...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Prepare Data Pipeline by using tf.data 

In [63]:
# Full image paths as a NumPy array, ready to be sliced into a tf.data source.
fnames = data["paths"].to_numpy()
fnames[:5]

array(['../datasets/big_ds/img-001/img/Sheer_Pleated-Front_Blouse/img_00000001.jpg',
       '../datasets/big_ds/img-001/img/Sheer_Pleated-Front_Blouse/img_00000002.jpg',
       '../datasets/big_ds/img-001/img/Sheer_Pleated-Front_Blouse/img_00000003.jpg',
       '../datasets/big_ds/img-001/img/Sheer_Pleated-Front_Blouse/img_00000004.jpg',
       '../datasets/big_ds/img-001/img/Sheer_Pleated-Front_Blouse/img_00000005.jpg'],
      dtype=object)

In [56]:
import tensorflow as tf 

# Total number of rows in the attribute table.
ds_size = len(data)
# Only the first `number_of_selected_samples` file paths go into the pipeline.
number_of_selected_samples = 2000

filelist_ds = tf.data.Dataset.from_tensor_slices(
    fnames[:number_of_selected_samples]
)

# Sanity check: the dataset should hold exactly the selected number of paths.
filelist_ds.cardinality().numpy()

2000

## Custom tf Helpers 

In [82]:
def get_label(file_path):
    """
        Look up the attribute vector that belongs to one image.

        file_path: the file path for the image that you want to select.
            May be a Python str, bytes, or a scalar string tensor (which is
            what tf.py_function hands to the wrapped function).

        returns: a 1-D float32 tensor with one value per attribute column,
            i.e. every column of `data` except the leading `paths` column.

        raises: KeyError if no row of `data` has this path.
    """
    # Inside tf.py_function the path arrives as an eager string tensor whose
    # value is bytes; normalise it to str so it can match `data.paths`.
    if hasattr(file_path, "numpy"):
        file_path = file_path.numpy()
    if isinstance(file_path, bytes):
        file_path = file_path.decode("utf-8")

    # Drop the `paths` column before converting, so only numeric attribute
    # columns are cast (no detour through a mixed object array).
    matches = data.loc[data.paths == file_path].iloc[:, 1:]
    if matches.empty:
        # Fail loudly instead of silently returning an empty label tensor.
        raise KeyError(f"no label row found for image path {file_path!r}")

    # Take the first match so a duplicated path still yields one 1-D vector.
    labels = matches.iloc[0].to_numpy(dtype="float32")
    return tf.convert_to_tensor(labels)

In [None]:
# get_label(fnames[0])

In [84]:
# Images are shrunk to a small fixed size and converted to float32 so that
# training is cheaper.
IMG_WIDTH = 64
IMG_HEIGHT = 64


def decode_img(img):
    """
        Turn raw JPEG bytes into a resized float32 RGB image tensor.

        img: the encoded JPEG file contents (as returned by tf.io.read_file)
    """
    # Decode as a 3-channel (colour) image.
    pixels = tf.image.decode_jpeg(img, channels=3)
    # Convert to float32 (convert_image_dtype also rescales integer pixels).
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    # NOTE(review): tf.image.resize takes `size` as [new_height, new_width].
    # The order used here only matters for non-square targets, and the model's
    # Input layer uses the same (IMG_WIDTH, IMG_HEIGHT) order, so change both
    # together if the dimensions ever differ.
    target_size = [IMG_WIDTH, IMG_HEIGHT]
    return tf.image.resize(pixels, target_size)

In [85]:
def combine_images_labels(file_path: tf.Tensor):
    """
        Build one (image, label) training example from an image path.

        file_path: path of the JPEG file; also used as the key for the label lookup

        returns: tuple of (decoded and resized image tensor, label tensor)
    """
    # Label lookup happens first, then the file is read and decoded.
    label = get_label(file_path)
    image = decode_img(tf.io.read_file(file_path))
    return image, label

# Train/Test Split 

In [96]:
train_ratio = 0.80
train_size = int(number_of_selected_samples * train_ratio)

# The file list is in CSV row order (consecutive images of the same folder,
# see `fnames[:5]`), so a plain head/tail split would give train and test
# different folders. Shuffle once with a fixed seed before splitting.
# reshuffle_each_iteration=False keeps the order identical on every pass,
# which guarantees the take/skip partitions never overlap.
shuffled_ds = filelist_ds.shuffle(
    buffer_size=number_of_selected_samples,
    seed=42,
    reshuffle_each_iteration=False,
)
ds_train = shuffled_ds.take(train_size)
ds_test = shuffled_ds.skip(train_size)

In [59]:
# Number of examples per batch in the train and test pipelines.
BATCH_SIZE = 32 

## Pre-process All the Images 

In [97]:
def _load_train_example(path):
    """
        Load one (image, label) pair for the training set.

        path: scalar string tensor holding the image file path

        returns: (image, label) tensors with their static shapes restored
    """
    img, label = tf.py_function(func=combine_images_labels,
                                inp=[path],  # input of the function
                                # get_label casts to float32, so the declared
                                # label dtype must be float32 as well
                                Tout=(tf.float32, tf.float32))  # return types
    # tf.py_function outputs carry no static shape; declare it explicitly so
    # Keras can build against the dataset.
    img.set_shape([IMG_WIDTH, IMG_HEIGHT, 3])  # same order as decode_img's resize
    label.set_shape([len(LABELS)])
    return img, label


ds_train = ds_train.map(_load_train_example,
                        num_parallel_calls=tf.data.AUTOTUNE,  # parallelizing data extraction
                        deterministic=False
                        )

In [98]:
def _load_test_example(path):
    """
        Load one (image, label) pair for the test set.

        path: scalar string tensor holding the image file path

        returns: (image, label) tensors with their static shapes restored
    """
    img, label = tf.py_function(func=combine_images_labels,
                                inp=[path],
                                # get_label casts to float32, so the declared
                                # label dtype must be float32 as well
                                Tout=(tf.float32, tf.float32))
    # tf.py_function outputs carry no static shape; declare it explicitly so
    # Keras can build against the dataset.
    img.set_shape([IMG_WIDTH, IMG_HEIGHT, 3])  # same order as decode_img's resize
    label.set_shape([len(LABELS)])
    return img, label


ds_test = ds_test.map(_load_test_example,
                      num_parallel_calls=tf.data.AUTOTUNE,
                      deterministic=False)

### Prepare Data Pipeline 

- **batch**(): Combines consecutive elements of this dataset into batches.
- **cache**(): Caches the elements in this dataset. The first time the dataset is iterated over, its elements will be cached either in the specified file or in memory. Subsequent iterations will use the cached data.
- **prefetch**(): Creates a Dataset that prefetches elements from this dataset. Most dataset input pipelines should end with a call to *prefetch*. This allows later elements to be prepared while the current element is being processed. This often improves latency and throughput, at the cost of using additional memory to store prefetched elements.
 

In [99]:
# Cache the decoded examples (not the finished batches) so that images are
# read and decoded only once, while the training set can still be reshuffled
# into different batches on every epoch.
ds_train_batched = (
    ds_train
    .cache()
    # buffer >= training set size, so this is a full shuffle each epoch
    .shuffle(number_of_selected_samples, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)  # same constant as in the map cells
)
# Evaluation data needs no shuffling.
ds_test_batched = (
    ds_test
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [100]:
# Number of batches in the training pipeline.
ds_train_batched.cardinality().numpy() 

50

## Create a Keras CNN model by using Transfer learning

In [101]:
from tensorflow import keras 
# VGG16 without its classification head, used as the feature extractor.
base_model = keras.applications.VGG16(
    include_top=False,  # leave out the ImageNet output layers
    weights="imagenet",  # start from weights pre-trained on ImageNet
    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),  # VGG16 needs at least 32 x 32
)
# Freeze the pre-trained weights; only layers added on top will be trained.
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [106]:
# One output unit per attribute label.
nr_of_classes = len(LABELS) 
nr_of_classes

1000

In [108]:
# Settings for the classification head: seeded Glorot initialisation and an
# independent sigmoid per attribute (multi-label output).
initializer = tf.keras.initializers.GlorotUniform(seed=42)
activation = tf.keras.activations.sigmoid

# Frozen VGG16 features -> global average pooling -> dense sigmoid head.
inputs = keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
x = base_model(inputs)
x = keras.layers.GlobalAveragePooling2D()(x)

outputs = keras.layers.Dense(
    units=nr_of_classes,
    activation=activation,
    kernel_initializer=initializer,
)(x)

model = keras.Model(inputs=inputs, outputs=outputs)

## Compile and Train the Model 

In [110]:
# Binary cross-entropy per attribute; the head already applies a sigmoid, so
# the loss keeps its default from_logits=False.
model.compile(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.BinaryAccuracy()],
)

In [111]:
# Train on the batched training pipeline and validate on the held-out batches
# after every epoch.
model.fit(
    x=ds_train_batched,
    epochs=100,
    validation_data=ds_test_batched,
)

Epoch 1/100
