In [13]:
import os

In [14]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

In [15]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

import tensorflow as tf
from tensorflow import keras

%matplotlib inline


from keras.applications.inception_v3 import InceptionV3, preprocess_input

from itertools import islice

In [16]:
!pip install tqdm



In [17]:
from tqdm import tqdm

In [6]:
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_ds = train_gen.flow_from_directory(
    "../../clothing-dataset-small/train/", 
    target_size=(299, 299),
    batch_size=1,
    shuffle = False #This has to be false both because it is unecessary to shuffle the data and also for extracting valid filenames
)

Found 3068 images belonging to 10 classes.


In [18]:
train_df = pd.DataFrame()
labels = []

In [19]:
inceptionV3_model = InceptionV3(
                        weights="imagenet", 
                        include_top=False,
                        input_shape=(299, 299, 3)
                         )

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [20]:
#Try one image and check the output

# Single image for testing
PATH = "../../clothing-dataset-small/train/t-shirt"
FILE = "0285f2a0-ff21-43de-9762-6454faa5eef8.jpg"

FULL_PATH = f"{PATH}/{FILE}"

img = load_img(FULL_PATH, target_size=(299, 299))
x = np.array(img)

X = np.array([x])  # expects batch of images
X = preprocess_input(X)

pred = inceptionV3_model.predict(X)

pred.shape



(1, 8, 8, 2048)

In [21]:
features_pooled = keras.layers.GlobalAveragePooling2D()(pred)

features_pooled.shape

TensorShape([1, 2048])

In [25]:
def inception_v3_features(input_image):
    incv3_model = InceptionV3(
        weights="imagenet", include_top=False, input_shape=(299, 299, 3)
    )

    # Extract features from images
    features = incv3_model.predict(input_image, verbose = 0)

    # Pool them
    features_pooled = keras.layers.GlobalAveragePooling2D()(features)

    return features_pooled

In [51]:
num_per_batch = 500

In [58]:
train_df.shape #Found 3068 images belonging to 10 classes.

(3100, 2048)

In [59]:
train_df.duplicated().sum()

32

In [60]:
3100-3068

32

In [57]:
for img in tqdm(islice(train_ds, num_per_batch)):
    label_id = np.where(img[1] == 1)[1][0]
    labels.append(label_id)

    in_features = inception_v3_features(img[0])
    in_features = pd.DataFrame(in_features)

    train_df = pd.concat([train_df, in_features], ignore_index=True)

100it [05:30,  3.30s/it]


KeyboardInterrupt: 

In [61]:
selected_rows = train_df.head(3068)
selected_rows.shape

(3068, 2048)

In [62]:
selected_elements = labels[:3068]

In [63]:
len(selected_elements)

3068

In [64]:
selected_rows['file_id'] = train_ds.filenames

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows['file_id'] = train_ds.filenames


In [65]:
selected_rows['label'] = selected_elements

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows['label'] = selected_elements


In [66]:
selected_rows.columns = ['incV3_' + str(col) for col in selected_rows.columns]

In [67]:
path = './inception-v3-data/inception_train.csv'

selected_rows.to_csv(path, index=False)