In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as image
import os
import shutil
import random
import re
import tensorflow as tf
import numpy as np
import json
import time
import sklearn.model_selection
import sklearn.metrics

In [None]:
# True when the notebook kernel is a Google Colab kernel.
RunningInCOLAB = 'google.colab' in str(get_ipython())

if RunningInCOLAB:
    # Mount Google Drive only when the mount point is not there yet.
    if not os.path.isdir('/content/gdrive'):
        print("Running in colab")
        from google.colab import drive
        drive.mount('/content/gdrive', force_remount=True)
        colab_root = '/content/drive'

    # Project folder inside Drive; create it on first use.
    root_dir = "/content/gdrive/My Drive/"
    base_dir = root_dir + 'visual-proh/'
    if not os.path.isdir(base_dir):
        os.mkdir(base_dir)
else:
    # Outside Colab, work in whatever directory the kernel was started from.
    root_dir = base_dir = os.getcwd()

# All relative paths used later in the notebook resolve against base_dir.
os.chdir(base_dir)

os.getcwd()

In [None]:
# Image directory, relative to the current working directory; passed as
# `basedir` to loadImages() and used when displaying individual images.
im_path = 'filtered_train'

In [None]:
# Make sure the image directory exists.  makedirs(exist_ok=True) is a no-op
# when the directory is already there and avoids the check-then-create race.
os.makedirs(im_path, exist_ok=True)

In [None]:
# Peek at the first 20 entries of the image directory (order as returned by
# os.listdir).  Uses im_path so this stays in sync with the configured folder.
os.listdir(im_path)[0:20]

In [None]:
# Class labels.  The list length sets the number of model outputs, and its
# order is reused for the prediction columns and the confusion-matrix labels.
classes = ['shoe','trousers','jacket','sweatshirt']

In [None]:
def getModel(num_classes, dropout_rate=0.2):
    """Build a softmax classifier on top of a frozen EfficientNetB3 backbone.

    Args:
        num_classes: Number of units in the final softmax layer.
        dropout_rate: Dropout rate applied before the final layer.  Defaults
            to 0.2, the value that was previously hard-coded.

    Returns:
        An uncompiled Sequential model:
        backbone -> global average pooling -> dropout -> dense softmax.
    """
    # ImageNet-pretrained feature extractor without its classification head.
    basemodel = tf.keras.applications.EfficientNetB3(
        weights="imagenet",
        include_top=False
    )
    # Freeze the backbone so that only the newly added layers are trained.
    basemodel.trainable = False

    model = tf.keras.models.Sequential()
    model.add(basemodel)
    model.add(tf.keras.layers.GlobalAveragePooling2D())
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    return model

In [None]:
# Build the classifier with one output unit per entry in `classes`
# and print its layer summary.
mymodel = getModel(len(classes))
mymodel.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (targets are the per-class 0/1 columns built below), accuracy as the metric.
mymodel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Load the image index from the working directory; the first CSV column is
# used as the DataFrame index.
if os.path.isfile('filtered_df.csv'):
    filtered_df = pd.read_csv('filtered_df.csv', index_col=0)
elif 'filtered_df' not in globals():
    # Without the file and without an existing frame the next line would fail
    # with an uninformative NameError; raise an actionable error instead.
    raise FileNotFoundError(
        f"'filtered_df.csv' not found in {os.getcwd()!r} and no filtered_df is defined"
    )
filtered_df.head()

In [None]:
# Display the class list for reference.
classes

In [None]:
# One-hot encode the "class" column: add a 0/1 int32 indicator column
# named after each class label.
for current_class in classes:
    filtered_df[current_class] = filtered_df["class"].eq(current_class).astype('int32')
filtered_df.head()

In [None]:
# Hold out 20% of the rows as the test set; random_state is fixed so the
# split is reproducible.
train_df, test_df = sklearn.model_selection.train_test_split(filtered_df, test_size=0.2, random_state=1000)

In [None]:
# Carve a validation set out of the remaining training rows (15% of them).
# Note: this rebinds train_df, so re-running the cell shrinks it again.
train_df, validation_df = sklearn.model_selection.train_test_split(train_df, test_size=0.15, random_state=1000)

In [None]:
# Preview the first rows of the training split.
train_df.head()

In [None]:
def loadImages(pathlist, basedir='.', target_size=(300, 300), dtype=np.float32):
    """Load image files into one preprocessed batch array.

    Args:
        pathlist: Sized sequence of file names, relative to ``basedir``.
        basedir: Directory that the file names are joined onto.
        target_size: ``(height, width)`` every image is resized to while
            loading.  Defaults to the previously hard-coded 300x300.
        dtype: dtype of the returned array.  Defaults to float32, which
            halves memory compared to NumPy's float64 default.

    Returns:
        Array of shape ``(len(pathlist), height, width, 3)`` holding the
        images after EfficientNet ``preprocess_input``.  Relies on
        ``load_img`` returning 3-channel images (its default colour mode).
    """
    size = len(pathlist)
    height, width = target_size

    batcharr = np.zeros(shape=(size, height, width, 3), dtype=dtype)

    # Iterate over the items themselves rather than indexing pathlist[i], so
    # any sized sequence works regardless of how it is indexed.
    for i, fname in enumerate(pathlist):
        img_path = os.path.join(basedir, fname)

        # target_size must be (height, width); the channel count is not part of it.
        im = tf.keras.preprocessing.image.load_img(
            img_path,
            target_size=(height, width)
        )
        imarr = tf.keras.preprocessing.image.img_to_array(im)
        imarr = tf.keras.applications.efficientnet.preprocess_input(imarr)

        batcharr[i] = imarr

        # One-based counter so that the final update reads size/size.
        print(f'\r{i + 1}/{size}', end='')

    return batcharr

In [None]:
# Show the first file name in the training split.
train_df["fname"].values[0]

In [None]:
# Load and preprocess every training image into one in-memory array.
imgs_loaded = loadImages(train_df["fname"].values, basedir=im_path)

In [None]:
# Load and preprocess every validation image into one in-memory array.
imgs_loaded_validation = loadImages(validation_df["fname"].values, basedir=im_path)

In [None]:
# One-hot validation targets.  Columns are taken from `classes` so that their
# order always matches the label list used for the prediction columns.
validation_y = validation_df[classes].values

In [None]:
# One-hot training targets.  Columns are taken from `classes` so that their
# order always matches the label list used for the prediction columns.
train_y = train_df[classes].values

In [None]:
# Train for 5 epochs with batch size 32, passing the validation arrays as
# validation_data; `hist` keeps the object returned by fit().
hist = mymodel.fit(imgs_loaded, train_y, epochs=5, verbose=True, batch_size=32,
                    validation_data=(imgs_loaded_validation, validation_y))

In [None]:
# Save the trained model as mymodel.h5 in the current working directory.
mymodel.save('./mymodel.h5')

In [None]:
# Load and preprocess every test image, in the row order of test_df.
imgs_loaded_test = loadImages(test_df["fname"].values, basedir=im_path)

In [None]:
# One-hot test targets.  Columns are taken from `classes` so that their
# order always matches the label list used for the prediction columns.
test_y = test_df[classes].values

In [None]:
# Model outputs for the test images; the result is later wrapped in a
# DataFrame with one column per entry in `classes`.
test_y_pred = mymodel.predict(imgs_loaded_test)

In [None]:
# Display the class list for reference.
classes

In [None]:
# Wrap the predictions in a DataFrame whose columns are named after the classes.
test_y_pred_df = pd.DataFrame(test_y_pred, columns=classes)

In [None]:
# Predicted label = name of the class column holding the largest value in each
# row.  idxmax(axis=1) does this in one vectorised call instead of a Python
# lambda applied row by row.
test_y_pred_df["class"] = test_y_pred_df[classes].idxmax(axis=1)
test_y_pred_df.head()

In [None]:
# Confusion matrix on the test set as a labelled DataFrame.  Rows are the true
# classes and columns the predicted classes (scikit-learn convention), both
# ordered as in `classes`.
test_cm = pd.DataFrame(sklearn.metrics.confusion_matrix(test_df["class"], test_y_pred_df["class"], labels=classes), columns=classes, index=classes, )
test_cm

In [None]:
# Accuracy score of the predicted labels against the true test labels.
test_acc = sklearn.metrics.accuracy_score(test_df["class"], test_y_pred_df["class"])
test_acc

In [None]:
# Replace the index left over from the split with 0..n-1 so that test_df rows
# line up with the rows of test_y_pred_df.
# Note: reset_index() keeps the old index as a new column, so this cell is not
# idempotent -- run it once per kernel session.
test_df = test_df.reset_index()

In [None]:
# Attach the predicted label to each test row.  Predictions were computed from
# test_df["fname"] in row order, so assign positionally (.values) instead of
# relying on the two frames happening to share the same index.
test_df["pred_class"] = test_y_pred_df["class"].values

In [None]:
# Test rows whose predicted class differs from the true class.
errors = test_df[test_df["class"] != test_df["pred_class"]]
errors

In [None]:
# Show one misclassified test image.  Label 8 remains the preferred example,
# but it is only present in `errors` when test row 8 was misclassified, so
# fall back to the first available error instead of raising a KeyError.
if errors.empty:
    print("No misclassified test images to display.")
else:
    error_label = 8 if 8 in errors.index else errors.index[0]
    im = image.imread(os.path.join(im_path, errors.loc[error_label, "fname"]))
    plt.imshow(im)
    plt.title(
        f'true: {errors.loc[error_label, "class"]} / '
        f'predicted: {errors.loc[error_label, "pred_class"]}'
    )