In [0]:
import os, glob

import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [0]:
from google.colab import drive
# Mount Google Drive and make the mount point the working directory, so the
# relative 'My Drive/...' paths used further down resolve against /gdrive.
drive.mount('/gdrive')
%cd /gdrive

In [0]:
# Project root on the mounted drive (relative to the /gdrive working directory).
project_dir = 'My Drive/projects/ING/Experiment_week/garbage_segmentation/'
data_dir = os.path.join(project_dir, 'data', 'raw')

# Prefer the shared models folder when it exists, otherwise fall back to the
# local one. os.path.join is used because project_dir already ends with '/',
# so plain string concatenation produced paths with a doubled separator.
models_dir = os.path.join(project_dir, 'models_shared')
if not os.path.exists(models_dir):
  models_dir = os.path.join(project_dir, 'models')

# Sanity check that the data is reachable: peek at a few training files.
os.listdir(os.path.join(data_dir, 'train', 'paper'))[:5]

In [0]:
# One class per sub-folder of the training directory.
# NOTE(review): os.listdir returns entries in arbitrary order and the class
# indices built from this list follow that order — confirm it matches the
# order that was used when the model was trained.
classes = os.listdir(os.path.join(data_dir, 'train'))

def to_categorical(labels, label_to_index):
    """One-hot encode label names.

    Args:
        labels: sequence of label names.
        label_to_index: dict mapping each label name to its integer class index.

    Returns:
        The result of `tf.keras.utils.to_categorical` for the mapped indices,
        with `num_classes=len(label_to_index)`.

    Raises:
        ValueError: if any label is not a key of `label_to_index`.
    """
    labels_series = pd.Series(labels)
    labels_int = labels_series.map(label_to_index)
    # Unmapped labels come back as NaN; fail here with a readable message
    # instead of letting the NaN reach the integer conversion inside Keras.
    unmapped = labels_int.isna()
    if unmapped.any():
        unknown = sorted(set(labels_series[unmapped].astype(str)))
        raise ValueError(f"Unknown labels (not in label_to_index): {unknown}")
    return tf.keras.utils.to_categorical(labels_int.astype(int), num_classes=len(label_to_index))

# Two-way lookup between class names and their integer ids (position in `classes`).
label_to_index = {name: idx for idx, name in enumerate(classes)}
index_to_label = {idx: name for name, idx in label_to_index.items()}
classes, label_to_index

In [0]:
class ImagePreprocessing(object):
  """Loads image files and turns them into ResNet50 feature embeddings.

  Attributes set by the constructor:
    target_size: (height, width) every image is resized to on load.
    preprocess_input: the ResNet50 input-normalisation function.
    input_shape: `target_size` plus 3 colour channels.
    model_base: ResNet50 built with `include_top=False`, used for `predict` only.
  """

  def __init__(self):
    self.target_size = (224, 224)
    self.preprocess_input = tf.keras.applications.resnet50.preprocess_input
    # Derived from target_size so the two can never drift apart.
    self.input_shape = self.target_size + (3,)
    self.model_base = tf.keras.applications.resnet50.ResNet50(include_top=False, input_shape=self.input_shape)

  def load_image(self, filename):
    """Read `filename`, resize it to `target_size` and return it as an array."""
    image = tf.keras.preprocessing.image.load_img(filename, target_size=self.target_size)
    image = tf.keras.preprocessing.image.img_to_array(image)
    return image

  def process(self, filename_or_list, verbose=1):
    """Load images and compute their embeddings.

    Args:
      filename_or_list: a single path (str or os.PathLike) or any iterable of
        paths. Paths are expected to end in '<set>/<label>/<name>', separated
        by '/'.
      verbose: forwarded to `model_base.predict`; 0 also hides the progress bars.

    Returns:
      DataFrame with one row per file and the columns 'path', 'name', 'label',
      'set', 'img' (the loaded, un-normalised image) and 'img_embed' (the
      `model_base` output for that image). Empty input gives an empty frame
      with the same columns.
    """
    columns = ['path', 'name', 'label', 'set', 'img', 'img_embed']
    if isinstance(filename_or_list, (str, os.PathLike)):
      lst = [filename_or_list]
    else:
      # Accept tuples, arrays, generators, ... and not only `list`.
      lst = list(filename_or_list)
    lst = [str(path) for path in lst]
    if not lst:
      # np.stack cannot handle an empty sequence; nothing to embed anyway.
      return pd.DataFrame(columns=columns)

    df = pd.DataFrame({'path': lst})
    parts = df['path'].str.split('/')
    df['name'] = parts.str[-1]
    df['label'] = parts.str[-2]
    df['set'] = parts.str[-3]

    hide_bars = not verbose
    df['img'] = [self.load_image(filename) for filename in tqdm(lst, disable=hide_bars)]
    # preprocess_input overwrites float arrays in place, so hand it a copy;
    # otherwise df['img'] would end up holding the normalised pixels.
    img_preped = np.stack([self.preprocess_input(np.copy(x)) for x in tqdm(df['img'].values, disable=hide_bars)])
    img_embed = self.model_base.predict(img_preped, batch_size=20, verbose=verbose)
    df['img_embed'] = list(img_embed)
    return df[columns]

# Shared preprocessing instance; constructing it builds the ResNet50 base model once.
img_pre = ImagePreprocessing()

In [0]:
# Every file under <data_dir>/test/<label>/, sorted so the order is stable between runs.
test_pattern = data_dir + '/test/*/*'
test_names = sorted(glob.glob(test_pattern))
print(len(test_names))
print(*test_names[:2], sep="\n")

In [0]:
# Small, reproducible sample of test images for a quick end-to-end check.
np.random.seed(1)
# np.random.choice samples WITH replacement by default, which can pick the
# same image twice; draw distinct files and cap the size for tiny test sets.
sample_size = min(10, len(test_names))
test_sample = np.random.choice(test_names, sample_size, replace=False).tolist()
print("\n".join(test_sample))

In [0]:
# Trained classifier that is applied to the ResNet50 embeddings below.
model_path = models_dir + "/model0.h5"
model = tf.keras.models.load_model(model_path)

In [0]:
# Embed the sampled images and classify them.
df = img_pre.process(test_sample)
pred = model.predict(np.stack(df['img_embed'].tolist()))

# Quick look at the raw output: shape, mean score per class, first row.
print(pred.shape)
print(pred.mean(axis=0))
print(pred[0])

# Highest-scoring class per image, its score, and the readable class name.
df['pred'] = pred.argmax(axis=1)
df['pred_pr'] = pred.max(axis=1)
df['pred_label'] = df['pred'].map(index_to_label)
df[['label', 'pred_label']]

In [0]:
# Same pipeline as for the sample, now on the complete test set.
test_df = img_pre.process(test_names)
pred = model.predict(np.stack(test_df['img_embed'].tolist()))

# Highest-scoring class per image, its score, and the readable class name.
test_df['pred'] = pred.argmax(axis=1)
test_df['pred_pr'] = pred.max(axis=1)
test_df['pred_label'] = test_df['pred'].map(index_to_label)
display(test_df[['label', 'pred_label', 'pred_pr']].head(10))

In [0]:
# Confusion table: rows are true labels, columns are predicted labels, cells are counts.
pair_counts = test_df.groupby(['label', 'pred_label']).size()
confusion = pair_counts.unstack(level=1).fillna(0).astype(int)
display(confusion)

In [0]:
from ipywidgets import interactive

def show_samples(label='glass', pred_label='metal', count=3):
  """Show test images of class `label` that the model predicted as `pred_label`.

  Reads the notebook-level `test_df`. Displays a table with the first `count`
  matching rows and then draws one image per row; draws nothing when there is
  no match.

  Args:
    label: true class to filter on.
    pred_label: predicted class to filter on.
    count: maximum number of samples to show.
  """
  matches = (test_df['label'] == label) & (test_df['pred_label'] == pred_label)
  sel = test_df[matches].head(count)
  display(sel[['name', 'label', 'pred_label', 'pred_pr']])
  if len(sel) == 0:
    return
  fig, axs = plt.subplots(nrows=len(sel), figsize=(10, 5*len(sel)))
  if len(sel) == 1:
    # plt.subplots returns a bare Axes (not an array) for a single panel.
    axs = [axs]
  for i in range(len(sel)):
    row = sel.iloc[i]
    # Default origin ('upper') puts image row 0 at the top; the previous
    # origin='lower' rendered every picture upside-down.
    axs[i].imshow(tf.keras.preprocessing.image.array_to_img(row['img']))
    axs[i].set_title(f"{row['name']}: {row['label']} -> {row['pred_label']} (p={row['pred_pr']:.2f})")
    axs[i].axis('off')
  plt.show()

# Interactive browser for (mis)classified test images: choose the true class,
# the predicted class and how many samples to show.
interactive(show_samples, label=classes, pred_label=classes, count=[3, 5, 10])