In [77]:
import numpy as np
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import ResNet50

# Seed the Python, NumPy and TensorFlow random number generators with one
# shared value. (numpy and tensorflow are already imported at the top of this
# cell; importing them again here is harmless.)
SEED = 42
import random 
random.seed(SEED)  # Python's built-in `random` module
import numpy as np
np.random.seed(SEED)  # NumPy's global random state
import tensorflow as tf
tf.random.set_seed(SEED)  # TensorFlow's global random seed

# **Inputs**

In [78]:
# Root folder of the dataset that needs to be "cleaned".
# The cells below read it as <root>/<train|val|test>/<image|mask>/<class>/<file>.
image_folder_name = 'FMD_tvt'

Set Up Paths and Values

In [79]:
# Names of the split folders and of the image/mask folders inside each split.
tvt_folders = ['train', 'val', 'test']
im_folders = ['image', 'mask']

# The class list is read from the sub-folders of the training image folder.
Fmd_class_path = os.path.join(image_folder_name, tvt_folders[0], im_folders[0])
# os.listdir returns entries in arbitrary order and also lists plain files,
# so keep directories only and sort them to get a stable class order.
classes = sorted(
    entry for entry in os.listdir(Fmd_class_path)
    if os.path.isdir(os.path.join(Fmd_class_path, entry))
)
print(classes)

['fabric', 'foliage', 'glass', 'leather', 'metal', 'paper', 'plastic', 'stone', 'water', 'wood']


In [80]:
def _split_path(split_index, kind_index):
    """Join the dataset root with one split folder and one image/mask folder."""
    return os.path.join(image_folder_name, tvt_folders[split_index], im_folders[kind_index])


# Image (kind 0) and mask (kind 1) directories for train (0), val (1), test (2).
image_path_train = _split_path(0, 0)
mask_path_train = _split_path(0, 1)

image_path_val = _split_path(1, 0)
mask_path_val = _split_path(1, 1)

image_path_test = _split_path(2, 0)
mask_path_test = _split_path(2, 1)

In [81]:
# Delete stray 'Thumbs.db' files from the image folders so that directory
# listings contain images only. Every class folder is checked; the file is not
# guaranteed to show up in one particular class.
for tvt in tvt_folders:
    for class_ in classes:
        thumbs_path = os.path.join(image_folder_name, tvt, im_folders[0], class_, 'Thumbs.db')
        if os.path.exists(thumbs_path):
            print(tvt, 'got it', thumbs_path)
            os.remove(thumbs_path)

train got it


# Get all names together

In [82]:
# Collect the training image file names, class by class.
# These names are later looked up by dataset position, so they have to line up
# with the dataset built from tf.data.Dataset.list_files(".../*.jpg", shuffle=False).
# os.listdir returns entries in arbitrary order and includes non-image files,
# so keep only the '.jpg' entries and sort them within each class.
names_train = []
for class_ in classes:
    img_names = sorted(
        entry for entry in os.listdir(os.path.join(image_path_train, class_))
        if entry.endswith('.jpg')
    )
    names_train.extend(img_names)

print(len(names_train))

599


In [83]:
# Collect the validation image file names, class by class.
# These names are later looked up by dataset position, so they have to line up
# with the dataset built from tf.data.Dataset.list_files(".../*.jpg", shuffle=False).
# os.listdir returns entries in arbitrary order and includes non-image files,
# so keep only the '.jpg' entries and sort them within each class.
names_val = []
for class_ in classes:
    img_names = sorted(
        entry for entry in os.listdir(os.path.join(image_path_val, class_))
        if entry.endswith('.jpg')
    )
    names_val.extend(img_names)

print(len(names_val))

200


In [84]:
# Collect the test image file names, class by class.
# These names are later looked up by dataset position, so they have to line up
# with the dataset built from tf.data.Dataset.list_files(".../*.jpg", shuffle=False).
# os.listdir returns entries in arbitrary order and includes non-image files,
# so keep only the '.jpg' entries and sort them within each class.
names_test = []
for class_ in classes:
    img_names = sorted(
        entry for entry in os.listdir(os.path.join(image_path_test, class_))
        if entry.endswith('.jpg')
    )
    names_test.extend(img_names)

print(len(names_test))

201


# Load in Images in Tensorflow format

In [85]:
def fun_load_image(x):
    """Read the file at path ``x`` and decode its bytes as a JPEG image.

    No channel count is passed to the decoder, so the returned tensor keeps
    the number of channels stored in the file.
    """
    # Reading the raw bytes and decoding them are two separate TensorFlow ops.
    return tf.io.decode_jpeg(tf.io.read_file(x))


Training Data

In [86]:
# List the training .jpg files class by class (unshuffled), in `classes` order.
train_image_root = os.path.join(image_folder_name, tvt_folders[0], im_folders[0])

train_ds = tf.data.Dataset.list_files(os.path.join(train_image_root, classes[0], "*.jpg"), shuffle=False)
for class_name in classes[1:]:
    train_ds = train_ds.concatenate(
        tf.data.Dataset.list_files(os.path.join(train_image_root, class_name, "*.jpg"), shuffle=False)
    )

# Decode each file, resize it to 150x150 and divide the pixel values by 255.
train_ds = (
    train_ds
    .map(fun_load_image)
    .map(lambda x: tf.image.resize(x, (150,150)))
    .map(lambda x: x/255)
)

Validation Data

In [87]:
# List the validation .jpg files class by class (unshuffled), in `classes` order.
val_image_root = os.path.join(image_folder_name, tvt_folders[1], im_folders[0])

val_ds = tf.data.Dataset.list_files(os.path.join(val_image_root, classes[0], "*.jpg"), shuffle=False)
for class_name in classes[1:]:
    val_ds = val_ds.concatenate(
        tf.data.Dataset.list_files(os.path.join(val_image_root, class_name, "*.jpg"), shuffle=False)
    )

# Decode each file, resize it to 150x150 and divide the pixel values by 255.
val_ds = (
    val_ds
    .map(fun_load_image)
    .map(lambda x: tf.image.resize(x, (150,150)))
    .map(lambda x: x/255)
)

Test Data

In [88]:
# List the test .jpg files class by class (unshuffled), in `classes` order.
test_image_root = os.path.join(image_folder_name, tvt_folders[2], im_folders[0])

test_ds = tf.data.Dataset.list_files(os.path.join(test_image_root, classes[0], "*.jpg"), shuffle=False)
for class_name in classes[1:]:
    test_ds = test_ds.concatenate(
        tf.data.Dataset.list_files(os.path.join(test_image_root, class_name, "*.jpg"), shuffle=False)
    )

# Decode each file, resize it to 150x150 and divide the pixel values by 255.
test_ds = (
    test_ds
    .map(fun_load_image)
    .map(lambda x: tf.image.resize(x, (150,150)))
    .map(lambda x: x/255)
)


# Check for Non-RGB Images

Train Data

In [89]:
# Record the dataset position of every training image whose resized tensor
# does not have the shape (150, 150, 3).
removes_no_RGB_train = []
for position, elem in enumerate(train_ds):
    elem_shape = np.shape(elem.numpy())
    if elem_shape != (150, 150, 3):
        print(elem_shape, position)
        removes_no_RGB_train.append(position)

print(removes_no_RGB_train)

(150, 150, 1) 190
(150, 150, 1) 482
[190, 482]


Validation Data

In [90]:
# Record the dataset position of every validation image whose resized tensor
# does not have the shape (150, 150, 3).
removes_no_RGB_val = []
for position, elem in enumerate(val_ds):
    elem_shape = np.shape(elem.numpy())
    if elem_shape != (150, 150, 3):
        print(elem_shape, position)
        removes_no_RGB_val.append(position)

print(removes_no_RGB_val)

[]


Test Data

In [91]:
# Record the dataset position of every test image whose resized tensor
# does not have the shape (150, 150, 3).
removes_no_RGB_test = []
for position, elem in enumerate(test_ds):
    elem_shape = np.shape(elem.numpy())
    if elem_shape != (150, 150, 3):
        print(elem_shape, position)
        removes_no_RGB_test.append(position)

(150, 150, 1) 47


# Removal

Training Data

In [92]:
# Translate the flagged training positions into image file names.
remove_names_train = [names_train[idx] for idx in removes_no_RGB_train]

print(remove_names_train)



['leather_moderate_022_new.jpg', 'water_moderate_007_new.jpg']


Validation Data

In [93]:
# Translate the flagged validation positions into image file names.
remove_names_val = [names_val[idx] for idx in removes_no_RGB_val]

print(remove_names_val)



[]


Test Data

In [94]:
# Translate the flagged test positions into image file names.
remove_names_test = [names_test[idx] for idx in removes_no_RGB_test]

print(remove_names_test)

['glass_moderate_029_new.jpg']


Remove

In [95]:
# Delete every flagged image together with the mask file of the same name,
# one split after the other (train, then val, then test).
removal_plan = [
    (remove_names_train, image_path_train, mask_path_train),
    (remove_names_val, image_path_val, mask_path_val),
    (remove_names_test, image_path_test, mask_path_test),
]
for flagged_names, image_dir, mask_dir in removal_plan:
    for name in flagged_names:
        # The class sub-folder is the part of the file name before the first '_'.
        class_label = name.partition('_')[0]
        os.remove(os.path.join(image_dir, class_label, name))
        os.remove(os.path.join(mask_dir, class_label, name))

# Finished