

---


### Import TensorFlow and helper libraries


---



In [0]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2



---


### Check TensorFlow (tested with 2.2.0-rc2)


---



In [0]:
# Show the installed TensorFlow version so it can be compared with the tested one.
print(tf.__version__)



---


### Check Python (tested with 3.6.9)


---



In [0]:
# Show the interpreter version so it can be compared with the tested one.
from platform import python_version
print(python_version())



---


### Upload dataset to Colab and unzip

"Upload" here means cloning the whole repository, which contains the zipped dataset.

After unzipping, your dataset should be available in the **/content** folder:
* ***train*** folder - */content/dataset_trash/train*
* ***test*** folder - */content/dataset_trash/test*


---



In [0]:
# Fetch the project repository; the unzip cell reads dataset_trash.zip from it.
!git clone https://github.com/marcin-ch/Trash_Image_Classification.git

In [0]:
# Unpack the dataset archive that ships inside the cloned repository folder.
!unzip /content/Trash_Image_Classification/dataset_trash.zip #when your zipped dataset is in CONTENT folder

In [0]:
# Show the current working directory of the notebook process.
print(os.getcwd())



---


### Create arrays with images and labels


---



In [0]:
#TRAIN IMAGES
# Load every .jpg file of the training folder as a resized RGB array.
# The class label is the part of the file name before the first underscore.
train_images = []
train_labels = []
shape = (32,32) # target size handed to cv2.resize; the test-loading cell reuses it

train_dir = '/content/dataset_trash/train'
for i in os.listdir(train_dir):
    # Filter on the extension BEFORE decoding, so non-image entries are never read.
    # splitext also copes with names that contain no dot or more than one dot.
    if os.path.splitext(i)[1] != '.jpg':
        continue

    img = cv2.imread(os.path.join(train_dir, i))
    if img is None:
        # cv2.imread signals an undecodable file by returning None instead of raising.
        print("Skipping unreadable image:", i)
        continue

    img2 = cv2.resize(img, shape)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB) #changing from BGR to RGB (OpenCV uses BGR, matplotlib for displaying uses RGB)
    train_images.append(img2)
    train_labels.append(i.split('_')[0])

In [0]:
#TEST IMAGES
# Load every .jpg file of the test folder as a resized RGB array.
# The class label is the part of the file name before the first underscore.
# `shape` (the resize target) is defined in the training-images cell.
test_images = []
test_labels = []

test_dir = '/content/dataset_trash/test'
for i in os.listdir(test_dir):
    # Filter on the extension BEFORE decoding, so non-image entries are never read.
    # splitext also copes with names that contain no dot or more than one dot.
    if os.path.splitext(i)[1] != '.jpg':
        continue

    img = cv2.imread(os.path.join(test_dir, i))
    if img is None:
        # cv2.imread signals an undecodable file by returning None instead of raising.
        print("Skipping unreadable image:", i)
        continue

    img2 = cv2.resize(img, shape)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB) # OpenCV decodes to BGR; matplotlib expects RGB
    test_images.append(img2)
    test_labels.append(i.split('_')[0])



---


### Verify the **TRAINING** dataset

To verify that the dataset looks correct, let's plot the first 10 images from the **training** set and display the class name below each image.


---




In [0]:
# First training image on its own, titled with its label.
plt.imshow(train_images[0])
plt.title(train_labels[0])

# Then the first ten training images side by side in a single row,
# each with its label underneath and its index on the left.
preview_count = 10
plt.figure(figsize=(20, 20))
for idx in range(preview_count):
    ax = plt.subplot(1, preview_count, idx + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(train_images[idx], cmap=plt.cm.binary)
    ax.set_xlabel(train_labels[idx])
    ax.set_ylabel(idx)
plt.show()



---


### Verify the **TESTING** dataset

To verify that the dataset looks correct, let's plot the first 10 images from the **testing** set and display the class name below each image.


---




In [0]:
# First test image on its own, titled with its label.
plt.imshow(test_images[0])
plt.title(test_labels[0])

# Then the first ten test images side by side in a single row,
# each with its label underneath and its index on the left.
preview_count = 10
plt.figure(figsize=(20, 20))
for idx in range(preview_count):
    ax = plt.subplot(1, preview_count, idx + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(test_images[idx], cmap=plt.cm.binary)
    ax.set_xlabel(test_labels[idx])
    ax.set_ylabel(idx)
plt.show()



---


### Convert **images** to Numpy and normalize values to be between 0 and 1


---



In [0]:
# Stack the per-image arrays into one NumPy array per split.
train_images = np.array(train_images)
print("TRAIN shape", train_images.shape)

test_images = np.array(test_images)
print("TEST shape", test_images.shape)

# Normalize pixel values to be between 0 and 1.
# Only integer-typed (raw) pixel data is scaled: after the first run the arrays
# are floating point, so re-running this cell does not divide by 255 a second time.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255.0



---


### Convert **labels** to Numpy and change names of trash (strings) to corresponding integers

* `cardboard` -> `0`
* `glass` -> `1`
* `metal` -> `2`
* `paper` -> `3`
* `plastic` -> `4`
* `trash` -> `5`


---



In [0]:
# Integer id of every class name that may appear as a file-name prefix.
LABEL_TO_INDEX = {
    "cardboard": 0,
    "glass": 1,
    "metal": 2,
    "paper": 3,
    "plastic": 4,
    "trash": 5,
}


def encode_labels(labels):
    """Translate class-name strings into their integer ids.

    Args:
        labels: list or NumPy array holding class names, or integer ids
            produced by an earlier run of this cell.

    Returns:
        1-D NumPy array of dtype int with one id per input label.

    Raises:
        ValueError: if a label is not a key of LABEL_TO_INDEX.
    """
    labels = np.asarray(labels)
    if labels.dtype.kind in "iu":
        # Already integer ids (the cell was run before) - nothing to translate.
        return labels.astype(int)

    names = labels.tolist()
    unknown = sorted(set(names) - set(LABEL_TO_INDEX))
    if unknown:
        # Fail with the offending names instead of an opaque int() parsing error.
        raise ValueError("Unknown labels: {}".format(unknown))
    return np.array([LABEL_TO_INDEX[name] for name in names], dtype=int)


print("* * * * * * TRAIN LABELS * * * * *")
train_labels = np.array(train_labels)
# Fresh from the loading cell this is a string array, which cannot be used for training.
print("Data type of the array train_labels is:", train_labels.dtype)

train_labels = encode_labels(train_labels) #changing names to integer ids
print("New type of the array train_labels is: ", train_labels.dtype)

print("Shape of train_labels is: ", train_labels.shape)

print("\n* * * * * * TEST LABELS * * * * *")
test_labels = np.array(test_labels)
print("Data type of the array test_labels is:", test_labels.dtype)

test_labels = encode_labels(test_labels)
print("New type of the array test_labels is: ", test_labels.dtype)

print("Shape of test_labels is: ", test_labels.shape)



---


### Define the class names, input shape and number of outputs


---



In [0]:
# The position of a name in this list is its integer label:
# cardboard->0, glass->1, metal->2, paper->3, plastic->4, trash->5
class_names = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']

# The shape of the first training image is used as the network input shape.
input_shape = np.shape(train_images[0])
print("INPUT_SHAPE", input_shape)

# One network output per class.
output = len(class_names)
print("OUTPUT CLASSES", output)



---


### Create the convolutional base


---



In [0]:
# Three convolution stages followed by a small dense head.
# Dropout(0.2) closes every stage. The final Dense layer has no activation,
# so the network emits one raw score per class.
model = models.Sequential([
    # Stage 1
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Stage 2
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Stage 3 (no pooling)
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Dropout(0.2),

    # Classifier head
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(output),
])

model.summary()



---


### Compile and train the model
https://www.tensorflow.org/js/guide/train_models#optimizer_loss_and_metric


---



In [0]:
# The last layer of the model has no activation, so the loss is told to expect logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# The test split is passed as validation data, so val_accuracy in the
# history is measured on the test images.
history = model.fit(
    train_images,
    train_labels,
    epochs=50,
    validation_data=(test_images, test_labels),
)



---


### Evaluate the model


---



In [0]:
#model.metrics_names

In [0]:
# Accuracy curves recorded by model.fit: one line per metric, labelled by its key.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name], label=metric_name)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.0, 1])
plt.legend(loc='lower right')

# Score the trained model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)



---


### Add Softmax layer
https://www.tensorflow.org/tutorials/keras/classification#make_predictions

With the model trained, you can use it to make predictions about some images. The model outputs linear scores, known as [logits](https://developers.google.com/machine-learning/glossary#logits). Attach a softmax layer to convert the logits to probabilities, which are easier to interpret.


---



In [0]:
# Wrap the trained network and append a Softmax layer so the
# raw class scores come out as probabilities.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

probability_model.summary()



---


### Make predictions


---



In [0]:
# Run the softmax-wrapped model on the test images; the plotting cells
# index the result as predictions[i] for test image i.
predictions = probability_model.predict(test_images)



---


### Verify predictions
https://www.tensorflow.org/tutorials/keras/classification#verify_predictions

Correct prediction labels are blue and incorrect prediction labels are red. The number gives the percentage (out of 100) for the predicted label.

---



In [0]:
#Helper functions for drawing graphs

def plot_image(i, predictions_array, true_label, img):
  """Draw sample `i` with a caption comparing prediction and ground truth.

  Args:
    i: index of the sample inside `true_label` and `img`.
    predictions_array: per-class scores of sample `i`.
    true_label: integer class ids of all samples; indexed with `i`.
    img: images of all samples; indexed with `i`.

  The caption reads "<predicted> <score>% (<actual>)". It is blue when the
  predicted class equals the true class and red otherwise. Class names are
  looked up in the notebook-level `class_names` list.
  """
  actual = true_label[i]
  picture = img[i]

  # Bare image: no grid, no ticks.
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  plt.imshow(picture, cmap=plt.cm.binary)

  predicted_label = np.argmax(predictions_array)
  color = 'blue' if predicted_label == actual else 'red'

  caption = "{} {:2.0f}% ({})".format(class_names[predicted_label],
                                      100*np.max(predictions_array),
                                      class_names[actual])
  plt.xlabel(caption, color=color)

def plot_value_array(i, predictions_array, true_label):
  """Draw a bar chart of the per-class scores of sample `i`.

  Args:
    i: index of the sample inside `true_label`.
    predictions_array: per-class scores of sample `i`, one entry per name in
      the notebook-level `class_names` list. Values are multiplied by 100 and
      shown on a 0-100 axis, so they are presumably probabilities in [0, 1].
    true_label: integer class ids of all samples; indexed with `i`.

  The bar of the predicted class is coloured red and the bar of the true class
  blue. When both are the same bar it ends up blue, because blue is applied last.
  """
  true_label = true_label[i]

  # Derive the number of bars from class_names instead of a hard-coded 6, so the
  # chart keeps working when classes are added or removed.
  positions = range(len(class_names))

  plt.grid(True)
  plt.xticks(positions, class_names, rotation=75, fontsize=12)
  thisplot = plt.bar(positions, predictions_array*100, color="#777777")
  plt.ylim([0, 100])
  predicted_label = np.argmax(predictions_array)

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')

In [0]:
# Plot a batch of test images, their predicted labels, and the true labels.
# Color correct predictions in blue and incorrect predictions in red.
num_rows = 15
num_cols = 4
# Index of the first test image shown. For the next batch raise it by
# num_rows*num_cols (60, 120, ...); nothing else has to be edited.
start_index = 0
# Never ask for more images than the test set still holds after start_index.
num_images = max(0, min(num_rows*num_cols, len(test_images) - start_index))

plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for slot in range(num_images):
  i = start_index + slot # index into the test set; `slot` is the position in the grid
  plt.subplot(num_rows, 2*num_cols, 2*slot+1) # left cell of the pair: the image
  plot_image(i, predictions[i], test_labels, test_images)
  plt.ylabel(i)
  plt.subplot(num_rows, 2*num_cols, 2*slot+2) # right cell of the pair: the score bars
  plot_value_array(i, predictions[i], test_labels)
plt.tight_layout()
plt.show()



---


### Save the model HDF5 format
https://www.tensorflow.org/tutorials/keras/save_and_load#hdf5_format

Keras saves models by inspecting the architecture. This technique saves everything:
* The weight values
* The model's architecture
* The model's training configuration (what you passed to compile)
* The optimizer and its state, if any (this enables you to restart training where you left off)

Keras models are usually saved via `model.save(filepath)`, which produces a single HDF5 (.h5) file containing both the model topology and the weights.

https://www.tensorflow.org/js/tutorials/conversion/import_keras#step_1_convert_an_existing_keras_model_to_tfjs_layers_format


---



In [0]:
# Save the entire model to a HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
# Note: the object saved here is probability_model (the trained network with the
# Softmax layer appended), not the bare `model` that was compiled and fitted.

probability_model.save('trash_image_classification.h5')