# Garbage Classification - Part D

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow import keras

## 0. Preprocessing

In [None]:
path = "./Garbage classification/"
listTruelabels = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

# Build a list of (filename, class_index) tuples, where class_index is the
# position of the class name in listTruelabels (0-5).
dataset = []
num_file = 100  # upper bound on the number of images taken per class folder
for idx, truelabel in enumerate(listTruelabels):
    class_dir = path + truelabel
    # os.walk yields nothing for a missing folder; fail with a clear message
    # instead of an IndexError on an empty list.
    top_level = next(os.walk(class_dir), None)
    if top_level is None:
        raise FileNotFoundError(f"Class folder not found: {class_dir}")

    # os.walk returns names in arbitrary, platform-dependent order. Sort them
    # so the same files are selected on every run.
    listFilenames = sorted(top_level[2])[:num_file]

    # Pair each file with its label directly, so the label count always
    # matches the number of files actually found.
    dataset += [(filename, idx) for filename in listFilenames]

In [None]:
# Turn the (filename, label) list into an array and shuffle its rows.
npDataset = np.array(dataset)  # one row per sample: [filename, label]
print(npDataset.shape)

# Seed NumPy's global RNG right before shuffling so the row order is
# repeatable; shuffle permutes the rows in place.
np.random.seed(42)
np.random.shuffle(npDataset)
print(npDataset)

In [None]:
# Split into train and test sets: the first 80% of the rows are used for
# training and the remainder for testing.
# Integer arithmetic avoids float rounding; this gives 480 for 600 rows.
num_train = len(npDataset) * 4 // 5
npTrain = npDataset[:num_train]
npTest = npDataset[num_train:]
print(npTrain.shape, npTest.shape)

## 1. Train dataset

In [None]:
# Load each training image as grayscale and flatten it to one row vector.
# Rows are collected in a list and stacked once at the end; concatenating
# inside the loop would recopy the whole array on every iteration.
train_rows = []
for filename, label in npTrain:
    image_path = os.path.join(path, listTruelabels[int(label)], filename)
    # The context manager closes the underlying file once pixels are read.
    with Image.open(image_path) as img:
        gray = np.asarray(img.convert("L"))  # [H, W], single channel
    train_rows.append(gray.reshape(-1))      # [H * W]

x_train = np.stack(train_rows, axis=0)  # [B, H * W]
y_train = npTrain[:, 1].copy()          # [B,] labels kept as strings

print(x_train.shape, y_train.shape)

## 2. Test dataset

In [None]:
# Load each test image as grayscale and flatten it to one row vector.
# Rows are collected in a list and stacked once at the end; concatenating
# inside the loop would recopy the whole array on every iteration.
test_rows = []
for filename, label in npTest:
    image_path = os.path.join(path, listTruelabels[int(label)], filename)
    # The context manager closes the underlying file once pixels are read.
    with Image.open(image_path) as img:
        gray = np.asarray(img.convert("L"))  # [H, W], single channel
    test_rows.append(gray.reshape(-1))       # [H * W]

x_test = np.stack(test_rows, axis=0)  # [B, H * W]
y_test = npTest[:, 1].copy()          # [B,] labels kept as strings

print(x_test.shape, y_test.shape)

## Visualization

In [None]:
# Show one training sample together with its label as a sanity check.
sample_idx = 1
some_digit = x_train[sample_idx]

# Undo the flattening: restore the 384 x 512 pixel grid before plotting.
some_digit_image = np.reshape(some_digit, (384, 512))
plt.imshow(some_digit_image)

sample_label = y_train[sample_idx]
print(sample_label, listTruelabels[int(sample_label)])

## SGD classifier

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix

# Linear classifier trained with SGD on the flattened pixel vectors.
sgd = SGDClassifier(random_state=42)
sgd.fit(x_train, y_train)

# confusion_matrix expects (y_true, y_pred): rows are the true classes and
# columns the predicted ones. Passing them the other way round transposes
# the matrix.
y_test_pred_sgd = sgd.predict(x_test)
cm = confusion_matrix(y_test, y_test_pred_sgd)
plt.matshow(cm, cmap=plt.cm.Blues)
plt.show()

In [None]:
# Simple ANN: one hidden layer followed by a 6-way classification layer.
model = keras.models.Sequential()
model.add(keras.layers.Dense(1000, activation="sigmoid"))
# The output layer uses softmax so the six outputs form one probability
# distribution over the classes. sparse_categorical_crossentropy expects that
# by default (from_logits=False); independent sigmoids do not sum to 1.
model.add(keras.layers.Dense(6, activation="softmax"))

model.compile(loss="sparse_categorical_crossentropy", optimizer="sgd", metrics=["accuracy"])

# Labels are converted from strings to numbers for the sparse loss.
# NOTE(review): inputs look like raw pixel intensities; scaling them to
# [0, 1] would likely help the sigmoid hidden layer - confirm and apply
# consistently wherever the model is fed.
history = model.fit(
    x_train,
    y_train.astype(float),
    epochs=10,
    batch_size=30,
    validation_data=(x_test, y_test.astype(float)),
)

In [None]:
from sklearn.metrics import confusion_matrix

# Sequential.predict_classes was removed in TensorFlow 2.6. Taking the argmax
# over the per-class scores returned by predict gives the same class indices.
y_test_pred = np.argmax(model.predict(x_test), axis=1)

# Rows are the true classes, columns the predicted ones. Both sides are cast
# to integer class indices so they share one label set.
conf_mx = confusion_matrix(y_test.astype(int), y_test_pred)
plt.matshow(conf_mx, cmap=plt.cm.Blues)
#plt.savefig("d.png")
plt.show()