<a href="https://colab.research.google.com/github/lee4205/Potato_Chip_Classification/blob/master/%E3%81%93%E3%82%8C%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6%E8%A7%A3%E6%9E%90%E3%81%97%E3%82%88%E3%81%86.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://username:password@github.com/lee4205/Potato_Chip_Classification.git
!git config --global user.email "email@gmail.com"
!git config --global user.name "username"

Cloning into 'Potato_Chip_Classification'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 2045 (delta 2), reused 0 (delta 0), pack-reused 2039[K
Receiving objects: 100% (2045/2045), 154.67 MiB | 34.58 MiB/s, done.
Resolving deltas: 100% (210/210), done.


In [2]:
cd Potato_Chip_Classification

/content/Potato_Chip_Classification


In [3]:
import os
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [4]:
image_width = 1280 // 10
image_height = 960 // 10
image_size = (image_width, image_height)
image_channel = 1

In [5]:
cwd = os.getcwd()
flavors = os.listdir(cwd + "/potato-chips")

In [6]:
pixels = []
pixel = 0
for x in range(image_width * image_height):
    pixel += 1
    pixels.append('pixel' + str(pixel))

In [7]:
with open("potato_chips.csv", 'a') as c:
    csv_input = csv.writer(c)
    header = ["image", "flavor"]
    header.extend(pixels)
    csv_input.writerow(header)

    for flavor in flavors:
        print("loading image from " + flavor + " ...")
        images = os.listdir(cwd + f"/potato-chips/{flavor}")
        for image in images:
            rgb_image = Image.open(cwd + f"/potato-chips/{flavor}/" + image)
            grey_image = rgb_image.convert('L').resize((image_width, image_height))
            pixel_data = np.asarray(grey_image.getdata(), dtype=np.int).reshape((grey_image.size[1], grey_image.size[0]))
            pixel_data = pixel_data.flatten()
            image_data = [image, flavors.index(flavor)]
            image_data.extend(pixel_data)
            csv_input.writerow(image_data)

loading image from usushio ...
loading image from consomme-punch ...
loading image from norishio ...
loading image from shiawase-butter ...
loading image from norishio-punch ...
loading image from kyusyu-shoyu ...
loading image from shoyu-mayo ...


In [8]:
df = pd.read_csv(cwd + "/potato_chips.csv")

In [9]:
train_df, validate_df = train_test_split(df, train_size=0.6, random_state=42)
validate_df, test_df = train_test_split(validate_df, test_size=0.5, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [10]:
# デバッグ用
# train_df
# validate_df
# test_df
# plt.figure(figsize=(15, 15))
# sns.set_style("darkgrid")
# sns.countplot(train_df['flavor'])
# sns.countplot(validate_df['flavor'])
# sns.countplot(test_df['flavor'])

In [11]:
y_train = train_df['flavor']
y_validate = validate_df['flavor']
y_test = test_df['flavor']
y = test_df['flavor']

In [12]:
del train_df['image'], train_df['flavor']
del validate_df['image'], validate_df['flavor']
del test_df['image'], test_df['flavor']

In [13]:
label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_validate = label_binarizer.fit_transform(y_validate)
y_test = label_binarizer.fit_transform(y_test)

In [14]:
x_train = train_df.values
x_validate = validate_df.values
x_test = test_df.values

In [15]:
x_train = x_train / 255
x_validate = x_validate / 255
x_test = x_test / 255

In [16]:
x_train = x_train.reshape(-1, image_height, image_width, 1)
x_validate = x_validate.reshape(-1, image_height, image_width, 1)
x_test = x_test.reshape(-1, image_height, image_width, 1)

In [17]:
# デバッグ用
# f, ax = plt.subplots(3, 3)
# f.set_size_inches(10, 10)
# k = 0
# for i in range(3):
#     for j in range(3):
#         ax[i, j].imshow(x_train[k].reshape(image_height, image_width), cmap="gray")
#         k += 1
#     plt.tight_layout() 

In [18]:
datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=10,
                             zoom_range=0.1,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=False,
                             vertical_flip=False)
datagen.fit(x_train)

In [19]:
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

In [20]:
model = Sequential()
model.add(Conv2D(75, (3, 3), strides=1, padding='same', activation='relu', input_shape=(image_width, image_height, image_channel)))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding='same'))
model.add(Conv2D(50, (3, 3), strides=1, padding='same', activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding='same'))
model.add(Conv2D(25, (3, 3), strides=1, padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2, 2), strides=2, padding='same'))
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=7, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 96, 75)       750       
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 96, 75)       300       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 48, 75)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 48, 50)        33800     
_________________________________________________________________
dropout (Dropout)            (None, 64, 48, 50)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 48, 50)        200       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 24, 50)        0

In [None]:
history = model.fit(datagen.flow(x_train, y_train, batch_size=5),
                    epochs=1200,
                    validation_data=(x_validate, y_validate),
                    callbacks=[learning_rate_reduction])

Epoch 1/1200
Epoch 2/1200
Epoch 3/1200
Epoch 4/1200
Epoch 5/1200
Epoch 6/1200
Epoch 7/1200
Epoch 8/1200
Epoch 9/1200
Epoch 10/1200
Epoch 11/1200
Epoch 12/1200

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/1200
Epoch 14/1200
Epoch 15/1200
Epoch 16/1200
Epoch 17/1200

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 18/1200
Epoch 19/1200
Epoch 20/1200
Epoch 21/1200

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 22/1200
Epoch 23/1200

Epoch 00023: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 24/1200
Epoch 25/1200

Epoch 00025: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/1200
Epoch 27/1200

Epoch 00027: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 28/1200
Epoch 29/1200

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 30/1200
Epoch 31/1200
Epoch 32/1200
Epoch 33/1

In [None]:
print("Accuracy of the model : ", model.evaluate(x_validate, y_validate)[1] * 100, "%")

In [None]:
model.save_weights("model.h5")

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
axs[0].plot(range(1, len(history.history["accuracy"]) + 1), history.history["accuracy"])
axs[0].plot(range(1, len(history.history["val_accuracy"]) + 1), history.history["val_accuracy"])
axs[0].set_title("Model Accuracy")
axs[0].set_ylabel("Accuracy")
axs[0].set_xlabel("Epoch")
axs[0].set_xticks(np.arange(1, len(history.history["accuracy"]) + 1), len(history.history["accuracy"]) / 10)
axs[0].legend(["train", "val"], loc="best")
axs[1].plot(range(1, len(history.history["loss"]) + 1), history.history["loss"])
axs[1].plot(range(1, len(history.history["val_loss"]) + 1), history.history["val_loss"])
axs[1].set_title("Model Loss")
axs[1].set_ylabel("Loss")
axs[1].set_xlabel("Epoch")
axs[1].set_xticks(np.arange(1, len(history.history["loss"]) + 1), len(history.history["loss"]) / 10)
axs[1].legend(["train", "val"], loc="best")
plt.show()

In [None]:
predictions = model.predict_classes(x_test)
print(predictions)

In [None]:
classes = ["Class " + str(i) for i in range(7)]
print(classification_report(y, predictions, target_names = classes))

In [None]:
cm = confusion_matrix(y, predictions)
cm = pd.DataFrame(cm, index=[i for i in range(7)], columns=[i for i in range(7)])
plt.figure(figsize=(10, 10))
sns.heatmap(cm, cmap="Blues", linecolor='black', linewidth=1, annot=True, fmt='')

In [None]:
correct = (predictions == y).to_numpy().nonzero()[0]

In [None]:
i = 0
plt.figure(figsize=(10, 10))
for c in correct[:9]:
    plt.subplot(3, 3, i + 1)
    plt.imshow(x_test[c].reshape(image_height, image_width), interpolation='none')
    plt.title("Predicted Class {}\n Actual Class {}".format(flavors[predictions[c]], flavors[y[c]]))
    i += 1