<a href="https://colab.research.google.com/github/oktaviacitra/classification/blob/main/VillainClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

origin_path = "/content/drive"
drive.mount(origin_path)

folder_path = origin_path + "/MyDrive/Learning Journey/Villain Data/"

Mounted at /content/drive


In [2]:
class_labels = ["Vader", "Green Goblin", "Joker", "Thanos", "Venom"]
image_files = [ (folder_path + label + "/" + label + " " + str(i) + ".jpg") for i in range(1, 21) for label in class_labels]
len(image_files)

100

In [3]:
import tensorflow
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing import image

def preprocessing_image(path, target):
    img = Image.open(path)

    # crop central
    width, height = img.size
    new_width = new_height = width
    left = (width - new_width)/2
    top = (height - new_height)/2
    right = (width + new_width)/2
    bottom = (height + new_height)/2
    img = img.crop((left, top, right, bottom))

    # convert to array of rgb
    img = img.resize(target)
    img = img.convert('RGB')
    x = image.img_to_array(img)
    x = x[:, :, :3]
    x = np.true_divide(x, 255)
    return x

In [4]:
from sklearn.preprocessing import LabelEncoder

X = np.asarray([preprocessing_image(image_file, (64,64)) for image_file in image_files])

y = [ label for i in range(1, 21) for label in class_labels]
le = LabelEncoder()
le.fit(y)
y = le.transform(y)


X.shape, y.shape

((100, 64, 64, 3), (100,))

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42)
X_train.shape, X_test.shape, X_val.shape

((70, 64, 64, 3), (15, 64, 64, 3), (15, 64, 64, 3))

In [6]:
!pip install -q -U keras-tuner

[?25l[K     |██▍                             | 10 kB 34.1 MB/s eta 0:00:01[K     |████▉                           | 20 kB 22.9 MB/s eta 0:00:01[K     |███████▎                        | 30 kB 17.4 MB/s eta 0:00:01[K     |█████████▋                      | 40 kB 15.8 MB/s eta 0:00:01[K     |████████████                    | 51 kB 7.3 MB/s eta 0:00:01[K     |██████████████▌                 | 61 kB 8.6 MB/s eta 0:00:01[K     |█████████████████               | 71 kB 9.1 MB/s eta 0:00:01[K     |███████████████████▎            | 81 kB 9.4 MB/s eta 0:00:01[K     |█████████████████████▊          | 92 kB 10.4 MB/s eta 0:00:01[K     |████████████████████████▏       | 102 kB 8.8 MB/s eta 0:00:01[K     |██████████████████████████▋     | 112 kB 8.8 MB/s eta 0:00:01[K     |█████████████████████████████   | 122 kB 8.8 MB/s eta 0:00:01[K     |███████████████████████████████▍| 133 kB 8.8 MB/s eta 0:00:01[K     |████████████████████████████████| 135 kB 8.8 MB/s 
[?25h

In [7]:
from tensorflow.keras import Input, Model, regularizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, BatchNormalization, Activation, Flatten, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam, SGD

def model_builder(hp):
  inputs = Input(shape=(64, 64, 3))
  x = Conv2D(filters = hp.Int('filters', min_value=64, max_value=1024, step = 32),
             kernel_size = hp.Int('kernel_size', 3, 5),
             padding="same",
             activation='relu',
             dilation_rate=2,
             kernel_initializer = "he_normal",
             kernel_regularizer=regularizers.l2(0.001),
             name="conv2d")(inputs)

  if hp.Choice('pooling', ['max', 'avg']) == 'max':
    x = MaxPooling2D(name="maxpool2d")(x)
  else:
    x = AveragePooling2D(name="averagepool2d")(x)

  x = BatchNormalization(name="batchnorm")(x)
  x = Dropout(0.1, name="dropout")(x)

  if hp.Choice('pooling', ['max', 'avg']) == 'max':
    x = GlobalMaxPooling2D(name='globalmaxpool')(x)
  else:
    x = GlobalAveragePooling2D(name='globalaveragepool')(x)

  x = Dense(units=hp.Int('units_1', min_value=128, max_value=512, step=32),
            activation='relu',
            name='fully_connected')(x)
  outputs = Dense(5, activation='softmax', name='prediction')(x)

  model = Model(inputs=inputs, outputs=outputs)

  learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  optimizer = Adam(learning_rate=learning_rate) if hp.Choice('optimizer', ['adam', 'sgd']) == 'adam' else SGD(learning_rate=learning_rate)
  
  model.compile(optimizer=optimizer,
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

  return model

In [8]:
import keras_tuner as kt

tuner = kt.Hyperband(hypermodel = model_builder,
                             objective = 'val_loss',
                             max_epochs = 10,
                             factor = 3,
                             directory = folder_path + '/results_dir',
                             project_name = 'villain')
tuner.search_space_summary()

Search space summary
Default search space size: 6
filters (Int)
{'default': None, 'conditions': [], 'min_value': 64, 'max_value': 1024, 'step': 32, 'sampling': None}
kernel_size (Int)
{'default': None, 'conditions': [], 'min_value': 3, 'max_value': 5, 'step': 1, 'sampling': None}
pooling (Choice)
{'default': 'max', 'conditions': [], 'values': ['max', 'avg'], 'ordered': False}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 128, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
optimizer (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'sgd'], 'ordered': False}


In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

best_weight = ModelCheckpoint(folder_path + "tuner/{epoch:02d}-{val_loss:.4f}.h5",
                              monitor="val_loss",
                              mode="min",
                              save_best_only=True,
                              verbose=1)
stop_early = EarlyStopping(monitor='val_loss', patience=5)

tuner.search(X_train, y_train, epochs=20,
             validation_data=(X_val, y_val),
             callbacks=[stop_early, best_weight],
             verbose=2)

Trial 30 Complete [00h 00m 04s]
val_loss: 2.48443341255188

Best val_loss So Far: 1.9182846546173096
Total elapsed time: 00h 01m 13s
INFO:tensorflow:Oracle triggered exit


In [10]:
from tensorflow.keras.models import load_model

model = load_model(folder_path + "tuner/06-1.9183.h5")
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 64, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 64, 64, 160)       12160     
                                                                 
 averagepool2d (AveragePooli  (None, 32, 32, 160)      0         
 ng2D)                                                           
                                                                 
 batchnorm (BatchNormalizati  (None, 32, 32, 160)      640       
 on)                                                             
                                                                 
 dropout (Dropout)           (None, 32, 32, 160)       0         
                                                                 
 globalaveragepool (GlobalAv  (None, 160)              0     

In [11]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

callbacks = [EarlyStopping(monitor="val_loss", patience=3, verbose=1),
             ReduceLROnPlateau(monitor = 'val_loss', factor=0.1, patience=2, verbose=1),
             ModelCheckpoint("/kaggle/working/s{epoch:02d}-{val_loss:.4f}.h5", monitor="val_loss", mode="min", save_best_only=True, verbose=1)]
history = model.fit(X_train, y_train, epochs=200,
                    validation_data=(X_val, y_val), verbose=1,
                    callbacks=callbacks)

Epoch 1/200
Epoch 1: val_loss improved from inf to 1.91848, saving model to /kaggle/working/s01-1.9185.h5
Epoch 2/200
Epoch 2: val_loss did not improve from 1.91848
Epoch 3/200
Epoch 3: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.

Epoch 3: val_loss did not improve from 1.91848
Epoch 4/200
Epoch 4: val_loss did not improve from 1.91848
Epoch 4: early stopping
