In [1]:
import tensorflow as tf
print(tf.__version__)

2.2.0


In [2]:
import tensorflow.keras as tfk
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Flatten, BatchNormalization, MaxPool2D, GlobalAveragePooling2D, Dense, Dropout, Activation
import pandas as pd
import numpy as np

In [3]:
data_dir = "../input/vietai-c6-assignment3-extracted-dataset/train.csv"
sub_dir = "../input/vietai-c6-assignment3-extracted-dataset/sample_submission.csv"
train_df = pd.read_csv(data_dir)
sub_df = pd.read_csv(sub_dir)

In [4]:
train_df.head()

Unnamed: 0,image,label
0,0.jpg,0
1,1.jpg,3
2,10.jpg,2
3,100.jpg,0
4,1000.jpg,0


In [5]:
sub_df.head()

Unnamed: 0,image,label
0,10010.jpg,0
1,10011.jpg,0
2,10028.jpg,0
3,10034.jpg,0
4,10056.jpg,0


In [6]:
classes = ["book", "can", "cardboard", "glass_bottle", "pen", "plastic_bottle"]
train_y = train_y = train_df.label
num_classes = len(np.unique(train_y))
y_ohe = tf.keras.utils.to_categorical(train_y, num_classes=num_classes)

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

size = 224
batch_size=32
train_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2, horizontal_flip=True, vertical_flip=True)
train_gen = train_data_gen.flow_from_directory("../input/vietai-c6-assignment3-extracted-dataset/train", batch_size=batch_size,
                                              target_size=(size, size), subset="training")

valid_gen = train_data_gen.flow_from_directory("../input/vietai-c6-assignment3-extracted-dataset/train", batch_size=batch_size,
                                          target_size=(size, size), subset="validation")

Found 25251 images belonging to 6 classes.
Found 6308 images belonging to 6 classes.


In [None]:
def create_model():
    model = tfk.Sequential()
    model.add(Conv2D(144, (3, 3), strides=(1, 1), padding="valid", input_shape=(224, 224, 3)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPool2D())
    model.add(Conv2D(94, (3, 3), strides=(1, 1), padding="valid"))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPool2D())
    model.add(Conv2D(144, (3, 3), strides=(1, 1), padding="same"))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPool2D())
    model.add(Conv2D(94, (3, 3), strides=(1, 1), padding="valid"))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPool2D())
    model.add(Conv2D(42, (3, 3), strides=(1, 1), padding="valid"))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPool2D())
    model.add(GlobalAveragePooling2D())
    model.add(Flatten())
    model.add(Dropout(0.21))
    model.add(Dense(256, activation="tanh", kernel_regularizer=tf.keras.regularizers.l2()))
    model.add(Dropout(0.21))
    model.add(Dense(256, activation="tanh", kernel_regularizer=tf.keras.regularizers.l2()))
    model.add(Dropout(0.21))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    print(model.summary())
    return model

In [8]:
def create_pretrained_model():
    model = tfk.Sequential()
    pretrained_net = tfk.applications.InceptionResNetV2(
        include_top=False,
        input_shape=(224, 224, 3),
        pooling="avg"
    )
    model.add(pretrained_net)
    model.add(Dropout(0.42))
    model.add(Dense(256, activation="tanh", kernel_regularizer=tf.keras.regularizers.l2()))
    model.add(Dropout(0.42))
    model.add(Dense(128, activation="tanh", kernel_regularizer=tf.keras.regularizers.l2()))
    model.add(Dropout(0.42))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    print(model.summary())
    return model

In [9]:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get("val_accuracy") is not None:
            if(logs.get('val_accuracy') > 0.99):
                print("\nReached 99% val_accuracy so cancelling training!")
                self.model.stop_training = True
mcb = myCallback()

In [10]:
epochs = 150
use_efficientnet = True
if not use_efficientnet:
    model = create_model()
else:
    model = create_pretrained_model()

start_lr = 0.00001
min_lr = 0.00001
max_lr = 0.00005*8
rampup_epochs = 5
sustain_epochs = 0
exp_decay = .8

def lrfn(epoch):
    if epoch < rampup_epochs:
        return (max_lr - start_lr)/rampup_epochs * epoch + start_lr
    elif epoch < rampup_epochs + sustain_epochs:
        return max_lr
    else:
        return (max_lr - min_lr) * exp_decay**(epoch-rampup_epochs-sustain_epochs) + min_lr

mcp = tf.keras.callbacks.ModelCheckpoint("my_model.h5", monitor="val_accuracy",
                        save_best_only=True, save_weights_only=True, period=3)
val_acc_earlyStop = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", 
                                                         patience = epochs//15, restore_best_weights = True)
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda epoch: lrfn(epoch), verbose=True)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_resnet_v2 (Model)  (None, 1536)              54336736  
_________________________________________________________________
dropout (Dropout)            (None, 1536)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               393472    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout_2 (Dropout)  

In [16]:
model.load_weights("../input/pretrained-inceptionres/my_model (1).h5")

In [29]:
model.evaluate(valid_gen)



[0.16177572309970856, 0.9662333726882935]

In [35]:
train_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2, horizontal_flip=True, vertical_flip=True)
train_gen = train_data_gen.flow_from_directory("../input/vietai-c6-assignment3-extracted-dataset/train", batch_size=batch_size,
                                              target_size=(size, size), subset="training", seed=42)

valid_gen = train_data_gen.flow_from_directory("../input/vietai-c6-assignment3-extracted-dataset/train", batch_size=batch_size,
                                          target_size=(size, size), subset="validation", seed=42)

Found 25251 images belonging to 6 classes.
Found 6308 images belonging to 6 classes.


In [36]:
model.fit(train_gen, validation_data=valid_gen, epochs=epochs,initial_epoch=45, callbacks=[mcp, mcb, val_acc_earlyStop, lr_callback])


Epoch 00046: LearningRateScheduler reducing learning rate to 1.0051839891835612e-05.
Epoch 46/150

Epoch 00047: LearningRateScheduler reducing learning rate to 1.004147191346849e-05.
Epoch 47/150

Epoch 00048: LearningRateScheduler reducing learning rate to 1.0033177530774792e-05.
Epoch 48/150

Epoch 00049: LearningRateScheduler reducing learning rate to 1.0026542024619834e-05.
Epoch 49/150

Epoch 00050: LearningRateScheduler reducing learning rate to 1.0021233619695867e-05.
Epoch 50/150

Epoch 00051: LearningRateScheduler reducing learning rate to 1.0016986895756694e-05.
Epoch 51/150

Epoch 00052: LearningRateScheduler reducing learning rate to 1.0013589516605356e-05.
Epoch 52/150

Epoch 00053: LearningRateScheduler reducing learning rate to 1.0010871613284285e-05.
Epoch 53/150

Epoch 00054: LearningRateScheduler reducing learning rate to 1.0008697290627427e-05.
Epoch 54/150

Epoch 00055: LearningRateScheduler reducing learning rate to 1.0006957832501943e-05.
Epoch 55/150

Epoch 0005

<tensorflow.python.keras.callbacks.History at 0x7f6822f0c090>

In [43]:
model.save_weights("weight_model.h5")
model.save("full_model.h5")

In [41]:
model.evaluate(valid_gen)



[0.17513923346996307, 0.9682942032814026]

In [42]:
test_data_gen = ImageDataGenerator(rescale=1.0/255)
test_generator = test_data_gen.flow_from_directory("../input/vietai-c6-assignment3-extracted-dataset/test",class_mode=None, target_size=(size, size), shuffle=False)
test_generator.reset()
pred = model.predict(test_generator)

# pred là một ma trận xác suất của ảnh trên các lớp.
# Ta lấy lớp có xác suất cao nhất trên từng ảnh bằng hàm argmax
pred_labels = np.argmax(pred, axis=1)
sub_df['label'] = pred_labels
sub_df.head(20)

Found 3837 images belonging to 1 classes.


Unnamed: 0,image,label
0,10010.jpg,2
1,10011.jpg,1
2,10028.jpg,2
3,10034.jpg,3
4,10056.jpg,5
5,10081.jpg,2
6,10084.jpg,1
7,10091.jpg,3
8,101.jpg,2
9,1010.jpg,5


In [34]:
sub_df.to_csv("submission.csv", index=False)