In [2]:
import os
import cv2
import numpy as np

import keras
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet import ResNet50
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Load data

In [3]:
# Read the class names, one per line; a name's position in the file is its index
with open("2021VRDL_HW1_datasets/classes.txt", 'r') as file:
    index2class = [line.rstrip('\n') for line in file]

# Reverse lookup: class name -> integer index
class2index = {name: idx for idx, name in enumerate(index2class)}

In [4]:
# Parse the training label file: every line is "<image name> <class name>".
# File order is preserved so images and labels stay aligned.
with open("2021VRDL_HW1_datasets/training_labels.txt", 'r') as file:
    train_records = [line.rstrip('\n').split(' ') for line in file]

# Image file names, in file order
x_train_order = [img_name for img_name, _ in train_records]

# Integer class index per image, stored as a column vector of shape (N, 1)
y_train_raw = np.array([[class2index[label]] for _, label in train_records])

In [5]:
# Test image file names, one per line, kept in file order
with open("2021VRDL_HW1_datasets/testing_img_order.txt", 'r') as file:
    x_test_order = [line.rstrip('\n') for line in file]

In [6]:
# Load images of training and testing data in the specified order
def load_images(img_dir, load_order, target_size=(224, 224)):
    """Load and resize images from a directory in a given order.

    Args:
        img_dir: Directory that contains the image files.
        load_order: Iterable of file names (relative to ``img_dir``); the
            returned array follows this order.
        target_size: ``(width, height)`` passed to ``cv2.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        A NumPy array stacking the resized images along the first axis.
        An empty ``load_order`` yields an empty array.

    Raises:
        OSError: If an image cannot be read. ``cv2.imread`` signals a missing
            or undecodable file by returning ``None`` instead of raising, so
            the failure is reported here with the offending path.
    """
    dataset = []
    for file in load_order:
        img_path = os.path.join(img_dir, file)
        # Pixels are kept exactly as cv2.imread decodes them (OpenCV uses BGR
        # channel order); no colour conversion is done here.
        img = cv2.imread(img_path)
        if img is None:
            raise OSError(f"cv2.imread could not read image: {img_path}")
        dataset.append(cv2.resize(img, target_size))
    return np.array(dataset)

# Decode both image sets, each in the order recorded by its label/order file
train_img_dir = "2021VRDL_HW1_datasets/training_images"
test_img_dir = "2021VRDL_HW1_datasets/testing_images"
x_train_raw = load_images(train_img_dir, x_train_order)
x_test_raw = load_images(test_img_dir, x_test_order)

In [7]:
# Sanity check: shapes of the loaded training images, test images and labels
for loaded in (x_train_raw, x_test_raw, y_train_raw):
    print(loaded.shape)

(3000, 224, 224, 3)
(3033, 224, 224, 3)
(3000, 1)


## Data preprocessing

In [8]:
# Work on copies so the raw arrays loaded from disk stay untouched
x_train, x_test = x_train_raw.copy(), x_test_raw.copy()

# One-hot encode the integer class indices, one column per known class
# (the categorical cross-entropy loss used below expects one-hot targets)
num_classes = len(index2class)
y_train = np_utils.to_categorical(y_train_raw, num_classes=num_classes)

In [9]:
# Hold out 30% of the training samples for validation. The split is done on
# sample indices (fixed seed) so images and labels are indexed identically.
num_data = x_train.shape[0]
train_idx, valid_idx = train_test_split(range(num_data), train_size=0.7, random_state=777)

x_train_train, y_train_train = x_train[train_idx], y_train[train_idx]
x_train_valid, y_train_valid = x_train[valid_idx], y_train[valid_idx]

In [10]:
# Sanity check: shapes of the train/validation partitions (images, then labels)
for part in (x_train_train, x_train_valid, y_train_train, y_train_valid):
    print(part.shape)

(2100, 224, 224, 3)
(900, 224, 224, 3)
(2100, 200)
(900, 200)


## Build model and validate its performance

In [11]:
# Build model: ImageNet-pretrained ResNet50 backbone (no classification top,
# global max pooling) followed by a fully connected classifier head.
# NOTE(review): the images reach the network as raw pixel values from
# cv2.imread; keras.applications.resnet.preprocess_input is never applied.
# Confirm whether that is intentional for the pretrained weights.
base_model = ResNet50(weights="imagenet", input_shape=(224, 224, 3), include_top=False, pooling='max')

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.35))
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.35))
model.add(BatchNormalization())
# The output width must equal the width of the one-hot labels, so it is taken
# from num_classes instead of a hard-coded literal.
model.add(Dense(num_classes, activation='softmax'))

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=25,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    zoom_range=0.15,  # random zoom
    shear_range=0.15,  # random shear
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False)  # vertical flips are disabled

# Compile and fit
# Phase 1: freeze the backbone and train only the new classifier head.
print("Phase 1")
base_model.trainable = False
opr = Adam(learning_rate=1e-4)
model.compile(loss='categorical_crossentropy', optimizer=opr, metrics=['accuracy'])

model.fit(datagen.flow(x_train_train, y_train_train, batch_size=64),
          epochs=30,
          validation_data=(x_train_valid, y_train_valid),
          shuffle=True)

# Phase 2: unfreeze the backbone and fine-tune everything with a smaller
# learning rate. The model is re-compiled so the trainable change takes effect.
print("Phase 2")
base_model.trainable = True
opr = Adam(learning_rate=1e-5)
model.compile(loss='categorical_crossentropy', optimizer=opr, metrics=['accuracy'])

model.fit(datagen.flow(x_train_train, y_train_train, batch_size=64),
          epochs=30,
          validation_data=(x_train_valid, y_train_valid),
          shuffle=True)

2021-11-04 12:10:56.799403: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-04 12:10:56.909568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-04 12:10:56.910303: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-04 12:10:56.912173: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Phase 1


2021-11-04 12:11:03.717438: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/30


2021-11-04 12:11:08.311115: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Phase 2
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f51b0496190>

In [12]:
# Class probabilities for the held-out validation images
valid_probs = model.predict(x_train_valid)
print(valid_probs.shape)

# Reduce each probability row to the index of its most likely class
y_valid_pred = valid_probs.argmax(axis=1)
print(y_valid_pred.shape)

(900, 200)
(900,)


## Train the model and do prediction

In [13]:
# Sanity check: shapes of the full training set used for the final model
for full_set in (x_train, y_train):
    print(full_set.shape)

(3000, 224, 224, 3)
(3000, 200)


In [14]:
# Build model: same architecture as the validation run, now trained on the
# full training set (no validation data is held out here).
# NOTE(review): the images reach the network as raw pixel values from
# cv2.imread; keras.applications.resnet.preprocess_input is never applied.
# Confirm whether that is intentional for the pretrained weights.
base_model = ResNet50(weights="imagenet", input_shape=(224, 224, 3), include_top=False, pooling='max')

model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.35))
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(Dropout(0.35))
model.add(BatchNormalization())
# The output width must equal the width of the one-hot labels, so it is taken
# from num_classes instead of a hard-coded literal.
model.add(Dense(num_classes, activation='softmax'))

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=25,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    zoom_range=0.15,  # random zoom
    shear_range=0.15,  # random shear
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False)  # vertical flips are disabled

# Compile and fit
# Phase 1: freeze the backbone and train only the new classifier head.
print("Phase 1")
base_model.trainable = False
opr = Adam(learning_rate=1e-4)
model.compile(loss='categorical_crossentropy', optimizer=opr, metrics=['accuracy'])

model.fit(datagen.flow(x_train, y_train, batch_size=64),
          epochs=30,
          shuffle=True)

# Phase 2: unfreeze the backbone and fine-tune everything with a smaller
# learning rate. The model is re-compiled so the trainable change takes effect.
print("Phase 2")
base_model.trainable = True
opr = Adam(learning_rate=1e-5)
model.compile(loss='categorical_crossentropy', optimizer=opr, metrics=['accuracy'])

model.fit(datagen.flow(x_train, y_train, batch_size=64),
          epochs=30,
          shuffle=True)

Phase 1
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Phase 2
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f518c252b50>

In [15]:
# Persist the final model (the one trained on the full training set) to disk
model.save("model.h5")



In [16]:
# Class probabilities for every test image, in x_test order
test_probs = model.predict(x_test)
print(test_probs.shape)

# Reduce each probability row to the index of its most likely class
y_pred = test_probs.argmax(axis=1)
print(y_pred.shape)

(3033, 200)
(3033,)


## Pack submission file

In [17]:
# Pair every test image name with the name of its predicted class, keeping
# the order of x_test_order
submission = [
    [img_name, index2class[y_pred[pos]]]
    for pos, img_name in enumerate(x_test_order)
]

# Write one "<image name> <class name>" row per line
np.savetxt('answer.txt', submission, fmt='%s')