# Preparing data

In [None]:
import numpy as np
from tensorflow.keras.utils import to_categorical

## Loading data

In [None]:
# Folder on the mounted Google Drive that holds the competition archives.
DATASET_DIR = '/content/drive/MyDrive/ProbSpace/宗教画テーマの分類/dataset'


def load_npz_array(filename, key='arr_0'):
  """Return one array stored in an .npz archive inside DATASET_DIR.

  np.load on an .npz file returns an NpzFile that keeps the archive open; the
  `with` block closes it as soon as the requested array has been read.

  Args:
    filename: name of the .npz file inside DATASET_DIR.
    key: name of the array inside the archive.

  Returns:
    The array stored under `key`.
  """
  with np.load(DATASET_DIR + '/' + filename) as archive:
    return archive[key]


train_data = load_npz_array('christ-train-imgs.npz')
train_label = load_npz_array('christ-train-labels.npz')
test_data = load_npz_array('christ-test-imgs.npz')

print('train_data shape: {0}'.format(train_data.shape))
print('test_data shape: {0}'.format(test_data.shape))

train_data shape: (654, 224, 224, 3)
test_data shape: (497, 224, 224, 3)


## One-hot encoding the label data

In [None]:
# Number of classes; the model heads in this notebook all output 13 values.
NUM_CLASSES = 13

# Encode only while the labels are not yet NUM_CLASSES wide, so re-running this
# cell does not one-hot encode an already encoded array.
if train_label.shape[-1:] != (NUM_CLASSES,):
  # Pin num_classes so the width never depends on which class ids are present.
  train_label = to_categorical(train_label, num_classes=NUM_CLASSES)

print('train_label shape: {0}'.format(train_label.shape))

train_label shape: (654, 13)


# Preprocessing

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## ImageDataGenerator

In [None]:
# Every ImageDataGenerator option is spelled out with augmentation switched off,
# so an experiment can enable one in place; the 1/255 rescale is the only
# transform that is active.
# NOTE(review): pretrained backbones often expect their own input
# preprocessing - confirm that a plain 1/255 rescale suits each model used.
generator_options = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    rotation_range=0,
    width_shift_range=0.0,
    height_shift_range=0.0,
    brightness_range=None,
    shear_range=0.0,
    zoom_range=0.0,
    channel_shift_range=0.0,
    fill_mode='nearest',
    cval=0.0,
    horizontal_flip=False,
    vertical_flip=False,
    rescale=1./255.,
    preprocessing_function=None,
    data_format=None,
    validation_split=0.0,
    dtype=None,
)

datagen = ImageDataGenerator(**generator_options)

In [None]:
# Unlabelled, unshuffled iterator over the test images (batches of 4), so that
# predictions come back in the same order as the rows of test_data.
test_flow_options = dict(
    batch_size=4,
    shuffle=False,
    sample_weight=None,
    seed=None,
    save_to_dir=None,
    save_prefix='',
    save_format='png',
    subset=None,
)

test_generator = datagen.flow(test_data, **test_flow_options)

# Function

## Creating model

In [None]:
!pip install --quiet tensorflow_addons
!pip install --quiet vit-keras
!pip install --quiet mlp-mixer-keras

In [None]:
from tensorflow.keras.applications import VGG16, ResNet50, EfficientNetB0
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, BatchNormalization, LeakyReLU, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow_addons.losses import SigmoidFocalCrossEntropy
from vit_keras import vit, utils
from mlp_mixer_keras import MlpMixerModel

### creating vgg model

In [None]:
def creating_vgg_model():
  """Build and compile a 13-class classifier on an ImageNet-pretrained VGG16.

  Architecture: VGG16 without its top on a (224, 224, 3) input, followed by
  global average pooling, Dense(13) and a softmax activation.

  Returns:
    The compiled Model (Adam with learning rate 1e-5, sigmoid focal
    cross-entropy loss, accuracy metric).
  """
  image_input = Input(shape=(224, 224, 3))
  backbone = VGG16(include_top=False, weights='imagenet', input_tensor=image_input)

  pooled = GlobalAveragePooling2D()(backbone.output)
  logits = Dense(13)(pooled)
  probabilities = Activation('softmax')(logits)

  classifier = Model(inputs=[image_input], outputs=[probabilities])
  classifier.compile(optimizer=Adam(learning_rate=1e-5),
                     loss=SigmoidFocalCrossEntropy(),
                     metrics=['accuracy'])
  return classifier

### creating resnet model

In [None]:
def creating_resnet_model():
  """Build and compile a 13-class classifier on an ImageNet-pretrained ResNet50.

  Architecture: ResNet50 without its top on a (224, 224, 3) input, followed by
  global average pooling, Dense(13) and a softmax activation.

  Returns:
    The compiled Model (Adam with learning rate 1e-5, sigmoid focal
    cross-entropy loss, accuracy metric).
  """
  image_input = Input(shape=(224, 224, 3))
  backbone = ResNet50(include_top=False, weights='imagenet', input_tensor=image_input)

  # NOTE(review): the backbone is already built on image_input, yet it is called
  # on image_input again here, which nests it as a single layer. The VGG16 and
  # EfficientNet builders use conv_base.output instead. Kept as is so the layer
  # structure of this model does not change.
  feature_map = backbone(image_input)

  pooled = GlobalAveragePooling2D()(feature_map)
  logits = Dense(13)(pooled)
  probabilities = Activation('softmax')(logits)

  classifier = Model(inputs=[image_input], outputs=[probabilities])
  classifier.compile(optimizer=Adam(learning_rate=1e-5),
                     loss=SigmoidFocalCrossEntropy(),
                     metrics=['accuracy'])
  return classifier

### creating efficient model

In [None]:
def creating_efficient_model():
  """Build and compile a 13-class classifier on an ImageNet-pretrained EfficientNetB0.

  Architecture: EfficientNetB0 without its top on a (224, 224, 3) input,
  followed by global average pooling, Dense(13) and a softmax activation.

  Returns:
    The compiled Model (Adam with learning rate 1e-5, sigmoid focal
    cross-entropy loss, accuracy metric).
  """
  image_input = Input(shape=(224, 224, 3))
  backbone = EfficientNetB0(include_top=False, weights='imagenet', input_tensor=image_input)

  pooled = GlobalAveragePooling2D()(backbone.output)
  logits = Dense(13)(pooled)
  probabilities = Activation('softmax')(logits)

  classifier = Model(inputs=[image_input], outputs=[probabilities])
  classifier.compile(optimizer=Adam(learning_rate=1e-5),
                     loss=SigmoidFocalCrossEntropy(),
                     metrics=['accuracy'])
  return classifier

### creating vit model

[pip install vit-keras](https://pypi.org/project/vit-keras/)

In [None]:
def creating_vit_model():
  """Build and compile a 13-class classifier on a pretrained ViT-B/16.

  The vit-keras model is created with its own 13-way head (include_top=True,
  pretrained_top=False) and wrapped in a Sequential model that appends a
  softmax activation.

  Returns:
    The compiled Sequential model (Adam with learning rate 1e-5, sigmoid focal
    cross-entropy loss, accuracy metric).
  """
  model = Sequential()

  # vit_b16, vit_b32, vit_l16, vit_l32 #
  # The head is linear because the softmax layer added below produces the
  # probabilities. A sigmoid head followed by softmax first squashes the
  # scores into [0, 1], which caps the largest of the 13 probabilities at
  # e / (e + 12), about 0.18.
  # NOTE(review): checkpoints trained with the previous sigmoid head still load
  # (same layers) but will predict differently; retrain after this change.
  model.add(vit.vit_b16(image_size=224,
                        activation='linear',
                        pretrained=True,
                        include_top=True,
                        pretrained_top=False,
                        classes=13,))

  model.add(Activation('softmax'))

  model.compile(optimizer=Adam(learning_rate=1e-5),
                loss=SigmoidFocalCrossEntropy(),
                metrics=['accuracy'])

  return model

### creating mlp mixer model

[pip install mlp-mixer-keras](https://pypi.org/project/mlp-mixer-keras/)

[reference parameter](https://qiita.com/T-STAR/items/dcaa7873a6d193912ed1)

In [None]:
def creating_mlp_mixer_model():
  """Build and compile a small 13-class MLP-Mixer for 224x224 RGB images.

  Returns:
    The compiled MlpMixerModel (Adam with learning rate 1e-5, sigmoid focal
    cross-entropy loss, accuracy metric).
  """
  mixer_config = dict(
      input_shape=(224, 224, 3),
      num_classes=13,
      num_blocks=4,
      patch_size=8,
      hidden_dim=32,
      tokens_mlp_dim=64,
      channels_mlp_dim=128,
      use_softmax=True,
  )
  mixer = MlpMixerModel(**mixer_config)

  mixer.compile(optimizer=Adam(learning_rate=1e-5),
                loss=SigmoidFocalCrossEntropy(),
                metrics=['accuracy'])
  return mixer

## Ensemble

In [None]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from sklearn.metrics import accuracy_score

In [None]:
def cross_val_score_for_ensemble(X_train, Y_train, epochs, batch_size, model_path, n_splits=10, model_builder=None):
  """Train one model per stratified fold and return the validation accuracies.

  For fold k, the checkpoint with the best validation accuracy is written to
  str(k) + model_path, reloaded after training, and scored on that fold's
  validation split. The score is also printed.

  Args:
    X_train: array of training images, indexed along axis 0.
    Y_train: one-hot labels aligned with X_train; argmax over axis 1 gives the
      class ids used for stratification and scoring.
    epochs: number of epochs to train each fold's model.
    batch_size: batch size of the training and validation generators.
    model_path: suffix of the per-fold checkpoint file names.
    n_splits: number of stratified folds.
    model_builder: zero-argument callable returning a freshly compiled model,
      e.g. creating_resnet_model. Defaults to creating_vit_model.

  Returns:
    A list with one accuracy per fold, in fold order.
  """
  if model_builder is None:
    model_builder = creating_vit_model

  skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2021)
  class_ids = Y_train.argmax(axis=1)

  scores = []

  for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, class_ids)):

    checkpoint_path = str(fold)+model_path

    # The generators honour the batch_size argument instead of a fixed value.
    train_generator = datagen.flow(X_train[train_idx], Y_train[train_idx],
                                   batch_size=batch_size,
                                   shuffle=True,
                                   seed=2021)

    # Validation batches keep their order so predictions line up with the labels.
    val_generator = datagen.flow(X_train[val_idx], Y_train[val_idx],
                                 batch_size=batch_size,
                                 shuffle=False,
                                 seed=2021)

    model = model_builder()
    callbacks_list = [ModelCheckpoint(filepath=checkpoint_path, monitor='val_accuracy', save_best_only=True),]

    # The generators already yield batches, so fit() is not given a batch_size.
    model.fit(train_generator,
              epochs=epochs,
              callbacks=callbacks_list,
              verbose=0,
              validation_data=val_generator)

    # Score the best checkpoint rather than the weights of the last epoch.
    model.load_weights(checkpoint_path)
    predicted_ids = np.argmax(model.predict(val_generator), axis=1)
    score = accuracy_score(class_ids[val_idx], predicted_ids)
    scores.append(score)
    print('accuracy: {0:.3f}'.format(score))

  return scores

# Training

In [None]:
from time import time

In [None]:
EPOCHS = 100
BATCH_SIZE = 4

# Time the whole 5-fold cross-validation run; checkpoints are written as
# '<fold>model.h5'.
start_time = time()
acc = cross_val_score_for_ensemble(
    X_train=train_data,
    Y_train=train_label,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    model_path='model.h5',
    n_splits=5,
)
elapsed_time = time() - start_time

elapsed_hours = elapsed_time / 3600
mean_accuracy = np.mean(acc)

print('Elapsed time: {0:.3f} hrs'.format(elapsed_hours))
print('Mean accuracy for cv: {0:.3f}'.format(mean_accuracy))

Downloading data from https://github.com/faustomorales/vit-keras/releases/download/dl/ViT-B_16_imagenet21k+imagenet2012.npz




accuracy: 0.542
accuracy: 0.511
accuracy: 0.511
accuracy: 0.588
accuracy: 0.554
Elapsed time: 2.723 hrs
Mean accuracy for cv: 0.541


# Submission

In [None]:
import pandas as pd
from google.colab import files

## bagging

### preparing data

In [None]:
# This generator is only passed to model.predict(), so it must not shuffle:
# the rows of the prediction table have to stay in the order of
# train_data / train_label, and in the same order for every fold's model.
train_generator = datagen.flow(train_data, train_label,
                            batch_size=4,
                            shuffle=False,
                            seed=2021)

print('train_data shape: {0}, train_label shape: {1}'.format(train_data.shape, train_label.shape))

train_data shape: (654, 224, 224, 3), train_label shape: (654, 13)


In [None]:
# Pick the same architecture that produced the '<fold>model.h5' checkpoints.
# model = creating_vgg_model()
# model = creating_resnet_model()
# model = creating_efficient_model()
model = creating_vit_model()
# model = creating_mlp_mixer_model()

# Number of per-fold checkpoints ('0model.h5' ... '4model.h5') to combine.
N_FOLDS = 5

train_fold_probs = []
test_fold_probs = []
for i in range(N_FOLDS):
  model.load_weights(str(i)+'model.h5')
  # NOTE(review): rows follow train_generator's iteration order; it has to be
  # unshuffled for the rows to line up with train_label.
  train_fold_probs.append(model.predict(train_generator))
  # Predict through test_generator, not the raw test_data array, so the test
  # images get the same 1/255 rescaling the models were trained with.
  test_fold_probs.append(model.predict(test_generator))

# Bagged prediction: sum of the per-fold class probabilities.
predicted = np.sum(test_fold_probs, axis=0)
# Stacking features: per-fold probabilities side by side (13 columns per fold).
table = np.concatenate(train_fold_probs, axis=1)
table_test = np.concatenate(test_fold_probs, axis=1)

print('predicted shape: {0}'.format(predicted.shape))
print('table shape: {0}'.format(table.shape))
print('table_test shape: {0}'.format(table_test.shape))



predicted shape: (497, 13)
table shape: (654, 65)
table_test shape: (497, 65)


## saving predicted and table

In [None]:
PREDICTED_NAME = 'vit'

# Save the summed test probabilities and the two stacking tables as .npy
# files whose names all start with PREDICTED_NAME.
for suffix, array in (('.npy', predicted),
                      ('_for_stacking.npy', table),
                      ('_for_stacking_with_test.npy', table_test)):
  np.save(PREDICTED_NAME+suffix, array)

## creating csv for submission

In [None]:
# Keep the summed probabilities in `predicted` untouched so this cell can be
# re-run; the class ids go into their own variable.
predicted_labels = np.argmax(predicted, axis=1)

# Submission ids are 1-based; derive their count from the predictions instead
# of hard-coding the number of test rows.
idx = np.arange(1, len(predicted_labels) + 1)

df = pd.DataFrame({'id': idx, 'y': predicted_labels})

df.to_csv('submission.csv', index=False)

df.head()

Unnamed: 0,id,y
0,1,12
1,2,12
2,3,11
3,4,11
4,5,12


In [None]:
# Download the saved arrays and the submission file from the Colab runtime.
for artifact in (PREDICTED_NAME+'.npy',
                 PREDICTED_NAME+'_for_stacking.npy',
                 'submission.csv',
                 PREDICTED_NAME+'_for_stacking_with_test.npy'):
  files.download(artifact)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Result

## CrossEntropy

|note|max accuracy(hold-out)|accuracy(cv=10)|LB (hold-out)|LB (bagging)|
|:--:|:--:|:--:|:--:|:--:|
|VGG16|0.427|-||-|
|EfficientNet|0.427|0.477||0.531|
|ViT|0.573|-|0.484|0.500|
|MLP-Mixer|-|0.469|-|0.578|
|ResNet50|0.519|0.523|0.594|0.594|

## Focal Loss vs CrossEntropy (VGG16)

|note|accuracy (hold-out)|LB|
|:--:|:--:|:--:|
|crossentropy|0.443|0.438|
|focal loss|0.420|0.453|

## Focal Loss

|note|max accuracy(hold-out)|accuracy(cv=10)|LB (hold-out)|LB (bagging)|
|:--:|:--:|:--:|:--:|:--:|
|VGG16|-|-|-|-|
|ResNet|-|0.486|-|0.531|
|EfficientNet|-|0.578|-|0.531|
|ViT(b16)|-|0.570|-|0.609|
|ViT(l16)|0.588|0.612|-|0.609|
|MLP-Mixer|-|-|-|-|
|Fine-tuned ResNet|0.511|-|0.391|-|

## Preprocessing

|note|max accuracy(hold-out)|accuracy (cv=10)|LB (hold-out)|
|:--:|:--:|:--:|:--:|
|brightness (=0.7), VGG16, Focal Loss|0.450||0.422|
|brightness (=0.7), VGG16, Categorical CrossEntropy|0.489||0.359|
|brightness (=0.7), vit-l16, Focal Loss||0.576|0.578|
|brightness (=0.7), ResNet50, Categorical CrossEntropy|0.473||0.516|

# End