In [1]:
import tensorflow as tf
import os
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
import numpy as np
import pandas as pd
import random
import tensorflow_hub as hub
from sklearn.metrics import classification_report

In [2]:
# Reproducibility: a single seed is shared by every random number generator
# used in this notebook (a different value per generator would also work).
# `os`, `random`, `np` and `tf` come from the import cell at the top.
seed_value = 77

# 1. Hash seed exported through the environment.
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts;
# confirm that assigning it here has the intended effect.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# 2. Python's built-in pseudo-random generator.
random.seed(seed_value)

# 3. NumPy's global pseudo-random generator.
np.random.seed(seed_value)

# 4. TensorFlow's global pseudo-random generator.
tf.random.set_seed(seed_value)

In [3]:
def create_mapping(num_classes=75):
    """Map positions in string-sorted label order back to integer labels.

    The labels ``0 .. num_classes - 1`` are converted to strings and sorted
    lexicographically ('0', '1', '10', '11', ...). The returned dictionary maps
    each position in that sorted order to the integer label found there, e.g.
    position 2 -> label 10.

    Presumably this mirrors the class indexing produced by the data generator,
    which is fed string targets -- TODO confirm against ``train.class_indices``.

    Args:
        num_classes: Number of integer labels to cover. Defaults to 75.

    Returns:
        dict[int, int]: position in lexicographic order -> integer label.
    """
    ordered_labels = sorted(str(label) for label in range(num_classes))
    return {position: int(label) for position, label in enumerate(ordered_labels)}

mapping_dict = create_mapping()

def test_accuracy(input_ordered_pred):
    ## Returns the approximate accuracy on the test set. The better your model is, the more accurate the results will be
    from sklearn.metrics import accuracy_score,classification_report
    from scipy.stats import mode
    def find_true_labels(input_pred,window=3):
        ## Takes in input vector of predictions and outputs the true labels (approximate)
        padded = [0]*window+list(input_pred)+[74]*window
        true = []
        for i in range(len(input_pred)):
            true.append(mode(padded[i:i+(window*2)+1]).mode[0])
        return true
    true = find_true_labels(input_ordered_pred)
    return accuracy_score(true,input_ordered_pred),classification_report(true,input_ordered_pred)

In [4]:
# Root folder of the training images; generate_data_df_with_folds() walks one
# sub-folder per class id (directory_train/<class id>).
directory_train = "../input/cv-assignment-2-224x224-training-set"

# Side length (in pixels) used for the target size and the model input.
pixels = 224
# Batch size used by the data generators built from the file table.
batch_size = 128
# Number of classes; class ids run from 0 to num_class - 1.
num_class = 75

def generate_data_df_with_folds(kfold=10, directory=None, n_classes=None):
    """Index the training images and assign every image to a fold.

    For each class id ``0 .. n_classes - 1`` the folder ``<directory>/<id>`` is
    walked recursively and every file whose name does not contain
    ``desktop.ini`` is recorded. Within each class the images are split over
    the folds as evenly as possible (the last fold receives the remainder) and
    the assignment is shuffled with the ``random`` module, so seed it first
    for reproducible folds.

    Args:
        kfold: Number of folds; must be at least 1.
        directory: Root folder holding one sub-folder per class id. Defaults
            to the notebook-level ``directory_train``.
        n_classes: Number of class ids to scan. Defaults to the notebook-level
            ``num_class``.

    Returns:
        pandas.DataFrame with the columns ``filepaths`` (str), ``target``
        (class id as a string) and ``fold`` (int in ``0 .. kfold - 1``).

    Raises:
        ValueError: If ``kfold`` is smaller than 1.
    """
    if kfold < 1:
        raise ValueError("kfold must be a positive integer")
    if directory is None:
        directory = directory_train
    if n_classes is None:
        n_classes = num_class

    filepaths = []
    targets = []
    folds = []
    for label in range(n_classes):
        n_before = len(filepaths)
        for root, dirs, names in os.walk(directory + '/' + str(label)):
            # os.walk yields entries in arbitrary order; sorting keeps the
            # file order (and with it the seeded fold assignment) stable.
            dirs.sort()
            for name in sorted(names):
                if 'desktop.ini' not in name:
                    filepaths.append(root + '/' + name)
                    targets.append(label)

        # Even split over the first kfold - 1 folds, remainder in the last one.
        n = len(filepaths) - n_before
        class_folds = [fold for fold in range(kfold - 1) for _ in range(n // kfold)]
        class_folds += [kfold - 1] * (n - len(class_folds))
        random.shuffle(class_folds)
        folds += class_folds

    files = pd.DataFrame({'filepaths': filepaths, 'target': targets, 'fold': folds})
    # The targets are stored as strings for use as the generators' y_col.
    files['target'] = files['target'].astype(str)
    return files

# File table (filepaths / target / fold) for the training set, split into 10 folds.
files = generate_data_df_with_folds(kfold=10)

In [5]:
# Generator created with default arguments only; pixel rescaling and
# augmentation are handled by layers inside the model.
gen = tf.keras.preprocessing.image.ImageDataGenerator()

# Options shared by the training and validation iterators.
_flow_options = dict(
    x_col='filepaths',
    y_col='target',
    class_mode='sparse',
    batch_size=batch_size,
    target_size=(pixels, pixels),
)

# Folds 0-8 are used for training; fold 9 is held out for validation and is
# read in a fixed order (shuffle=False).
train = gen.flow_from_dataframe(files[files['fold'] < 9], **_flow_options)
val = gen.flow_from_dataframe(files[files['fold'] == 9], shuffle=False, **_flow_options)

Found 27684 validated image filenames belonging to 75 classes.
Found 3381 validated image filenames belonging to 75 classes.


In [6]:
# Augmentation block applied to the raw input images inside the model:
# a random horizontal/vertical flip followed by a random rotation (factor 0.5).
data_augmentation = models.Sequential([
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomRotation(factor=0.5),
])

2021-11-08 16:33:41.128635: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-08 16:33:41.222624: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-08 16:33:41.223336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-08 16:33:41.225171: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [7]:
# Feature extractor: a TF Hub layer wrapped in a Sequential model and frozen
# (trainable=False). The model summary reports a 768-wide output for it.
base = tf.keras.Sequential([hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_b8_fe/1", trainable=False)])

## This is the top layer trained for binary classification of food/place that I have brought over as a feature extractor
## (512-unit SELU layer followed by a single tanh unit).
decoder = models.Sequential([
    layers.Dense(units=512,activation="selu"),
    layers.Dense(units=1, activation="tanh")
])

# Restore the previously trained head weights, then freeze the head so that
# it is not updated while the 75-class model trains.
# NOTE(review): the decoder has no input shape at this point; confirm the
# weights are actually restored once it is first called on the base features.
decoder.load_weights('../input/food-place-extractor/decoder/b8/decoder_weights')
decoder.trainable=False

In [8]:
# Functional model: augmentation -> rescaling -> frozen feature extractor,
# followed by a trainable head gated by the frozen food/place decoder.
img_inputs = tf.keras.Input(shape=(pixels, pixels, 3))
augment = data_augmentation(img_inputs)
# x / 127.5 - 1 maps pixel values in 0..255 to [-1, 1]. `layers.Rescaling` is
# used instead of the legacy `layers.experimental.preprocessing` path, in line
# with the `layers.RandomFlip` / `layers.RandomRotation` layers used for
# augmentation.
rescaling = layers.Rescaling(scale=1./127.5, offset=-1)(augment)
base_model = base(rescaling)

# Frozen binary head: one tanh value per image.
decode = decoder(base_model)

# Trainable head on the dropout-regularised base features.
drop = layers.Dropout(0.4,seed=seed_value)(base_model)
x = layers.Dense(units=768,activation="relu")(drop)
x = layers.Dropout(0.2,seed=seed_value)(x)
x = layers.Dense(units=512,activation="relu")(x)

# Scale the 512-unit hidden layer by the decoder output (broadcast over the
# units), then concatenate it with the dropped-out base features as a skip
# connection.
mul = tf.keras.layers.Multiply()([decode,x])
concat = tf.keras.layers.Concatenate()([drop,mul])
output = layers.Dense(units=num_class, activation="softmax", name="Food_Place_75")(concat)

model = tf.keras.Model(inputs=img_inputs, outputs=output)

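Multiply the output of the tanh decoder with the 512-unit hidden layer built on top of the vision transformer features, then concatenate the product with the (dropout-regularised) transformer features as a skip connection.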

Since the decoder's tanh output maps food to 1 and place to -1, this hopefully pushes the hidden-layer activations for food images and place images towards opposite signs. Since the binary classifier is 99% accurate, this should in theory help performance greatly.

From my observations, this seems to have the effect of accelerating convergence (for the first few epochs at least).

In [9]:
# Print the layer-by-layer structure and parameter counts of the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 224, 224, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
rescaling (Rescaling)           (None, 224, 224, 3)  0           sequential[0][0]                 
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 768)          85807872    rescaling[0][0]                  
______________________________________________________________________________________________

In [10]:
# Adam with an initial learning rate of 4e-3; the callbacks attached during
# training lower it as training progresses.
optimizer = optimizers.Adam(learning_rate=4e-3)

# Sparse (integer) labels, tracking both top-1 and top-5 accuracy.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top_5_accuracy"),
    ],
)

In [11]:
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch`` given the current ``lr``.

    The rate decays by a factor of 0.9 for epochs 0-3 and by 0.8 from epoch 4
    onwards. Once it has dropped below 1e-6 it is returned unchanged.
    """
    if lr < 1e-6:
        return lr
    decay = 0.9 if epoch < 4 else 0.8
    return lr * decay
    
# Per-epoch multiplicative decay defined by scheduler().
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Additionally halve the learning rate when the validation loss has not
# improved for one epoch, down to a floor of 1e-8.
lr_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=1,
    min_lr=1e-8,
    verbose=1,
)

# Stop after 3 epochs without a validation-loss improvement of at least 0.002
# and roll the model back to the weights of the best epoch.
early_stoppage = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.002,
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

callbacks_list = [lr_scheduler, lr_plateau, early_stoppage]

In [12]:
# Train for at most 20 epochs; the early-stopping callback may end the run sooner.
history_1 = model.fit(train,validation_data=val,callbacks=callbacks_list,epochs=20)

2021-11-08 16:34:00.190031: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/20


2021-11-08 16:34:05.317984: I tensorflow/compiler/xla/service/service.cc:171] XLA service 0x7ff81803f7a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2021-11-08 16:34:05.318028: I tensorflow/compiler/xla/service/service.cc:179]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
2021-11-08 16:34:05.659112: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2021-11-08 16:34:07.854670: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005
2021-11-08 16:34:23.458538: I tensorflow/compiler/jit/xla_compilation_cache.cc:363] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/20

Epoch 00002: ReduceLROnPlateau reducing learning rate to 0.0016200001118704677.
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00013759415014646947.
Epoch 12/20
Epoch 13/20

Epoch 00013: ReduceLROnPlateau reducing learning rate to 4.4030130084138364e-05.
Epoch 14/20

Epoch 00014: ReduceLROnPlateau reducing learning rate to 1.7612052033655345e-05.
Epoch 15/20
Restoring model weights from the end of the best epoch.
Epoch 00015: early stopping


In [13]:
# Persist the trained weights (the weights after fit(), i.e. the best epoch
# when early stopping restored them).
model.save_weights('weights/vit_b8_mult_skip_fold1')

In [14]:
# Unlabelled test images, read in a fixed order (shuffle=False) so that the
# rows of `pred_test` line up with `test.file_paths`.
test_dir = '../input/food-places-test-224x224/processed_test'
test = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels=None,
    image_size=(pixels, pixels),
    shuffle=False,
)

# One row of 75 class probabilities per test image.
pred_test = model.predict(test)

Found 4315 files belonging to 1 classes.


In [15]:
# Numeric image ids taken from the file names (<test_dir>/<id>.jpg).
test_ids = [int(path.replace(test_dir+'/','').replace('.jpg','')) for path in test.file_paths]

# Indices of the five highest-scoring outputs per image, best first.
best_first = pred_test.argsort(axis=-1)[:, -5:][:, ::-1]

# Translate output indices to class labels and collect them as Top 1 .. Top 5.
test_df = pd.DataFrame({'Id': test_ids})
for rank in range(5):
    test_df['Top %d' % (rank + 1)] = [mapping_dict[idx] for idx in best_first[:, rank]]

# Order the table by image id.
test_df = test_df.sort_values(by='Id').reset_index(drop=True)

In [16]:
# Approximate accuracy of the top-1 predictions (ordered by image id), scored
# against the smoothed pseudo-labels built inside test_accuracy().
test_acc, crep = test_accuracy(test_df['Top 1'])
print('Accuracy: ',test_acc)
print(crep)

Accuracy:  0.9001158748551564
              precision    recall  f1-score   support

           0       0.71      0.95      0.81        58
           1       0.94      0.96      0.95       104
           2       0.75      0.95      0.84       109
           3       0.99      0.88      0.93       124
           4       0.91      0.89      0.90        94
           5       0.98      0.96      0.97       105
           6       0.89      0.76      0.82        90
           7       1.00      1.00      1.00       100
           8       0.93      1.00      0.96        53
           9       1.00      0.97      0.98       100
          10       0.87      0.83      0.85        47
          11       0.99      0.96      0.98        84
          12       0.98      0.98      0.98        65
          13       0.97      0.92      0.94       101
          14       0.97      0.80      0.88        35
          15       1.00      1.00      1.00        50
          16       0.77      0.88      0.82        

In [17]:
# Iterator over every training image (all folds) in table order (shuffle=False),
# so predictions can be joined back onto `files` row by row.
fullset = gen.flow_from_dataframe(files,x_col='filepaths',y_col='target',class_mode='sparse',batch_size=batch_size,target_size=(pixels,pixels),shuffle=False)

Found 31065 validated image filenames belonging to 75 classes.


In [18]:
# Class probabilities for the full training set, one row per row of `files`.
pred_train = model.predict(fullset)

In [19]:
# Store the file table next to the raw model outputs (columns 0..74).
# NOTE(review): these columns are model output positions, not class labels --
# presumably they need `mapping_dict` to be read as labels; confirm.
pd.concat([files,pd.DataFrame(pred_train)],axis=1).to_csv('ViTb8_Train_output_fold1.csv',index=False)

In [20]:
# Rebuild the id column in file order (the earlier table was sorted by Id) so
# that its rows line up with the rows of `pred_test`.
test_df = pd.DataFrame()
test_df['Id'] = [int(x.replace(test_dir+'/','').replace('.jpg','')) for x in test.file_paths]

In [21]:
# Join the ids with the raw model outputs, order by image id and write to CSV.
pd.concat([test_df,pd.DataFrame(pred_test)],axis=1).sort_values(by='Id').to_csv('ViTb8_Test_output.csv',index=False)