In [None]:
!pip install -U keras-cv-attention-models # Library to use the pre-trained models
!pip install tensorflow-addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-cv-attention-models
  Downloading keras_cv_attention_models-1.3.11-py3-none-any.whl (613 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m613.4/613.4 KB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-addons
  Downloading tensorflow_addons-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard>=2.7
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons, keras-cv-attention-models
Successfully installed keras-cv-attention-models-1.3.11 tensorflow-addons-0.19.0 typeguard-2.13.3
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Mar 13 11:06:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    56W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('Not using a high-RAM runtime')

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


# Preparing the data

In [None]:
import os
import numpy as np
import pandas as pd
import cv2

In [None]:
# Mount Google Drive at /content/gdrive so the dataset stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Root folder of the image dataset; each sub-directory is treated as one class.
# NOTE(review): this path is relative while Drive is mounted at the absolute
# path /content/gdrive, so it presumably relies on the working directory being
# /content — confirm.
dataset_dir = "gdrive/MyDrive/hyper-kvasir-dataset-final"

In [None]:
def get_dataCategories(dataset_dir):
    """Count the ``.jpg`` images in every class folder of ``dataset_dir``.

    Each immediate sub-directory of ``dataset_dir`` is treated as one category;
    plain files directly inside ``dataset_dir`` are ignored.

    Args:
        dataset_dir: Path of the directory whose sub-directories are the
            categories.

    Returns:
        A ``(names, counts)`` tuple of two parallel lists sorted by folder
        name. ``names`` holds the folder names (``str``) and ``counts`` the
        number of ``*.jpg`` files directly inside each folder (``int``).
        Both lists are empty when ``dataset_dir`` has no sub-directories.
    """
    import glob

    # Keep counts as real integers: packing (name, count) pairs into one NumPy
    # array would coerce the counts to strings.
    counts_by_name = {}
    for folder_name in os.listdir(dataset_dir):
        folder_path = os.path.join(dataset_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue
        # Escape the folder path so glob metacharacters in it match literally.
        pattern = os.path.join(glob.escape(folder_path), "*.jpg")
        counts_by_name[folder_name] = len(glob.glob(pattern))

    names = sorted(counts_by_name)
    return names, [counts_by_name[name] for name in names]

# Discover the class folders and how many images each one holds.
categories, nbr_files = get_dataCategories(dataset_dir)

# Summarise the classes as a table: one row per category.
df = pd.DataFrame(
    data={
        "categorie": categories,
        "numbre of files": nbr_files,
    }
)
print("number of categories: ", len(categories))
df

number of categories:  23


Unnamed: 0,categorie,numbre of files
0,barretts,41
1,barretts-short-segment,53
2,bbps-0-1,646
3,bbps-2-3,1158
4,cecum,1027
5,dyed-lifted-polyps,1002
6,dyed-resection-margins,989
7,esophagitis-a,413
8,esophagitis-b-d,260
9,hemorrhoids,6


In [None]:
def get_x_y(datadir, categories, img_wid, img_high):
    """Load every image of every category into pixel and label arrays.

    Args:
        datadir: Directory containing one sub-directory per category.
        categories: Sequence of category folder names; an image's label is the
            position of its folder in this sequence.
        img_wid: Width, in pixels, that every image is resized to.
        img_high: Height, in pixels, that every image is resized to.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_images, img_high, img_wid, 3)``
        in RGB channel order and ``y`` has shape ``(n_images,)`` holding the
        integer class index of each image. Files that cannot be read or
        converted are reported with ``print`` and skipped.
    """
    X, y = [], []
    for class_num, category in enumerate(categories):
        path = os.path.join(datadir, category)
        # Sort so the sample order does not depend on the file system's
        # listing order (it feeds a seeded train/test split downstream).
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path)
                if img_array is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                # dsize is (width, height); the result has shape (height, width, 3).
                ima_resize_rgb = cv2.resize(img_array, (img_wid, img_high))
            except Exception as e:
                print(f"Skipping {img_path}: {e}")
                continue

            X.append(ima_resize_rgb)
            y.append(class_num)

    y = np.asarray(y, dtype=int)
    if not X:
        # Keep a 4-D image array even when nothing was loaded.
        return np.empty((0, img_high, img_wid, 3), dtype=np.uint8), y

    # The stacked images are already (n, img_high, img_wid, 3). Reshaping to
    # (n, img_wid, img_high, 3) would scramble pixels for non-square targets.
    return np.array(X), y


# Every image is resized to a 224 x 224 square.
img_wid = img_high = 224

# Read the whole dataset into memory: X holds the pixels, y the class indices.
X, y = get_x_y(
    datadir=dataset_dir,
    categories=categories,
    img_wid=img_wid,
    img_high=img_high,
)

print(f"X: {X.shape}")
print(f"y: {y.shape}")

X: (10740, 224, 224, 3)
y: (10740,)


## Split the data into train and test

In [None]:
from sklearn.model_selection import train_test_split

# Turn the label vector into a column so the label arrays are (n_samples, 1).
Y = y.reshape(len(y), 1)

# Half of the samples go to each side; the split is stratified on the class
# label and seeded so it can be repeated.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    stratify=Y,
    train_size=0.5,
    random_state=42,
)

print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

X_train: (5370, 224, 224, 3)
y_train: (5370, 1)
X_test: (5370, 224, 224, 3)
y_test: (5370, 1)


# Preparing the pre-trained model

[Pre-trained models repository](https://github.com/leondgarse/keras_cv_attention_models)

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
import sklearn

## MobileViT model

In [None]:
from keras_cv_attention_models import mobilevit

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_1 = mobilevit.MobileViT_XS(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/mobilevit/mobilevit_xs_imagenet.h5
>>>> Load pretrained from: /root/.keras/models/mobilevit_xs_imagenet.h5




## BotNet model

In [None]:
from keras_cv_attention_models import botnet

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_2 = botnet.BotNetSE33T(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)



>>>> Load pretrained from: /root/.keras/models/botnet_se33t_256_imagenet.h5
>>>> Reload mismatched weights: 256 -> (224, 224)
>>>> Reload layer: stack2_block3_deep_2_mhsa_pos_emb
>>>> Reload layer: stack3_block3_deep_2_mhsa_pos_emb
>>>> Reload layer: stack4_block1_deep_2_mhsa_pos_emb
>>>> Reload layer: stack4_block2_deep_2_mhsa_pos_emb


## CMT model

In [None]:
from keras_cv_attention_models import cmt

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_3 = cmt.CMTTiny_torch(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/cmt/cmt_tiny_torch_160_imagenet.h5




>>>> Load pretrained from: /root/.keras/models/cmt_tiny_torch_160_imagenet.h5




>>>> Reload mismatched weights: 160 -> (224, 224)
>>>> Reload layer: stack1_pos_emb
>>>> Reload layer: stack2_pos_emb
>>>> Reload layer: stack3_pos_emb
>>>> Reload layer: stack4_pos_emb


## CoAtNet model

In [None]:
from keras_cv_attention_models import coatnet

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_4 = coatnet.CoAtNet0(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/coatnet/coatnet0_224_imagenet.h5
>>>> Load pretrained from: /root/.keras/models/coatnet0_224_imagenet.h5




## ConvNeXt model

In [None]:
from keras_cv_attention_models import convnext

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_5 = convnext.ConvNeXtLarge(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/convnext/convnext_large_224_imagenet.h5
>>>> Load pretrained from: /root/.keras/models/convnext_large_224_imagenet.h5




## DaViT model

In [None]:
from keras_cv_attention_models import davit

In [None]:
# Derive the head size and input shape from the loaded dataset (`categories`,
# `img_high`, `img_wid`) rather than repeating the literals 23 and 224.
model_6 = davit.DaViT_B(
    input_shape=(img_high, img_wid, 3),
    num_classes=len(categories),
    pretrained="imagenet",
)

Downloading data from https://github.com/leondgarse/keras_cv_attention_models/releases/download/davit/davit_b_imagenet.h5
>>>> Load pretrained from: /root/.keras/models/davit_b_imagenet.h5




# Custom metrics to add to the model

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

class MetricsCallback(keras.callbacks.Callback):
    """Keras callback that scores the model on held-out data after every epoch.

    Macro- and micro-averaged F1, precision and recall are computed with
    scikit-learn over the whole evaluation set at the end of each epoch,
    printed, and appended to the per-metric history lists ``f1_macro``,
    ``f1_micro``, ``precision_macro``, ``precision_micro``, ``recall_macro``
    and ``recall_micro`` (one entry per epoch).

    Args:
        x_val: Evaluation inputs. When omitted, the notebook-level ``X_test``
            is used.
        y_val: Integer class labels for ``x_val``. When omitted, the
            notebook-level ``y_test`` is used.
    """

    def __init__(self, x_val=None, y_val=None):
        super().__init__()
        self.x_val = x_val
        self.y_val = y_val

    def on_train_begin(self, logs=None):
        """Start every ``fit`` call with empty score histories."""
        self.f1_macro = []
        self.f1_micro = []
        self.precision_macro = []
        self.precision_micro = []
        self.recall_macro = []
        self.recall_micro = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the current model on the evaluation set and print the results."""
        # Fall back to the notebook globals so `MetricsCallback()` keeps working.
        x_val = X_test if self.x_val is None else self.x_val
        y_val = y_test if self.y_val is None else self.y_val

        # Arg-max the raw model outputs. Rounding them first would turn every
        # score below 0.5 into 0, so any sample without a dominant class would
        # be assigned class 0.
        y_pred = self.model.predict(x_val, verbose=0).argmax(axis=1)
        y_true = np.asarray(y_val).ravel()

        # zero_division=0 scores a class with no predicted (or no true) samples
        # as 0 without emitting a warning, consistently across all metrics.
        score_f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
        score_f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0)
        score_precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
        score_precision_micro = precision_score(y_true, y_pred, average='micro', zero_division=0)
        score_recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
        score_recall_micro = recall_score(y_true, y_pred, average='micro', zero_division=0)

        self.f1_macro.append(score_f1_macro)
        self.f1_micro.append(score_f1_micro)
        self.precision_macro.append(score_precision_macro)
        self.precision_micro.append(score_precision_micro)
        self.recall_macro.append(score_recall_macro)
        self.recall_micro.append(score_recall_micro)

        # Best score so far in this fit and the (0-based) epoch that reached it.
        best_f1_score_macro = max(self.f1_macro)
        best_f1_score_macro_epoch = self.f1_macro.index(best_f1_score_macro)

        best_f1_score_micro = max(self.f1_micro)
        best_f1_score_micro_epoch = self.f1_micro.index(best_f1_score_micro)

        # These scores are computed here, once per epoch over the full
        # evaluation set, because Keras computes its built-in metrics batch by
        # batch; a custom callback is therefore needed for F1.
        print(f""" - val_f1_macro: {score_f1_macro} - val_f1_micro: {score_f1_micro}
          - val_precision_macro: {score_precision_macro} - val_precision_micro: {score_precision_micro} - val_recall_macro: {score_recall_macro} - val_recall_micro: {score_recall_micro}
          - val_best_f1_score_macro: {best_f1_score_macro} (epoch: {best_f1_score_macro_epoch}) - val_best_f1_score_micro: {best_f1_score_micro} (epoch: {best_f1_score_micro_epoch})""")

# Training

## MobileViT

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune MobileViT.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_1.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_1.summary()

Model: "mobilevit_xs"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_pad (ZeroPadding2D)       (None, 226, 226, 3)  0           ['input_1[0][0]']                
                                                                                                  
 stem_conv (Conv2D)             (None, 112, 112, 16  432         ['stem_pad[0][0]']               
                                )                                                                 
                                                                                       

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_1.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=256,
    callbacks=[metrics],
)

Epoch 1/5
 6/34 [====>.........................] - ETA: 12s - loss: 2.4473 - accuracy: 0.4616 - top-5-accuracy: 0.7305



 - val_f1_macro: 0.49050621616909174 - val_f1_micro: 0.7160148975791435
          - val_precision_macro: 0.569724917388824 - val_precision_micro: 0.7160148975791434 - val_recall_macro: 0.4836733192990695 - val_recall_micro: 0.7160148975791434
          - val_best_f1_score_macro: 0.49050621616909174 (epoch: 0) - val_best_f1_score_micro: 0.7160148975791435 (epoch: 0.7160148975791435)
Epoch 2/5
 - val_f1_macro: 0.5527775655740709 - val_f1_micro: 0.8384543761638734
          - val_precision_macro: 0.6337172259303451 - val_precision_micro: 0.8384543761638734 - val_recall_macro: 0.5437500870023153 - val_recall_micro: 0.8384543761638734
          - val_best_f1_score_macro: 0.5527775655740709 (epoch: 1) - val_best_f1_score_micro: 0.8384543761638734 (epoch: 0.8384543761638734)
Epoch 3/5
 - val_f1_macro: 0.488789831360106 - val_f1_micro: 0.8226256983240223
          - val_precision_macro: 0.5306893379072324 - val_precision_micro: 0.8226256983240223 - val_recall_macro: 0.5026850397823966 - val_re

<keras.callbacks.History at 0x7f7e08296af0>

## BotNet

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune BotNet.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_2.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_2.summary()

Model: "botnet_se33t"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_1_pad (ZeroPadding2D)     (None, 226, 226, 3)  0           ['input_6[0][0]']                
                                                                                                  
 stem_1_conv (Conv2D)           (None, 112, 112, 24  648         ['stem_1_pad[0][0]']             
                                )                                                                 
                                                                                       

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_2.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=256,
    callbacks=[metrics],
)

Epoch 1/5


  output, from_logits = _get_logits(






 - val_f1_macro: 0.05075291694504499 - val_f1_micro: 0.16815642458100558
          - val_precision_macro: 0.09952055953467658 - val_precision_micro: 0.16815642458100558 - val_recall_macro: 0.08694614586458525 - val_recall_micro: 0.16815642458100558
          - val_best_f1_score_macro: 0.05075291694504499 (epoch: 0) - val_best_f1_score_micro: 0.16815642458100558 (epoch: 0.16815642458100558)
Epoch 2/5
 - val_f1_macro: 0.3732691671516131 - val_f1_micro: 0.6612662942271881
          - val_precision_macro: 0.4578227118484282 - val_precision_micro: 0.6612662942271881 - val_recall_macro: 0.4035504379083312 - val_recall_micro: 0.6612662942271881
          - val_best_f1_score_macro: 0.3732691671516131 (epoch: 1) - val_best_f1_score_micro: 0.6612662942271881 (epoch: 0.6612662942271881)
Epoch 3/5
 - val_f1_macro: 0.4767924931519783 - val_f1_micro: 0.7823091247672254
          - val_precision_macro: 0.5209461281809672 - val_precision_micro: 0.7823091247672254 - val_recall_macro: 0.4787018311883088

<keras.callbacks.History at 0x7f7df8403fa0>

## CMT model

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune CMT.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_3.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_3.summary()

Model: "cmt_tiny_torch"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_10 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_1_pad (ZeroPadding2D)     (None, 226, 226, 3)  0           ['input_10[0][0]']               
                                                                                                  
 stem_1_conv (Conv2D)           (None, 112, 112, 16  448         ['stem_1_pad[0][0]']             
                                )                                                                 
                                                                                     

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_3.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=128,
    callbacks=[metrics],
)

Epoch 1/5
 - val_f1_macro: 0.4634533083546173 - val_f1_micro: 0.7541899441340782
          - val_precision_macro: 0.5234864125221871 - val_precision_micro: 0.7541899441340782 - val_recall_macro: 0.45137467673170745 - val_recall_micro: 0.7541899441340782
          - val_best_f1_score_macro: 0.4634533083546173 (epoch: 0) - val_best_f1_score_micro: 0.7541899441340782 (epoch: 0.7541899441340782)
Epoch 2/5
 - val_f1_macro: 0.4979252317064144 - val_f1_micro: 0.8108007448789571
          - val_precision_macro: 0.5653687287010227 - val_precision_micro: 0.8108007448789571 - val_recall_macro: 0.512170582754925 - val_recall_micro: 0.8108007448789571
          - val_best_f1_score_macro: 0.4979252317064144 (epoch: 1) - val_best_f1_score_micro: 0.8108007448789571 (epoch: 0.8108007448789571)
Epoch 3/5
 - val_f1_macro: 0.5488917577095586 - val_f1_micro: 0.8461824953445065
          - val_precision_macro: 0.5911441804259413 - val_precision_micro: 0.8461824953445065 - val_recall_macro: 0.541634092622421

<keras.callbacks.History at 0x7f82e26dafd0>

## CoAtNet

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune CoAtNet.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_4.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_4.summary()

Model: "coatnet0"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_1_pad (ZeroPadding2D)     (None, 226, 226, 3)  0           ['input_14[0][0]']               
                                                                                                  
 stem_1_conv (Conv2D)           (None, 112, 112, 64  1728        ['stem_1_pad[0][0]']             
                                )                                                                 
                                                                                           

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_4.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
    callbacks=[metrics],
)

Epoch 1/5


  output, from_logits = _get_logits(


 - val_f1_macro: 0.3237135967428597 - val_f1_micro: 0.5860335195530726
          - val_precision_macro: 0.44881713853744876 - val_precision_micro: 0.5860335195530726 - val_recall_macro: 0.3468296879878797 - val_recall_micro: 0.5860335195530726
          - val_best_f1_score_macro: 0.3237135967428597 (epoch: 0) - val_best_f1_score_micro: 0.5860335195530726 (epoch: 0.5860335195530726)
Epoch 2/5
 - val_f1_macro: 0.42400687490690386 - val_f1_micro: 0.7374301675977654
          - val_precision_macro: 0.4690288522760323 - val_precision_micro: 0.7374301675977654 - val_recall_macro: 0.42102552214858097 - val_recall_micro: 0.7374301675977654
          - val_best_f1_score_macro: 0.42400687490690386 (epoch: 1) - val_best_f1_score_micro: 0.7374301675977654 (epoch: 0.7374301675977654)
Epoch 3/5
 - val_f1_macro: 0.4804561573275124 - val_f1_micro: 0.7675977653631285
          - val_precision_macro: 0.5005484906550262 - val_precision_micro: 0.7675977653631285 - val_recall_macro: 0.49662549285352353 - v

<keras.callbacks.History at 0x7f7e1438a0d0>

## ConvNeXt

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune ConvNeXt.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_5.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_5.summary()

Model: "convnext_large"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_17 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_conv (Conv2D)             (None, 56, 56, 192)  9408        ['input_17[0][0]']               
                                                                                                  
 stem_ln (LayerNormalization)   (None, 56, 56, 192)  384         ['stem_conv[0][0]']              
                                                                                                  
 stack1_block1_pad (ZeroPadding  (None, 62, 62, 192)  0          ['stem_ln[0][0]']   

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_5.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
    callbacks=[metrics],
)

Epoch 1/5


  output, from_logits = _get_logits(


 - val_f1_macro: 0.4441349339985783 - val_f1_micro: 0.7692737430167598
          - val_precision_macro: 0.5218861584408652 - val_precision_micro: 0.7692737430167598 - val_recall_macro: 0.4776873070244286 - val_recall_micro: 0.7692737430167598
          - val_best_f1_score_macro: 0.4441349339985783 (epoch: 0) - val_best_f1_score_micro: 0.7692737430167598 (epoch: 0.7692737430167598)
Epoch 2/5
 - val_f1_macro: 0.46220845057676796 - val_f1_micro: 0.7724394785847298
          - val_precision_macro: 0.5318792541818154 - val_precision_micro: 0.7724394785847299 - val_recall_macro: 0.4766277557249463 - val_recall_micro: 0.7724394785847299
          - val_best_f1_score_macro: 0.46220845057676796 (epoch: 1) - val_best_f1_score_micro: 0.7724394785847298 (epoch: 0.7724394785847298)
Epoch 3/5
 - val_f1_macro: 0.5267508784679086 - val_f1_micro: 0.8562383612662943
          - val_precision_macro: 0.554417348827661 - val_precision_micro: 0.8562383612662943 - val_recall_macro: 0.5407263004404199 - val_r

<keras.callbacks.History at 0x7f82d12d2580>

## DaViT

In [None]:
from tensorflow_addons.optimizers import AdamW

# AdamW hyper-parameters used to fine-tune DaViT.
learning_rate = 0.001
weight_decay = 0.0001
optimizer = AdamW(weight_decay=weight_decay, learning_rate=learning_rate)

# Labels are integer class indices, hence the sparse loss and sparse metrics.
# NOTE(review): from_logits=True assumes the model outputs raw logits —
# confirm against the model's final activation.
model_6.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top-5-accuracy"),
    ],
)

model_6.summary()

Model: "davit_b"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_18 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 stem_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           ['input_18[0][0]']               
                                                                                                  
 stem_conv (Conv2D)             (None, 56, 56, 128)  18944       ['stem_pad[0][0]']               
                                                                                                  
 stem_ln (LayerNormalization)   (None, 56, 56, 128)  256         ['stem_conv[0][0]']        

In [None]:
# Fresh callback so the F1 / precision / recall histories start empty.
metrics = MetricsCallback()

# Train for 5 epochs; the held-out half of the data doubles as validation set.
model_6.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
    callbacks=[metrics],
)

Epoch 1/5
 - val_f1_macro: 0.4072599189113387 - val_f1_micro: 0.7216014897579144
          - val_precision_macro: 0.4280395437889379 - val_precision_micro: 0.7216014897579144 - val_recall_macro: 0.4206629592178578 - val_recall_micro: 0.7216014897579144
          - val_best_f1_score_macro: 0.4072599189113387 (epoch: 0) - val_best_f1_score_micro: 0.7216014897579144 (epoch: 0.7216014897579144)
Epoch 2/5
 - val_f1_macro: 0.43343410504149305 - val_f1_micro: 0.7597765363128491
          - val_precision_macro: 0.49291358121830386 - val_precision_micro: 0.7597765363128491 - val_recall_macro: 0.4543915843357619 - val_recall_micro: 0.7597765363128491
          - val_best_f1_score_macro: 0.43343410504149305 (epoch: 1) - val_best_f1_score_micro: 0.7597765363128491 (epoch: 0.7597765363128491)
Epoch 3/5
 - val_f1_macro: 0.4742485077856029 - val_f1_micro: 0.8167597765363128
          - val_precision_macro: 0.548517069701323 - val_precision_micro: 0.8167597765363128 - val_recall_macro: 0.4936084927544

<keras.callbacks.History at 0x7f82ccd50520>