# In [None]:
import tensorflow as tf
import math
from tensorflow.keras import optimizers,regularizers
from tensorflow.keras.callbacks import LearningRateScheduler, TensorBoard, ModelCheckpoint,ReduceLROnPlateau
import  matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Model

from tensorflow.keras.models import load_model
from  tensorflow.keras.layers import*
if __name__ == '__main__':
    # GPU settings: turn on memory growth for every GPU TensorFlow can see.
    # Looping over an empty device list does nothing, so no separate
    # emptiness check is required.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

def round_filters(filters, multiplier, depth_divisor=8, min_depth=None):
    """Scale a channel count and round it to a multiple of ``depth_divisor``.

    The scaled value is rounded to the nearest multiple of ``depth_divisor``,
    clamped from below by ``min_depth``.  If rounding would shrink the scaled
    value by more than 10 %, one extra ``depth_divisor`` is added.

    Args:
        filters: Base number of filters.
        multiplier: Width multiplier.  A falsy value (``None`` or ``0``)
            disables scaling and returns ``filters`` unchanged, mirroring
            ``round_repeats``.
        depth_divisor: The result is a multiple of this value.
        min_depth: Smallest allowed result; defaults to ``depth_divisor``.

    Returns:
        The rounded filter count as an ``int`` (or ``filters`` itself when
        no multiplier is given).
    """
    if not multiplier:
        return filters
    lower_bound = min_depth or depth_divisor
    scaled = filters * multiplier
    # Adding half a divisor before the floor division rounds to nearest.
    rounded = max(lower_bound,
                  int(scaled + depth_divisor / 2) // depth_divisor * depth_divisor)
    # Never let rounding remove more than 10 % of the requested width.
    if rounded < 0.9 * scaled:
        rounded += depth_divisor
    return int(rounded)


def round_repeats(repeats, multiplier):
    """Scale a block repeat count by the depth multiplier, rounding up.

    A falsy ``multiplier`` (``None`` or ``0``) leaves ``repeats`` untouched.
    """
    return int(math.ceil(multiplier * repeats)) if multiplier else repeats


class SEBlock(tf.keras.layers.Layer):
    """Squeeze-and-excitation gate that rescales the channels of a feature map.

    The input is globally average-pooled, passed through a bottleneck of two
    1x1 convolutions (swish, then sigmoid) and the resulting per-channel
    weights are multiplied back onto the input.

    Args:
        input_channels: Number of channels the gate must produce.
        ratio: Bottleneck width as a fraction of ``input_channels``; the
            bottleneck always keeps at least one filter.
    """

    def __init__(self, input_channels, ratio=0.25):
        super().__init__()
        self.num_reduced_filters = max(1, int(input_channels * ratio))
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        # Both convolutions are pointwise and share these settings.
        pointwise = dict(kernel_size=(1, 1), strides=1, padding="same")
        self.reduce_conv = tf.keras.layers.Conv2D(filters=self.num_reduced_filters,
                                                  **pointwise)
        self.expand_conv = tf.keras.layers.Conv2D(filters=input_channels,
                                                  **pointwise)

    def call(self, inputs, **kwargs):
        gate = self.pool(inputs)
        # Re-insert two singleton axes after the batch axis so the pooled
        # vector can be fed to the 2-D convolutions.
        for _ in range(2):
            gate = tf.expand_dims(input=gate, axis=1)
        gate = tf.nn.swish(self.reduce_conv(gate))
        gate = tf.nn.sigmoid(self.expand_conv(gate))
        return inputs * gate


class MBConv(tf.keras.layers.Layer):
    """Inverted-bottleneck block: 1x1 expand, depthwise conv, SE gate, 1x1 project.

    A residual connection (with drop-connect while training) is added only
    when ``stride == 1`` and ``in_channels == out_channels``.

    Args:
        in_channels: Declared channel count of the input; it sizes the
            expansion and decides whether the residual connection is used.
        out_channels: Number of channels produced by the projection conv.
        expansion_factor: Width of the expanded representation relative to
            ``in_channels``.
        stride: Stride of the depthwise convolution.
        k: Side length of the square depthwise kernel.
        drop_connect_rate: Probability of dropping the residual branch of a
            sample during training; a falsy value disables it.
    """

    def __init__(self, in_channels, out_channels, expansion_factor, stride, k, drop_connect_rate):
        super(MBConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.drop_connect_rate = drop_connect_rate
        self.conv1 = tf.keras.layers.Conv2D(filters=in_channels * expansion_factor,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same",
                                            use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k),
                                                      strides=stride,
                                                      padding="same",
                                                      use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.se = SEBlock(input_channels=in_channels * expansion_factor)
        self.conv2 = tf.keras.layers.Conv2D(filters=out_channels,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same",
                                            use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()
        # Drop-connect must drop the whole residual branch of a sample, not
        # individual activations: a (None, 1, 1, 1) noise shape shares one
        # keep/drop decision across height, width and channels.
        self.dropout = tf.keras.layers.Dropout(rate=drop_connect_rate,
                                               noise_shape=(None, 1, 1, 1))

    def call(self, inputs, training=None, **kwargs):
        # Expansion.
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.swish(x)
        # Depthwise convolution followed by the squeeze-and-excitation gate.
        # NOTE(review): the gate is applied before the swish here, whereas the
        # reference EfficientNet activates first; the order is left as-is so
        # the block stays compatible with already-trained weights.
        x = self.dwconv(x)
        x = self.bn2(x, training=training)
        x = self.se(x)
        x = tf.nn.swish(x)
        # Linear projection back to ``out_channels``.
        x = self.conv2(x)
        x = self.bn3(x, training=training)
        if self.stride == 1 and self.in_channels == self.out_channels:
            if self.drop_connect_rate:
                x = self.dropout(x, training=training)
            x = tf.keras.layers.add([x, inputs])
        return x


def build_mbconv_block(in_channels, out_channels, layers, stride, expansion_factor, k, drop_connect_rate):
    """Stack ``layers`` MBConv blocks into one ``tf.keras.Sequential`` stage.

    Only the first block sees ``in_channels`` and applies ``stride``; every
    following block maps ``out_channels`` to ``out_channels`` with stride 1.
    With ``layers == 0`` an empty ``Sequential`` is returned.
    """
    stage = tf.keras.Sequential()
    for index in range(layers):
        is_first = index == 0
        stage.add(MBConv(in_channels=in_channels if is_first else out_channels,
                         out_channels=out_channels,
                         expansion_factor=expansion_factor,
                         stride=stride if is_first else 1,
                         k=k,
                         drop_connect_rate=drop_connect_rate))
    return stage


class EfficientNet(tf.keras.Model):
    """EfficientNet-style backbone fused with a plain convolutional side branch.

    The main path is a stem convolution followed by seven MBConv stages.  A
    second branch of ordinary ReLU convolutions runs on the raw input, is
    max-pooled down to the resolution of the main path and concatenated into
    it before stages 1, 3, 4 and 6.  The model returns a feature vector (the
    pooled head output concatenated with the pooled outputs of stages 6 and
    4); it does not apply a classifier itself.

    Args:
        width_coefficient: Channel multiplier passed to ``round_filters``.
        depth_coefficient: Repeat multiplier passed to ``round_repeats``.
        dropout_rate: Dropout rate applied to the pooled head features.
        drop_connect_rate: Drop rate handed to every MBConv block.
    """

    def __init__(self, width_coefficient, depth_coefficient, dropout_rate, drop_connect_rate=0.2):
        super(EfficientNet, self).__init__()

        # --- Main path: stem + seven MBConv stages -------------------------
        self.conv1 = tf.keras.layers.Conv2D(filters=round_filters(32, width_coefficient),
                                            kernel_size=(3, 3),
                                            strides=2,
                                            padding="same",
                                            use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.block1 = build_mbconv_block(in_channels=round_filters(32, width_coefficient),
                                         out_channels=round_filters(16, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=1, k=3, drop_connect_rate=drop_connect_rate)
        self.block2 = build_mbconv_block(in_channels=round_filters(16, width_coefficient),
                                         out_channels=round_filters(24, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block3 = build_mbconv_block(in_channels=round_filters(24, width_coefficient),
                                         out_channels=round_filters(40, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block4 = build_mbconv_block(in_channels=round_filters(40, width_coefficient),
                                         out_channels=round_filters(80, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block5 = build_mbconv_block(in_channels=round_filters(80, width_coefficient),
                                         out_channels=round_filters(112, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block6 = build_mbconv_block(in_channels=round_filters(112, width_coefficient),
                                         out_channels=round_filters(192, width_coefficient),
                                         layers=round_repeats(4, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block7 = build_mbconv_block(in_channels=round_filters(192, width_coefficient),
                                         out_channels=round_filters(320, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)

        self.conv2 = tf.keras.layers.Conv2D(filters=round_filters(1280, width_coefficient),
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same",
                                            use_bias=False)

        # --- Side branch: plain ReLU convolutions --------------------------
        # The creation order of these layers is kept unchanged so that layer
        # names and the weight order stay stable.
        # conv3, conv7, conv11 and fc are created but never used in call().
        self.conv3 = tf.keras.layers.Conv2D(128, 1, strides=1, padding='same')
        self.conv4 = tf.keras.layers.Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu')
        self.conv15 = tf.keras.layers.Conv2D(16, kernel_size=3, strides=1, padding='same', activation='relu')
        self.conv5 = tf.keras.layers.Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')
        self.conv16 = tf.keras.layers.Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')
        self.conv6 = tf.keras.layers.Conv2D(64, kernel_size=1, strides=1, padding='same', activation='relu')
        self.conv07 = tf.keras.layers.Conv2D(64, kernel_size=2, strides=1, padding='same', activation='relu')
        self.conv7 = tf.keras.layers.Conv2D(64, kernel_size=3, strides=2, padding='same', activation='relu')
        self.conv8 = tf.keras.layers.Conv2D(128, kernel_size=1, strides=1, padding='same', activation='relu')
        self.conv18 = tf.keras.layers.Conv2D(128, kernel_size=1, strides=1, padding='same', activation='relu')
        self.conv9 = tf.keras.layers.Conv2D(128, kernel_size=2, strides=1, padding='same', activation='relu')
        self.conv19 = tf.keras.layers.Conv2D(128, kernel_size=2, strides=1, padding='same', activation='relu')
        self.conv29 = tf.keras.layers.Conv2D(128, kernel_size=2, strides=1, padding='same', activation='relu')

        self.conv10 = tf.keras.layers.Conv2D(256, kernel_size=2, strides=1, padding='same', activation='relu')
        self.conv100 = tf.keras.layers.Conv2D(256, kernel_size=2, strides=1, padding='same', activation='relu')
        self.conv11 = tf.keras.layers.Conv2D(256, kernel_size=1, strides=1, padding='same', activation='relu')

        # --- Head ----------------------------------------------------------
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.fc = tf.keras.layers.Dense(units=100,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass and return the concatenated feature vector.

        The forward pass lives in ``call`` (not ``__call__``) so that Keras'
        own ``__call__`` can build the model and track it correctly.

        Resolutions in the comments are relative to the input.  The two paths
        halve the resolution with different ops (strided "same" convolutions
        vs. max-pooling), so the concatenations assume an input height/width
        divisible by 32, e.g. 224.

        Args:
            inputs: Batch of images, channels last.
            training: Forwarded to batch-norm and dropout layers.
            mask: Unused; present for Keras signature compatibility.

        Returns:
            A 2-D tensor: pooled head features, pooled stage-6 output and
            pooled stage-4 output concatenated along the last axis.
        """
        # Stem (1/2 resolution).
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.swish(x)

        # Side branch level 1: two 3x3 convs on the raw input, pooled to 1/2.
        net = self.conv4(inputs)
        print('1111',net.shape)
        net = self.conv15(net)
        net = MaxPooling2D()(net)
        x = Concatenate()([x, net])

        x_1 = self.block1(x)        # 1/2 resolution
        x_2 = self.block2(x_1)      # 1/4 resolution

        # Side branch level 2, pooled to 1/4.
        net = self.conv5(net)
        net = self.conv16(net)
        net = MaxPooling2D()(net)
        x_2 = Concatenate()([x_2, net])
        x_3 = self.block3(x_2)      # 1/8 resolution

        # Side branch level 3, pooled to 1/8.
        net = self.conv6(net)
        net = self.conv07(net)
        net = self.conv9(net)
        net = MaxPooling2D()(net)
        print(net.shape)
        print(x_3.shape)
        x_3 = Concatenate()([x_3, net])
        x_4 = self.block4(x_3)      # 1/16 resolution

        x_5 = self.block5(x_4)      # 1/16 resolution

        # Side branch level 4, pooled to 1/16.
        net = self.conv8(net)
        net = self.conv19(net)
        net = self.conv10(net)
        net = MaxPooling2D()(net)
        x_6 = Concatenate()([x_5, net])
        x_6 = self.block6(x_6)      # 1/32 resolution

        # NOTE(review): the result of this last side-branch level is never
        # consumed.  It is kept because running it creates the weights of
        # conv18/conv29/conv100; removing it would change the model's weight
        # set and break loading of previously saved weights.
        net = self.conv18(net)
        net = self.conv29(net)
        net = self.conv100(net)
        net = MaxPooling2D()(net)

        # Head: 1x1 conv, batch-norm, swish, global pooling, dropout.
        x_7 = self.block7(x_6)
        x_7 = self.conv2(x_7)
        print(x_7.shape)
        x = self.bn2(x_7, training=training)
        x = tf.nn.swish(x)
        x = self.pool(x)
        x = self.dropout(x, training=training)
        print(x.shape)

        # Multi-scale features: append pooled stage-6 and stage-4 outputs.
        out2 = tf.keras.layers.GlobalAveragePooling2D()(x_6)
        out3 = tf.keras.layers.GlobalAveragePooling2D()(x_4)
        out = Concatenate()([x, out2, out3])
        return out


def get_efficient_net(width_coefficient, depth_coefficient, resolution, dropout_rate):
    """Create an ``EfficientNet`` from its scaling coefficients.

    ``resolution`` is accepted as part of the signature but is not used when
    constructing the model; ``drop_connect_rate`` keeps its class default.
    """
    return EfficientNet(width_coefficient=width_coefficient,
                        depth_coefficient=depth_coefficient,
                        dropout_rate=dropout_rate)

# Feature extractor (width 1.0, depth 1.0, dropout 0.2).
backbone = get_efficient_net(1.0, 1.0, 224, 0.2)

# Classification head stacked on the extracted features: a 512-unit hidden
# layer with heavy dropout, then a 200-way softmax.
classifier_head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.6),
    tf.keras.layers.Dense(200, activation='softmax'),
]
model = tf.keras.Sequential([backbone] + classifier_head)

model.build(input_shape=(None, 224, 224, 3))
model.summary()

train_dir='../input/bird200/train'
validation_dir='../input/bird200/valid'

# Training images are scaled by 1/255 and randomly augmented (rotation,
# shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images are only rescaled.  Random augmentation here would make
# the validation metrics noisy, and those metrics drive early stopping,
# checkpointing and learning-rate reduction.
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
)
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Stop when validation accuracy has not improved for 5 consecutive epochs.
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    verbose=1)

tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir='logseff_1_dir',
    histogram_freq=1,
    embeddings_freq=1)

# Halve the learning rate after 3 epochs without validation-loss improvement.
# `min_delta` replaces the deprecated `epsilon` keyword.
reduceonplateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
    factor=0.5,
    patience=3)

# Keep only the checkpoint with the best validation accuracy.
# NOTE(review): this saves the full model in HDF5 format. The custom
# layers/models in this file define no `get_config`, so full-model saving may
# raise NotImplementedError on some TensorFlow versions — consider
# `save_weights_only=True` if only the weights are needed.
filepath = './best_weights.hdf5'
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1)

callback = [earlystopping, tensorboard, checkpoint, reduceonplateau]

# Derive the step counts from the generators' own batch size so they cannot
# drift apart from the value used when the generators were created.
history = model.fit(
    train_generator,
    epochs=3,
    callbacks=callback,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // validation_generator.batch_size)