## ResNet

In [1]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import add
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2

class ResNet:
	"""Factory for a bottleneck ResNet built with the Keras functional API.

	Both methods are static: `residual_module` wires one residual
	module onto a tensor and `build` assembles the full classifier.
	"""

	@staticmethod
	def residual_module(data, K, stride, chanDim, red=False,
		reg=0.0001, bnEps=2e-5, bnMom=0.9):
		"""Append one bottleneck residual module to `data`.

		The main branch is three BN => ReLU => CONV blocks using
		1x1, 3x3 and 1x1 kernels; its output is added to the
		shortcut branch.

		Args:
			data: input tensor of the module.
			K: number of filters of the last 1x1 CONV; the first two
				CONVs use int(K * 0.25) filters. Inside this method
				the name shadows the module-level backend alias `K`.
			stride: strides of the 3x3 CONV, and of the shortcut
				CONV when `red` is True.
			chanDim: axis passed to every BatchNormalization layer.
			red: when True the shortcut is a 1x1 CONV applied to the
				first activation; otherwise it is `data` itself.
			reg: L2 factor applied to every CONV kernel.
			bnEps: epsilon of every BatchNormalization layer.
			bnMom: momentum of every BatchNormalization layer.

		Returns:
			The tensor produced by adding the main branch and the
			shortcut.
		"""
		# by default the shortcut is the untouched input (identity)
		shortcut = data

		# first block: BN => ReLU => 1x1 CONV with a quarter of K
		bn1 = BatchNormalization(axis=chanDim, epsilon=bnEps,
			momentum=bnMom)(data)
		act1 = Activation("relu")(bn1)
		conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias=False,
			kernel_regularizer=l2(reg))(act1)

		# second block: BN => ReLU => 3x3 CONV; this is the only CONV
		# of the main branch that applies `stride`
		bn2 = BatchNormalization(axis=chanDim, epsilon=bnEps,
			momentum=bnMom)(conv1)
		act2 = Activation("relu")(bn2)
		conv2 = Conv2D(int(K * 0.25), (3, 3), strides=stride,
			padding="same", use_bias=False,
			kernel_regularizer=l2(reg))(act2)

		# third block: BN => ReLU => 1x1 CONV expanding to K filters
		bn3 = BatchNormalization(axis=chanDim, epsilon=bnEps,
			momentum=bnMom)(conv2)
		act3 = Activation("relu")(bn3)
		conv3 = Conv2D(K, (1, 1), use_bias=False,
			kernel_regularizer=l2(reg))(act3)

		# when requested, replace the identity shortcut with a 1x1
		# CONV (K filters, same stride as the 3x3 CONV) so that both
		# branches produce matching shapes for the addition
		if red:
			shortcut = Conv2D(K, (1, 1), strides=stride,
				use_bias=False, kernel_regularizer=l2(reg))(act1)

		# the module output is the sum of both branches
		return add([conv3, shortcut])

	@staticmethod
	def build(width, height, depth, classes, stages, filters,
		reg=0.0001, bnEps=2e-5, bnMom=0.9, dataset="cifar"):
		"""Assemble the complete ResNet classifier.

		Args:
			width: input width in pixels.
			height: input height in pixels.
			depth: number of input channels.
			classes: number of units of the final softmax layer.
			stages: sequence with the number of residual modules of
				each stage.
			filters: sequence of filter counts; filters[0] feeds the
				stem CONV and filters[i + 1] feeds stage i, so it
				needs len(stages) + 1 entries.
			reg: L2 factor applied to every CONV kernel and to the
				final Dense kernel.
			bnEps: epsilon of every BatchNormalization layer.
			bnMom: momentum of every BatchNormalization layer.
			dataset: "cifar" selects a single 3x3 stem CONV;
				"tiny_imagenet" selects a 5x5 CONV => BN => ReLU =>
				max-pool stem. Any other value adds no stem CONV.

		Returns:
			A Keras `Model` named "resnet".
		"""
		# default to "channels last" ordering
		inputShape = (height, width, depth)
		chanDim = -1

		# switch to "channels first" when the backend is configured so
		if K.image_data_format() == "channels_first":
			inputShape = (depth, height, width)
			chanDim = 1

		# the network starts by batch-normalizing the raw input
		inputs = Input(shape=inputShape)
		x = BatchNormalization(axis=chanDim, epsilon=bnEps,
			momentum=bnMom)(inputs)

		if dataset == "cifar":
			# CIFAR stem: a single 3x3 CONV
			x = Conv2D(filters[0], (3, 3), use_bias=False,
				padding="same", kernel_regularizer=l2(reg))(x)

		elif dataset == "tiny_imagenet":
			# Tiny ImageNet stem: CONV => BN => ACT => POOL, where the
			# padded, strided max-pool reduces the spatial size
			x = Conv2D(filters[0], (5, 5), use_bias=False,
				padding="same", kernel_regularizer=l2(reg))(x)
			x = BatchNormalization(axis=chanDim, epsilon=bnEps,
				momentum=bnMom)(x)
			x = Activation("relu")(x)
			x = ZeroPadding2D((1, 1))(x)
			x = MaxPooling2D((3, 3), strides=(2, 2))(x)

		for i, numModules in enumerate(stages):
			# every stage opens with a module that uses a CONV
			# shortcut; all stages but the first also downsample
			stride = (1, 1) if i == 0 else (2, 2)
			x = ResNet.residual_module(x, filters[i + 1], stride,
				chanDim, red=True, reg=reg, bnEps=bnEps,
				bnMom=bnMom)

			# remaining modules of the stage keep the identity
			# shortcut; `reg` is forwarded so that a caller-supplied
			# value reaches every CONV kernel
			for _ in range(numModules - 1):
				x = ResNet.residual_module(x, filters[i + 1],
					(1, 1), chanDim, reg=reg, bnEps=bnEps,
					bnMom=bnMom)

		# final BN => ACT => POOL
		x = BatchNormalization(axis=chanDim, epsilon=bnEps,
			momentum=bnMom)(x)
		x = Activation("relu")(x)
		# NOTE(review): the 8x8 window is fixed, so the last feature
		# map presumably has to be at least 8x8 -- confirm for
		# non-CIFAR input sizes
		x = AveragePooling2D((8, 8))(x)

		# softmax classifier
		x = Flatten()(x)
		x = Dense(classes, kernel_regularizer=l2(reg))(x)
		x = Activation("softmax")(x)

		return Model(inputs, x, name="resnet")

2021-10-11 12:58:59.174073: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-11 12:58:59.174110: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## RCNN

In [2]:

from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, Reshape,MaxPool2D,Dense
from tensorflow.keras.layers import Bidirectional, LSTM, Lambda
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import SGD
import numpy as np
#from configuration import window_height, window_width, MPoolLayers_ALL, LastFilters, NUnits

import tensorflow as tf

import math
import cv2

import string


In [3]:

# --- General configuration ---
# Height of the input window.
window_height = 64
# Width of the input window.
window_width = 64
# Window shift: two less than the window width.
window_shift = window_width - 2

# --- CNN configuration ---
# Total number of max-pooling layers.
MPoolLayers_ALL = 5
# Number of max-pooling layers that act on the horizontal dimension.
MPoolLayers_H = 2
# Number of feature maps produced by the last convolution layer.
LastFilters = 512

# --- LSTM configuration ---
# Number of units in each forward/backward LSTM.
NUnits = 256
# Number of layers in the BLSTM.
NLayers = 3

In [4]:
#%%
# Window height divided by 2 once per max-pooling layer, truncated to an int.
FV = int(window_height / (2 ** MPoolLayers_ALL))
# Feature-vector length: FV values for each of the last conv layer's maps.
NFeatures = LastFilters * FV


In [5]:
# Alphabet ordered as lowercase letters, uppercase letters, digits.
char_list = string.ascii_lowercase + string.ascii_uppercase + string.digits
# Same 62 characters ordered as digits, uppercase letters, lowercase letters.
l = string.digits + string.ascii_uppercase + string.ascii_lowercase

In [6]:

#%%
def _conv_bn_relu(tensor, filters):
    """Apply Conv2D (1x1 kernel) => BatchNormalization (last axis) => ReLU.

    Args:
        tensor: input Keras tensor.
        filters: number of output feature maps of the convolution.

    Returns:
        The activated output tensor.
    """
    # NOTE(review): every convolution uses a 1x1 kernel, so pixels are only
    # mixed spatially by the pooling layers -- confirm this is intended.
    tensor = Conv2D(filters=filters, kernel_size=(1,1))(tensor)
    tensor = BatchNormalization(axis = -1)(tensor)
    return Activation("relu")(tensor)


input_data = Input(shape=(window_height, window_width, 1))

# Stage 1: two 64-filter convolutions, then halve height and width.
convolution1 = _conv_bn_relu(input_data, 64)
convolution2 = _conv_bn_relu(convolution1, 64)
pooling1 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(convolution2)

# Stage 2: two 128-filter convolutions, then halve height and width.
convolution3 = _conv_bn_relu(pooling1, 128)
convolution4 = _conv_bn_relu(convolution3, 128)
pooling2 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(convolution4)

# Stage 3: three 256-filter convolutions, then halve the height only.
convolution5 = _conv_bn_relu(pooling2, 256)
convolution6 = _conv_bn_relu(convolution5, 256)
convolution7 = _conv_bn_relu(convolution6, 256)
pooling3 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution7)

# Stage 4: three 512-filter convolutions, then halve the height only.
convolution8 = _conv_bn_relu(pooling3, 512)
convolution9 = _conv_bn_relu(convolution8, 512)
convolution10 = _conv_bn_relu(convolution9, 512)
pooling4 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution10)

# Stage 5: three 512-filter convolutions, then halve the height only.
convolution11 = _conv_bn_relu(pooling4, 512)
convolution12 = _conv_bn_relu(convolution11, 512)
convolution13 = _conv_bn_relu(convolution12, 512)
pooling5 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution13)

# Turn the (height, width, channels) map into a sequence for the LSTMs, which
# read their input as (time steps, features). Each remaining image column is
# one time step; its features are the FV rows times LastFilters channels.
# The width axis is moved to the front before reshaping so that every time
# step holds the values of exactly one column.
time_steps = window_width // (2 ** MPoolLayers_H)
columns_first = tf.keras.layers.Permute((2, 1, 3))(pooling5)
convolution_full = Reshape(target_shape=(time_steps, LastFilters * FV))(columns_first)

# Two stacked bidirectional LSTMs that return the full sequence.
bidir_LSTM1 = Bidirectional(LSTM(units = NUnits, return_sequences=True))(convolution_full)
bidir_LSTM2 = Bidirectional(LSTM(units = NUnits, return_sequences=True))(bidir_LSTM1)

# Per-time-step softmax over the alphabet plus one extra class.
outputs = Dense(len(char_list)+1, activation = 'softmax')(bidir_LSTM2)


2021-10-11 12:59:14.830350: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-10-11 12:59:14.830389: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-10-11 12:59:14.830412: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (nb-1061): /proc/driver/nvidia/version does not exist
2021-10-11 12:59:14.830730: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## RCNN - 2

In [7]:
# Keyword arguments shared by every padded ReLU convolution below.
_same_relu = dict(activation='relu', padding='same')

# Single-channel 32x32 input.
inputs = Input(shape=(32, 32, 1))

# 3x3 convolution followed by 2x2 pooling.
conv_1 = Conv2D(64, (3, 3), **_same_relu)(inputs)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)

# Second 3x3 convolution / 2x2 pooling pair.
conv_2 = Conv2D(128, (3, 3), **_same_relu)(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)

# Two 256-filter convolutions, then a (2, 1) pooling window.
conv_3 = Conv2D(256, (3, 3), **_same_relu)(pool_2)
conv_4 = Conv2D(256, (3, 3), **_same_relu)(conv_3)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)

# Two 512-filter convolutions, each followed by batch normalization,
# then another (2, 1) pooling window.
conv_5 = Conv2D(512, (3, 3), **_same_relu)(pool_4)
batch_norm_5 = BatchNormalization()(conv_5)
conv_6 = Conv2D(512, (3, 3), **_same_relu)(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

# Final 2x2 convolution; no padding argument is given here.
conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6)

# Drop axis 1 so the remaining axes form a sequence for the LSTMs.
# NOTE(review): squeeze needs that axis to have size 1 -- confirm if the
# input height is ever changed from 32.
squeezed = Lambda(lambda t: K.squeeze(t, 1))(conv_7)

# Two stacked bidirectional LSTMs (128 units each, dropout 0.2) that
# return the full sequence.
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_1)

# Per-step softmax over the characters of `l` plus one extra class.
outputs = Dense(len(l) + 1, activation='softmax')(blstm_2)

act_model = Model(inputs, outputs)

In [8]:
# Print the layer-by-layer table (output shapes and parameter counts) of act_model.
act_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 32, 1)]       0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 32, 32, 64)        640       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 8, 8, 256)         295168    
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 8, 8, 256)         590080