In [12]:
### STDC1 for Image classification

#Keras-tensorflow implementation

# -*- coding: utf-8 -*-


from functools import reduce
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow.keras.backend as K


#### Custom function for conv2d: conv_block
def conv_block(inputs, conv_type, kernel, kernel_size, strides, padding='same'):
  
      if(conv_type == 'ds'):
        x = tf.keras.layers.SeparableConv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)
      else:
        x = tf.keras.layers.Conv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)  

      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.relu(x)

      return x

def STDC_module(inputs, filters, kernel, s):
    
    
    tchannel1 = filters
    tchannel2 = tchannel1 // 2
    tchannel3 = tchannel1 // 4
    tchannel4 = tchannel1 // 8

    if s==2:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1)) #
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(s, s))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

        #Used MaxPooling instead of average pooling
        x1 = tf.keras.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x1)
    
    else:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1))
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(1, 1))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

    
    
    concat = tf.keras.layers.Concatenate()([x1, x2, x3, x4])

    return concat


"""#### STDC_block to control the repetition of STDC modules"""

def STDC_block(inputs, filters, kernel, strides, n):
      x = STDC_module(inputs, filters, kernel, strides)

      for i in range(1, n):
        x = STDC_module(x, filters, kernel, 1)

      return x    

def model(num_classes=19, input_size=(1024, 2048, 3)):

      # Input Layer
      input_layer = tf.keras.layers.Input(shape=input_size, name = 'input_layer')

      ## Step 1: Learning to DownSample
      convx1 = conv_block(input_layer, 'conv', 32, (3, 3), strides = (2, 2))

      convx2 = conv_block(convx1, 'conv', 64, (3, 3), strides = (2, 2))

      #STDC blocks  
      D3 = STDC_block(convx2, 256, (3, 3), strides=2, n=2)

      D4 = STDC_block(D3, 512, (3, 3), strides=2, n=2)

      D5 = STDC_block(D4, 1024, (3, 3), strides=2, n=2)
        
      convx6 = conv_block(D5, 'conv', 1024, (1, 1), strides = (1, 1))
    
      x = tf.keras.layers.GlobalAveragePooling2D()(convx6)
      x = tf.keras.layers.Dense(1024, activation=None)(x)
      x = tf.keras.layers.Dense(1000, activation=None)(x)

      STDC1 = tf.keras.Model(inputs = input_layer , outputs = x, name = 'STDC1')

      return STDC1

In [15]:
STDC1 = model(num_classes=1000, input_size=(1024, 2048, 3))
STDC1.summary()

Model: "STDC1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 1024, 2048,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d_210 (Conv2D)            (None, 512, 1024, 3  896         ['input_layer[0][0]']            
                                2)                                                                
                                                                                                  
 batch_normalization_210 (Batch  (None, 512, 1024, 3  128        ['conv2d_210[0][0]']             
 Normalization)                 2)                                                            

In [11]:
#install keras-floops to get the flop counts of the model

!pip install keras-flops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-flops
  Downloading keras_flops-0.1.2-py3-none-any.whl (5.3 kB)
Installing collected packages: keras-flops
Successfully installed keras-flops-0.1.2


In [16]:
#flops at 1024 x 2048x1536 resolution

#In litera

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

from keras_flops import get_flops


# Calculae FLOPS for STDC1 classification model
flops = get_flops(STDC1, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")

FLOPS: 67.4 G


In [7]:
### STDC2 for image classification

#Keras-tensorflow implementation

# -*- coding: utf-8 -*-

from functools import reduce
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow.keras.backend as K


#### Custom function for conv2d: conv_block
def conv_block(inputs, conv_type, kernel, kernel_size, strides, padding='same'):
  
      if(conv_type == 'ds'):
        x = tf.keras.layers.SeparableConv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)
      else:
        x = tf.keras.layers.Conv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)  

      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.relu(x)

      return x

def STDC_module(inputs, filters, kernel, s):
    
    
    tchannel1 = filters
    tchannel2 = tchannel1 // 2
    tchannel3 = tchannel1 // 4
    tchannel4 = tchannel1 // 8

    if s==2:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1)) #
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(s, s))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

        #Used MaxPooling instead of average pooling
        x1 = tf.keras.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x1)
    
    else:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1))
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(1, 1))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

    
    
    concat = tf.keras.layers.Concatenate()([x1, x2, x3, x4])

    return concat


"""#### STDC_block to control the repetition of STDC modules"""

def STDC_block(inputs, filters, kernel, strides, n):
      x = STDC_module(inputs, filters, kernel, strides)

      for i in range(1, n):
        x = STDC_module(x, filters, kernel, 1)

      return x    

def model(num_classes=19, input_size=(1024, 2048, 3)):

      # Input Layer
      input_layer = tf.keras.layers.Input(shape=input_size, name = 'input_layer')

      ## Step 1: Learning to DownSample
      convx1 = conv_block(input_layer, 'conv', 32, (3, 3), strides = (2, 2))

      convx2 = conv_block(convx1, 'conv', 64, (3, 3), strides = (2, 2))

      #STDC blocks  
      D3 = STDC_block(convx2, 256, (3, 3), strides=2, n=4)

      D4 = STDC_block(D3, 512, (3, 3), strides=2, n=5)

      D5 = STDC_block(D4, 1024, (3, 3), strides=2, n=3)
        
      convx6 = conv_block(D5, 'conv', 1024, (1, 1), strides = (1, 1))
    
      x = tf.keras.layers.GlobalAveragePooling2D()(convx6)
      x = tf.keras.layers.Dense(1024, activation=None)(x)
      x = tf.keras.layers.Dense(1000, activation=None)(x)

      STDC2 = tf.keras.Model(inputs = input_layer , outputs = x, name = 'STDC2')

      return STDC2

In [10]:
# As Cityscapes provides images at 1024 x 2048 resolution and model performance is evaluated based on 19 objetc classes
# So we determine model FLOPs at full input resolution

STDC2 = model(num_classes=1000, input_size=(1024, 2048, 3))
STDC2.summary()

Model: "STDC2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 1024, 2048,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d_132 (Conv2D)            (None, 512, 1024, 3  896         ['input_layer[0][0]']            
                                2)                                                                
                                                                                                  
 batch_normalization_132 (Batch  (None, 512, 1024, 3  128        ['conv2d_132[0][0]']             
 Normalization)                 2)                                                            

In [11]:
#flops at 1024 x 2048 resolution

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

from keras_flops import get_flops


# Calculae FLOPS for STDC2 at 1024 X 2028 resolution
flops = get_flops(STDC2, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")

FLOPS: 1.2e+02 G


In [21]:
#STDC1 semantic segmentation model without detail guidance

# -*- coding: utf-8 -*-


from functools import reduce
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow.keras.backend as K


#### Custom function for conv2d: conv_block
def conv_block(inputs, conv_type, kernel, kernel_size, strides, padding='same'):
  
      if(conv_type == 'ds'):
        x = tf.keras.layers.SeparableConv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)
      else:
        x = tf.keras.layers.Conv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)  

      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.relu(x)

      return x

def STDC_module(inputs, filters, kernel, s):
    
    
    tchannel1 = filters
    tchannel2 = tchannel1 // 2
    tchannel3 = tchannel1 // 4
    tchannel4 = tchannel1 // 8

    if s==2:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1)) #
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(s, s))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

        #Used MaxPooling instead of average pooling
        x1 = tf.keras.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x1)
    
    else:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1))
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(1, 1))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

    
    
    concat = tf.keras.layers.Concatenate()([x1, x2, x3, x4])

    return concat


"""#### STDC_block to control the repetition of STDC modules"""

def STDC_block(inputs, filters, kernel, strides, n):
      x = STDC_module(inputs, filters, kernel, strides)

      for i in range(1, n):
        x = STDC_module(x, filters, kernel, 1)

      return x    

#Feature Fusion Module
def ffm_block(f1, f2, kernel=256):
    ffm = tf.keras.layers.Concatenate()([f1, f2]) #256+128 = 384
    ffm = conv_block(ffm, 'conv', kernel, (1, 1), strides = (1, 1))
    atten = tf.nn.avg_pool2d(ffm, kernel, 1, 'SAME')
    atten = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(atten)
    atten = tf.keras.activations.relu(atten)
    
    atten = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(atten)
    atten = tf.keras.activations.sigmoid(atten)
    ffm_atten = tf.keras.layers.Multiply()([atten, ffm])
    ffm_out = tf.keras.layers.add([ffm_atten, ffm])
    
    return ffm_out
    
# Attention Refinement Module
def arm_block(inputs, kernel):
  
      #tchannel = tf.keras.backend.int_shape(inputs)[-1] * 2 
      x = conv_block(inputs, 'conv', kernel, (3, 3), strides = (1, 1))
      x = tf.nn.avg_pool2d(x, kernel, 1, 'SAME')
      x = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(x)
      x = conv_block(x, 'conv', kernel, (1, 1), strides = (1, 1))  
      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.sigmoid(x)
      x = tf.keras.layers.Multiply()([x, inputs])

      return x   
    

def model(num_classes=19, input_size=(1024, 2048, 3)):

      # Input Layer
      input_layer = tf.keras.layers.Input(shape=input_size, name = 'input_layer')

      ## Step 1: Learning to DownSample
      convx1 = conv_block(input_layer, 'conv', 32, (3, 3), strides = (2, 2))

      convx2 = conv_block(convx1, 'conv', 64, (3, 3), strides = (2, 2))

      #STDC blocks  
      D3 = STDC_block(convx2, 256, (3, 3), strides=2, n=2)

      D4 = STDC_block(D3, 512, (3, 3), strides=2, n=2)

      D5 = STDC_block(D4, 1024, (3, 3), strides=2, n=2)
        
      D5 = tf.nn.avg_pool2d(D5, 1024, 1, 'SAME')
      D5 = conv_block(D5, 'conv', 128, (1, 1), strides = (1, 1))
      D5 = tf.keras.layers.UpSampling2D((1, 1))(D5)
    
      D5_arm = arm_block(D5, 128)
      D5_arm = tf.keras.layers.add([D5_arm, D5])
      D5_up = tf.keras.layers.UpSampling2D((2, 2))(D5_arm)
      D5_up = conv_block(D5_up, 'conv', 128, (3, 3), strides = (1, 1))
      
      D4 = conv_block(D4, 'conv', 128, (1, 1), strides = (1, 1))
      D4_arm = arm_block(D4, 128)
      D4_arm = tf.keras.layers.add([D4_arm, D5_up])
      D4_up = tf.keras.layers.UpSampling2D((2, 2))(D4_arm)
      D4_up = conv_block(D4_up, 'conv', 256, (3, 3), strides = (1, 1))
    
      #feature fusion module
      ffm = ffm_block(D4_up, D3, 256)
      
      #classifier
      classifier = conv_block(ffm, 'conv', 256, (3, 3), strides = (1, 1))
      classifier = tf.keras.layers.Conv2D(num_classes, 1, padding='same', strides = 1)(classifier)
        
      classifier = tf.keras.layers.Dropout(0.3)(classifier)
      classifier = tf.keras.layers.UpSampling2D((8, 8))(classifier)
      classifier = tf.dtypes.cast(classifier, tf.float32)
      classifier = tf.keras.activations.softmax(classifier)

      STDC1_seg = tf.keras.Model(inputs = input_layer , outputs = classifier, name = 'STDC1_seg')

      return STDC1_seg

In [22]:
STDC1_seg = model(num_classes=19, input_size=(1024, 2048, 3))
STDC1_seg.summary()

Model: "STDC1_seg"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 1024, 2048,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d_305 (Conv2D)            (None, 512, 1024, 3  896         ['input_layer[0][0]']            
                                2)                                                                
                                                                                                  
 batch_normalization_302 (Batch  (None, 512, 1024, 3  128        ['conv2d_305[0][0]']             
 Normalization)                 2)                                                        

In [23]:
#flops at 1024 x 2048x1536 resolution

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

from keras_flops import get_flops


# Calculae FLOPS
flops = get_flops(STDC1_seg, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")

FLOPS: 2.92e+03 G


In [24]:
#STDC2 semantic segmentation model without detail guidance

# -*- coding: utf-8 -*-


from functools import reduce
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow.keras.backend as K


#### Custom function for conv2d: conv_block
def conv_block(inputs, conv_type, kernel, kernel_size, strides, padding='same'):
  
      if(conv_type == 'ds'):
        x = tf.keras.layers.SeparableConv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)
      else:
        x = tf.keras.layers.Conv2D(kernel, kernel_size, padding=padding, strides = strides)(inputs)  

      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.relu(x)

      return x

def STDC_module(inputs, filters, kernel, s):
    
    
    tchannel1 = filters
    tchannel2 = tchannel1 // 2
    tchannel3 = tchannel1 // 4
    tchannel4 = tchannel1 // 8

    if s==2:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1)) #
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(s, s))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

        #Used MaxPooling instead of average pooling
        x1 = tf.keras.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x1)
    
    else:
        x1 = conv_block(inputs, 'conv', tchannel2, (1, 1), strides=(1, 1))
        x2 = conv_block(x1, 'conv', tchannel3, (3, 3), strides=(1, 1))
        x3 = conv_block(x2, 'conv', tchannel4, (3, 3), strides=(1, 1))
        x4 = conv_block(x3, 'conv', tchannel4, (3, 3), strides=(1, 1))

    
    
    concat = tf.keras.layers.Concatenate()([x1, x2, x3, x4])

    return concat


"""#### STDC_block to control the repetition of STDC modules"""

def STDC_block(inputs, filters, kernel, strides, n):
      x = STDC_module(inputs, filters, kernel, strides)

      for i in range(1, n):
        x = STDC_module(x, filters, kernel, 1)

      return x    

#Feature Fusion Module
def ffm_block(f1, f2, kernel=256):
    ffm = tf.keras.layers.Concatenate()([f1, f2]) #256+128 = 384
    ffm = conv_block(ffm, 'conv', kernel, (1, 1), strides = (1, 1))
    atten = tf.nn.avg_pool2d(ffm, kernel, 1, 'SAME')
    atten = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(atten)
    atten = tf.keras.activations.relu(atten)
    
    atten = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(atten)
    atten = tf.keras.activations.sigmoid(atten)
    ffm_atten = tf.keras.layers.Multiply()([atten, ffm])
    ffm_out = tf.keras.layers.add([ffm_atten, ffm])
    
    return ffm_out
    
# Attention Refinement Module
def arm_block(inputs, kernel):
  
      #tchannel = tf.keras.backend.int_shape(inputs)[-1] * 2 
      x = conv_block(inputs, 'conv', kernel, (3, 3), strides = (1, 1))
      x = tf.nn.avg_pool2d(x, kernel, 1, 'SAME')
      x = tf.keras.layers.Conv2D(kernel, 1, padding='same', strides = 1)(x)
      x = conv_block(x, 'conv', kernel, (1, 1), strides = (1, 1))  
      x = tf.keras.layers.BatchNormalization()(x)
      x = tf.keras.activations.sigmoid(x)
      x = tf.keras.layers.Multiply()([x, inputs])

      return x   
    

def model(num_classes=19, input_size=(1024, 2048, 3)):

      # Input Layer
      input_layer = tf.keras.layers.Input(shape=input_size, name = 'input_layer')

      ## Step 1: Learning to DownSample
      convx1 = conv_block(input_layer, 'conv', 32, (3, 3), strides = (2, 2))

      convx2 = conv_block(convx1, 'conv', 64, (3, 3), strides = (2, 2))

      #STDC blocks  
      D3 = STDC_block(convx2, 256, (3, 3), strides=2, n=4)

      D4 = STDC_block(D3, 512, (3, 3), strides=2, n=5)

      D5 = STDC_block(D4, 1024, (3, 3), strides=2, n=3)
        
      D5 = tf.nn.avg_pool2d(D5, 1024, 1, 'SAME')
      D5 = conv_block(D5, 'conv', 128, (1, 1), strides = (1, 1))
      D5 = tf.keras.layers.UpSampling2D((1, 1))(D5)
    
      D5_arm = arm_block(D5, 128)
      D5_arm = tf.keras.layers.add([D5_arm, D5])
      D5_up = tf.keras.layers.UpSampling2D((2, 2))(D5_arm)
      D5_up = conv_block(D5_up, 'conv', 128, (3, 3), strides = (1, 1))
      
      D4 = conv_block(D4, 'conv', 128, (1, 1), strides = (1, 1))
      D4_arm = arm_block(D4, 128)
      D4_arm = tf.keras.layers.add([D4_arm, D5_up])
      D4_up = tf.keras.layers.UpSampling2D((2, 2))(D4_arm)
      D4_up = conv_block(D4_up, 'conv', 256, (3, 3), strides = (1, 1))
    
      #feature fusion module
      ffm = ffm_block(D4_up, D3, 256)
      
      #classifier
      classifier = conv_block(ffm, 'conv', 256, (3, 3), strides = (1, 1))
      classifier = tf.keras.layers.Conv2D(num_classes, 1, padding='same', strides = 1)(classifier)
        
      classifier = tf.keras.layers.Dropout(0.3)(classifier)
      classifier = tf.keras.layers.UpSampling2D((8, 8))(classifier)
      classifier = tf.dtypes.cast(classifier, tf.float32)
      classifier = tf.keras.activations.softmax(classifier)

      STDC2_seg = tf.keras.Model(inputs = input_layer , outputs = classifier, name = 'STDC2_seg')

      return STDC2_seg

In [25]:
STDC2_seg = model(num_classes=19, input_size=(1024, 2048, 3))
STDC2_seg.summary()

Model: "STDC2_seg"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 1024, 2048,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d_346 (Conv2D)            (None, 512, 1024, 3  896         ['input_layer[0][0]']            
                                2)                                                                
                                                                                                  
 batch_normalization_340 (Batch  (None, 512, 1024, 3  128        ['conv2d_346[0][0]']             
 Normalization)                 2)                                                        

In [27]:
#flops at 1024 x 2048x1536 resolution

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

from keras_flops import get_flops


# Calculae FLOPS
flops = get_flops(STDC2_seg, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")

FLOPS: 2.97e+03 G
