### Investigating the FLOP count discrepancy - activations might not affect the FLOP count (in TF at least)

I have considered a simple feedforward network: <br>
**Input Layer (n = 64)  →  1st Hidden Layer (n = 32)  →  2nd Hidden Layer (n = 32)  →  3rd Hidden Layer (n = 32)  →  Output Layer (n = 32)**


### 1. Model with all relu activation

In [14]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber

# Fix the seed of NumPy's global random number generator.
# NOTE(review): this seeds NumPy only; presumably Keras weight initialisation
# is not controlled by it -- confirm whether a framework-level seed is wanted.
np.random.seed(42)

In [15]:
def simple_NN(input_dim, output_dim):
    """Build and compile a dense feed-forward network with ReLU everywhere.

    The network consists of an input of width ``input_dim``, three hidden
    ``Dense`` layers of 32 units each, and a ``Dense`` output layer of
    ``output_dim`` units. Every ``Dense`` layer, including the output layer,
    uses the ``'relu'`` activation.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the output layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        ``'mse'`` loss, ``'mse'`` metric).
    """
    # Assemble the layer stack in order: input, hidden layers, output.
    layers = [Input(shape=(input_dim,))]
    layers += [Dense(32, activation='relu') for _ in range(3)]
    layers.append(Dense(output_dim, activation='relu'))

    net = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return net


# Profiler entry points used for the FLOP count below.
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler.option_builder import ProfileOptionBuilder

# Dimensions of the network under test.
input_dim, output_dim = 64, 32
model = simple_NN(input_dim, output_dim)
model.summary()

# ------------- FLOP counts ---------------

# Trace the model into a concrete function for a single-sample batch
# (batch size 1) of float32 inputs.
input_shape = (1, input_dim)
concrete_func = tf.function(model).get_concrete_function(
    tf.TensorSpec(input_shape, tf.float32)
)

# Profile the traced graph with the float-operation profile options.
profile_opts = ProfileOptionBuilder.float_operation()
flops = model_analyzer.profile(graph=concrete_func.graph, options=profile_opts)

print('FLOPs:', flops.total_float_ops)

FLOPs: 10368


### 2. Model with all linear activation

In [16]:
def simple_NN(input_dim, output_dim):
    """Build and compile a dense feed-forward network with linear activations.

    The network consists of an input of width ``input_dim``, three hidden
    ``Dense`` layers of 32 units each, and a ``Dense`` output layer of
    ``output_dim`` units. Every ``Dense`` layer uses the ``'linear'``
    activation.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the output layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        ``'mse'`` loss, ``'mse'`` metric).
    """
    # Assemble the layer stack in order: input, hidden layers, output.
    layers = [Input(shape=(input_dim,))]
    layers += [Dense(32, activation='linear') for _ in range(3)]
    layers.append(Dense(output_dim, activation='linear'))

    net = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return net


# Profiler entry points used for the FLOP count below.
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler.option_builder import ProfileOptionBuilder

# Dimensions of the network under test.
input_dim, output_dim = 64, 32
model = simple_NN(input_dim, output_dim)
model.summary()

# ------------- FLOP counts ---------------

# Trace the model into a concrete function for a single-sample batch
# (batch size 1) of float32 inputs.
input_shape = (1, input_dim)
concrete_func = tf.function(model).get_concrete_function(
    tf.TensorSpec(input_shape, tf.float32)
)

# Profile the traced graph with the float-operation profile options.
profile_opts = ProfileOptionBuilder.float_operation()
flops = model_analyzer.profile(graph=concrete_func.graph, options=profile_opts)

print('FLOPs:', flops.total_float_ops)

FLOPs: 10368


### 3. Model with no activation at all

In [17]:
def simple_NN(input_dim, output_dim):
    """Build and compile a dense feed-forward network with no activations.

    The network consists of an input of width ``input_dim``, three hidden
    ``Dense`` layers of 32 units each, and a ``Dense`` output layer of
    ``output_dim`` units. Every ``Dense`` layer is created with
    ``activation=None``.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the output layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        ``'mse'`` loss, ``'mse'`` metric).
    """
    # Assemble the layer stack in order: input, hidden layers, output.
    layers = [Input(shape=(input_dim,))]
    layers += [Dense(32, activation=None) for _ in range(3)]
    layers.append(Dense(output_dim, activation=None))

    net = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return net


# Profiler entry points used for the FLOP count below.
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler.option_builder import ProfileOptionBuilder

# Dimensions of the network under test.
input_dim, output_dim = 64, 32
model = simple_NN(input_dim, output_dim)
model.summary()

# ------------- FLOP counts ---------------

# Trace the model into a concrete function for a single-sample batch
# (batch size 1) of float32 inputs.
input_shape = (1, input_dim)
concrete_func = tf.function(model).get_concrete_function(
    tf.TensorSpec(input_shape, tf.float32)
)

# Profile the traced graph with the float-operation profile options.
profile_opts = ProfileOptionBuilder.float_operation()
flops = model_analyzer.profile(graph=concrete_func.graph, options=profile_opts)

print('FLOPs:', flops.total_float_ops)

FLOPs: 10368


### 4. Our Network

In [18]:
def simple_NN(input_dim, output_dim, activation='relu'):
    """Build and compile a dense feed-forward network with a chosen activation.

    The network consists of an input of width ``input_dim``, three hidden
    ``Dense`` layers of 32 units each that all receive ``activation``, and a
    ``Dense`` output layer of ``output_dim`` units with the ``'linear'``
    activation.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the output layer.
        activation: Value forwarded as the ``activation`` argument of each
            hidden ``Dense`` layer. Defaults to ``'relu'``.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        ``'mse'`` loss, ``'mse'`` metric).
    """
    # Assemble the layer stack in order: input, hidden layers, output.
    layers = [Input(shape=(input_dim,))]
    layers += [Dense(32, activation=activation) for _ in range(3)]
    layers.append(Dense(output_dim, activation='linear'))

    net = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return net


# Dimensions of the network under test.
input_dim = 64
output_dim = 32

# Hidden-layer activation for this experiment (alternatives: 'leaky_relu', 'relu').
activation = LeakyReLU(negative_slope=0.1)

# Pass the chosen activation explicitly. Previously `activation` was assigned
# but never forwarded, so simple_NN silently fell back to its default 'relu'
# and the LeakyReLU configuration was not the one being profiled.
# NOTE(review): re-run this cell; any previously recorded FLOP count was
# produced with the default 'relu' hidden activation.
model = simple_NN(input_dim, output_dim, activation=activation)
model.summary()

# ------------- FLOP counts ---------------

# Create a concrete function by tracing the model for a fixed input signature.
input_shape = (1, input_dim)  # batch size 1
concrete_func = tf.function(model).get_concrete_function(tf.TensorSpec(input_shape, tf.float32))

from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler.option_builder import ProfileOptionBuilder

# Profile the traced graph with the float-operation profile options.
profile_opts = ProfileOptionBuilder.float_operation()
flops = model_analyzer.profile(concrete_func.graph, options=profile_opts)

print('FLOPs:', flops.total_float_ops)

FLOPs: 10368


### 5. Our Network with separate Leaky-ReLU layers

In [19]:
def simple_NN(input_dim, output_dim):
    """Build and compile a dense network with stand-alone LeakyReLU layers.

    The network consists of an input of width ``input_dim``, then three
    repetitions of a 32-unit ``Dense`` layer (``activation=None``) followed by
    a separate ``LeakyReLU(negative_slope=0.1)`` layer, and finally a ``Dense``
    output layer of ``output_dim`` units with the ``'linear'`` activation.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the output layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        ``'mse'`` loss, ``'mse'`` metric).
    """
    layers = [Input(shape=(input_dim,))]
    # Each hidden stage is a bare Dense layer followed by its own LeakyReLU.
    for _ in range(3):
        layers.append(Dense(32, activation=None))
        layers.append(LeakyReLU(negative_slope=0.1))
    layers.append(Dense(output_dim, activation='linear'))

    net = Sequential(layers)

    optimizer = Adam(learning_rate=0.001)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    return net


# Profiler entry points used for the FLOP count below.
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler.option_builder import ProfileOptionBuilder

# Dimensions of the network under test.
input_dim, output_dim = 64, 32
model = simple_NN(input_dim, output_dim)
model.summary()

# ------------- FLOP counts ---------------

# Trace the model into a concrete function for a single-sample batch
# (batch size 1) of float32 inputs.
input_shape = (1, input_dim)
concrete_func = tf.function(model).get_concrete_function(
    tf.TensorSpec(input_shape, tf.float32)
)

# Profile the traced graph with the float-operation profile options.
profile_opts = ProfileOptionBuilder.float_operation()
flops = model_analyzer.profile(graph=concrete_func.graph, options=profile_opts)

print('FLOPs:', flops.total_float_ops)

FLOPs: 10368
