In [1]:
import keras
import keras.models as models
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D, Conv1D, MaxPooling1D
from keras.layers.core import Flatten, Dense, Dropout, Activation, Reshape

def getDeepSigCNNModel(slice_size=64, classes=100, weights=None):
    """Build a small 1-D CNN classifier (a dummy model for testing the Data Generator).

    Args:
        slice_size: Number of time steps per example; the input shape is
            ``(slice_size, 2)``.
        classes: Number of units in the final softmax layer.
        weights: Accepted for signature compatibility; it is not read anywhere
            in this function.

    Returns:
        A ``keras`` ``Sequential`` model. The model is not compiled here.
    """
    net = models.Sequential()

    # First conv block. The convolution itself already applies ReLU, and a
    # second ReLU follows the batch normalisation; both are kept as-is.
    net.add(Conv1D(64, 3, activation='relu', padding='same', input_shape=(slice_size, 2)))
    net.add(keras.layers.BatchNormalization(momentum=0.9))
    net.add(Activation('relu'))
    net.add(MaxPooling1D())

    # Further conv -> batch-norm -> ReLU -> pool blocks. The author's inline
    # notes mention other counts: 6 ("original") and 3 ("for short uwb").
    extra_conv_blocks = 2
    for _ in range(extra_conv_blocks):
        net.add(Conv1D(64, 3, padding='same'))
        net.add(keras.layers.BatchNormalization(momentum=0.9))
        net.add(Activation('relu'))
        net.add(MaxPooling1D())

    net.add(Flatten())

    # Two identical fully connected layers.
    for _ in range(2):
        net.add(Dense(128, activation='selu'))

    # Softmax output (a linear output layer was left commented out by the author).
    net.add(Dense(classes, activation='softmax'))

    return net

Using TensorFlow backend.


In [3]:
# Measure single-sample inference latency of a model stored as JSON.
import os
import random
import time
from statistics import mean, pstdev
from timeit import default_timer

# GPU selection is exported before the Keras import in this cell.
# NOTE(review): if Keras/TensorFlow was already imported by an earlier cell,
# confirm that these settings still take effect for the running kernel.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
from keras.models import model_from_json

model_path = '/home/nasim/MachineLearning/results/CFOEstimation/cfo-lltf-contaminated/baseline_cfo_model.json'
with open(model_path, 'r') as json_file:
    model = model_from_json(json_file.read(), custom_objects=None)

# Alternative: build the model in code instead of loading the JSON.
# model = getDeepSigCNNModel(slice_size=160, classes=98, weights=None)

model.summary()

# One random example of shape (batch=1, 160, 2); only the shape matters for timing.
NN_input = np.random.normal(loc=0, scale=1.0, size=(1, 160, 2))

# Time 100 single-example predictions.
time_list = []
for _ in range(100):
    start = default_timer()
    model.predict(NN_input, batch_size=1)
    time_list.append(default_timer() - start)

# Drop the first measurement so it does not enter the statistics.
time_list.pop(0)

# Separate result names keep the imported `mean` function callable, so the
# measurement lines can be re-run without re-importing it.
mean_time = mean(time_list)
std_time = pstdev(time_list)

# Prints "<mean> <std>" on one line under both Python 2 and Python 3.
print("%s %s" % (mean_time, std_time))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 160, 128)          1920      
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 160, 128)          82048     
_________________________________________________________________
activation_1 (Activation)    (None, 160, 128)          0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 80, 128)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 80, 128)           114816    
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 80, 128)           82048     
_________________________________________________________________
activation_2 (Activation)    (None, 80, 128)           0         
__________

In [1]:
## calculate flops


def conv_flop_counter(input_shape, conv_filter, stride=1, padding=1, activation='relu'):
    """Estimate the operation count of one 1-D convolution layer.

    Args:
        input_shape: ``(channels, length)`` of the layer input. Only the
            length (index 1) is read.
        conv_filter: ``(num_filters, channels, kernel_length)``.
        stride: Step between successive filter positions (default 1, the
            value that used to be hard-coded).
        padding: Padding counted on each side of the input (default 1, the
            value that used to be hard-coded).
        activation: When equal to ``'relu'`` (the default), one additional
            operation per output value is added.

    Returns:
        The estimated operation count. It is an ``int`` for integer inputs on
        both Python 2 and Python 3.
    """
    num_filters = conv_filter[0]
    vector_length = conv_filter[1] * conv_filter[2]  # channels * kernel_length

    # NOTE(review): the usual dot-product count is n multiplications plus
    # n - 1 additions (2n - 1), but this function charges n + 1. It is left
    # unchanged so previously recorded totals stay reproducible; confirm
    # which convention is intended.
    flops_per_instance = vector_length + 1

    # Floor division keeps the count an integer on Python 3 as well; on
    # Python 2 with integer inputs this matches the former `/`.
    # NOTE(review): the textbook output length adds "+ 1" to this expression.
    # That term was disabled in the original code and stays disabled here.
    num_instances_per_filter = (input_shape[1] - conv_filter[2] + 2 * padding) // stride

    flops_per_filter = num_instances_per_filter * flops_per_instance
    total_flops_per_layer = flops_per_filter * num_filters

    if activation == 'relu':
        # ReLU is charged one operation per output value.
        total_flops_per_layer += num_filters * num_instances_per_filter

    return total_flops_per_layer

def dense_flop_counter(input_size, output_size):
    """Return the operation count charged to a dense layer: 2 * input_size * output_size."""
    weight_count = input_size * output_size
    return 2 * weight_count

## count flops in Pronto-S
# Each conv entry is (input_shape, conv_filter) as taken by conv_flop_counter;
# each dense entry is (input_size, output_size) as taken by dense_flop_counter.
pronto_s_convs = [
    ((2, 160), (64, 2, 3)),
    ((64, 80), (64, 64, 3)),
    ((64, 40), (64, 64, 3)),
]
pronto_s_dense = [(1280, 128), (128, 128), (128, 98)]

total = 0
for layer_input, layer_filter in pronto_s_convs:
    # The extra per-stage terms (64*160, 64*80, 64*40) were commented out in
    # the original tally and remain excluded.
    total += conv_flop_counter(layer_input, layer_filter)
for n_in, n_out in pronto_s_dense:
    total += dense_flop_counter(n_in, n_out)

# Single-argument parenthesised form prints the same text on Python 2 and 3.
print(total)
# Author's earlier note: "packet detection: 1949952"

## count flops in Pronto-L
# Each stage is (sequence length, input channels of the stage's first conv).
# Every stage has a kernel-7 conv followed by a kernel-5 conv, both with 128 filters.
pronto_l_stages = [(160, 2), (80, 128), (40, 128), (20, 128), (10, 128)]
pronto_l_dense = [(640, 256), (256, 128), (128, 1)]

total = 0
for seq_len, first_in_channels in pronto_l_stages:
    total += conv_flop_counter((first_in_channels, seq_len), (128, first_in_channels, 7))
    total += conv_flop_counter((128, seq_len), (128, 128, 5))
    # Extra 128 * seq_len term kept from the original tally.
    # NOTE(review): presumably the separate activation layer after each conv pair; confirm.
    total += 128 * seq_len
for n_in, n_out in pronto_l_dense:
    total += dense_flop_counter(n_in, n_out)
# Author's earlier note: "flops: 40807936"

print(total)



1932032
39935232
