In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the IPython extensions used by this notebook.
%load_ext autoreload
%load_ext tensorboard
# Mode 2: re-import edited modules automatically before each cell runs.
%autoreload 2

# Mount Google Drive so the project folder under /content/drive is reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os, sys

# Project folder on the mounted Drive; presumably where the local helper
# modules imported later in the notebook live (confirm against the Drive layout).
folder = "/content/drive/MyDrive/Tel_358_Project/CNN1"

# Guarded so that re-running this cell does not pile up duplicate entries.
if folder not in sys.path:
  sys.path.append(folder)

In [None]:
pip install QKeras

In [None]:
import tensorflow as tf

# Show every device TensorFlow can see, one per line.
physical_devices = tf.config.list_physical_devices()
for device in physical_devices:
  print(device)

### TPU for Training Only

In [None]:
# Pick the distribution strategy: a TPUStrategy when Colab exposes a TPU
# address, otherwise TensorFlow's default strategy.
try:
  device_name = os.environ['COLAB_TPU_ADDR']
except KeyError:
  # Only the missing environment variable means "no TPU"; errors raised while
  # initialising a TPU that *is* present are left to propagate.
  print('TPU not found')
  strategy = tf.distribute.get_strategy()
else:
  TPU_ADDRESS = 'grpc://' + device_name
  print('Found TPU at: {}'.format(TPU_ADDRESS))
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
  tf.config.experimental_connect_to_cluster(resolver)
  # This is the TPU initialization code that has to be at the beginning.
  tf.tpu.experimental.initialize_tpu_system(resolver)
  print("All devices: ", tf.config.list_logical_devices('TPU'))
  strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
import os,random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm
import pickle, random, sys
import keras
import keras.backend as K
from keras.callbacks import LearningRateScheduler,TensorBoard
from keras.optimizers import Adam
import csv
import mltools,rmldataset2016
import rmlmodels.CNN2Model as cnn2
import tensorflow_model_optimization as tfmot

### Importing The Dataset

In [None]:

# Use channels-last tensors throughout and echo the active setting.
K.set_image_data_format('channels_last')
print(K.image_data_format())

# load_data() returns the label metadata, the three data splits and the
# indices that were used to build each split.
dataset = rmldataset2016.load_data()
(mods, snrs, lbl), (X_train, Y_train), (X_val, Y_val), (X_test, Y_test), (train_idx, val_idx, test_idx) = dataset

# Per-sample input shape (everything after the batch axis).
in_shp = list(X_train.shape[1:])
print(X_train.shape)

# The modulation names double as the class list.
classes = mods
print(classes)

In [None]:
# Training hyper-parameters and checkpoint location.
nb_epoch, batch_size = 1, 256    # epochs to train for, samples per training batch
filepath = 'weights/weights.h5'  # file the checkpoint callback writes to

In [None]:
# Start from a clean Keras session, then build and compile the CNN2 model
# inside the distribution strategy chosen earlier.
tf.keras.backend.clear_session()
with strategy.scope():
  model = cnn2.CNN2Model(None, input_shape=in_shp, classes=len(classes))
  model.compile(
      optimizer='adam',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )
model.summary()

In [None]:
# Train the model; all three callbacks monitor the validation loss.
history = model.fit(X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=2,
    validation_data=(X_val,Y_val),
    callbacks = [
                # Keep only the best weights (lowest val_loss) at `filepath`.
                keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto'),
                # Halve the learning rate after 5 epochs without improvement.
                keras.callbacks.ReduceLROnPlateau(monitor='val_loss',factor=0.5,verbose=1,patience=5,min_lr=0.000001),
                # Stop training after 50 epochs without improvement.
                keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, verbose=1, mode='auto')
                #keras.callbacks.TensorBoard(log_dir='./logs/',histogram_freq=1,write_graph=False,write_grads=1,write_images=False,update_freq='epoch')
                ]
                    )



In [None]:
def predict(model):
    """Evaluate ``model`` on the test split and save plots and accuracy files.

    Loads the weights stored at the notebook-level ``filepath`` into ``model``
    and then produces, in the working directory:

    * an overall confusion matrix and one confusion matrix per SNR (via
      ``mltools.plot_confusion_matrix`` with a ``save_filename``),
    * one accuracy row per SNR appended to ``acc111.csv``,
    * per-modulation accuracy curves (``acc_with_mod_<n>.png``),
    * the overall accuracy-vs-SNR curve (``each_acc.png``),
    * two pickle files holding the per-modulation and per-SNR accuracies.

    Relies on the notebook globals ``filepath``, ``batch_size``, ``X_test``,
    ``Y_test``, ``test_idx``, ``lbl``, ``snrs`` and ``classes``.

    Args:
        model: Keras model exposing ``load_weights`` and ``predict``.

    Returns:
        None.
    """
    mod_labels = ['8PSK','AM-DSB','AM-SSB','BPSK','CPFSK','GFSK','4-PAM','16-QAM','64-QAM','QPSK','WBFM']

    # Overall confusion matrix on the complete test split.
    model.load_weights(filepath)
    test_Y_hat = model.predict(X_test, batch_size=batch_size)
    confnorm,_,_ = mltools.calculate_confusion_matrix(Y_test,test_Y_hat,classes)
    mltools.plot_confusion_matrix(confnorm, labels=list(mod_labels), save_filename='cnn2_total_confusion')

    # Per-SNR confusion matrices and accuracies.
    acc = {}
    acc_mod_snr = np.zeros( (len(classes),len(snrs)) )

    # SNR of every test example; it does not depend on the loop variable, so
    # it is built once instead of once per SNR.
    test_SNRs = np.array([lbl[x][1] for x in test_idx])

    for i, snr in enumerate(snrs):
        # extract the test examples recorded at this SNR
        snr_rows = np.where(test_SNRs == snr)
        test_X_i = X_test[snr_rows]
        test_Y_i = Y_test[snr_rows]

        # estimate classes
        test_Y_i_hat = model.predict(test_X_i)
        confnorm_i,cor,ncor = mltools.calculate_confusion_matrix(test_Y_i,test_Y_i_hat,classes)
        acc[snr] = 1.0 * cor / (cor + ncor)
        with open('acc111.csv', 'a', newline='') as f0:
            csv.writer(f0).writerow([acc[snr]])
        # NOTE(review): "%2f" is a width-2 / default-precision format; "%.2f"
        # may have been intended. Left unchanged to keep the file names stable.
        mltools.plot_confusion_matrix(confnorm_i, labels=list(mod_labels), title="Confusion Matrix",save_filename="Confusion(SNR=%d)(ACC=%2f).png" % (snr,100.0*acc[snr]))

        # Per-class accuracy: diagonal over row sum, rounded to 3 decimals.
        acc_mod_snr[:,i] = np.round(np.diag(confnorm_i)/np.sum(confnorm_i,axis=1),3)

    # plot acc of each mod, at most `dis_num` modulations per picture
    dis_num=11
    for g in range(int(np.ceil(acc_mod_snr.shape[0]/dis_num))):
        assert (0 <= dis_num <= acc_mod_snr.shape[0])
        beg_index = g*dis_num
        end_index = min((g+1)*dis_num, acc_mod_snr.shape[0])

        plt.figure(figsize=(12, 10))
        plt.xlabel("Signal to Noise Ratio")
        plt.ylabel("Classification Accuracy")
        plt.title("Classification Accuracy for Each Mod")

        for mod_idx in range(beg_index,end_index):
            plt.plot(snrs, acc_mod_snr[mod_idx], label=classes[mod_idx])
            # annotate every point with its numeric value
            for x, y in zip(snrs, acc_mod_snr[mod_idx]):
                plt.text(x, y, y, ha='center', va='bottom', fontsize=8)

        plt.legend()
        plt.grid()
        plt.savefig('acc_with_mod_{}.png'.format(g+1))
        plt.close()

    # save acc for mod per SNR
    with open('acc_for_mod_on_cnn2.dat', 'wb') as fd:
        pickle.dump(('128','cnn2', acc_mod_snr), fd)

    # Save results to a pickle file for plotting later; the context manager
    # makes sure the file is flushed and closed.
    print(acc)
    with open('cnn2_d0.5.dat','wb') as fd:
        pickle.dump( ("CNN2", 0.5, acc) , fd )

    # Plot accuracy curve
    plt.plot(snrs, [acc[snr] for snr in snrs])
    plt.xlabel("Signal to Noise Ratio")
    plt.ylabel("Classification Accuracy")
    plt.title("Classification Accuracy on RadioML 2016.10 Alpha")
    plt.tight_layout()
    plt.savefig('each_acc.png')
    plt.close()
#predict(model_q16)

In [None]:
from qkeras.autoqkeras import *
from qkeras import *
from qkeras.utils import model_quantize
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings

In [None]:
# Rebind `model` to the saved float32 model loaded from disk; this replaces the
# model object trained earlier in the notebook.
model= tf.keras.models.load_model('/content/model_float32_complete.h5')

In [None]:
import pprint

# Reference data types applied to the un-quantized Keras layers.
reference_internal = "int8"
reference_accumulator = "int32"

q = run_qtools.QTools(
    model,
    # Energy model: "horowitz" refers to the 45nm process published in
    # M. Horowitz, "1.1 Computing's energy problem (and what we can do about
    # it)," 2014 IEEE International Solid-State Circuits Conference Digest of
    # Technical Papers (ISSCC), San Francisco, CA, 2014, pp. 10-14,
    # doi: 10.1109/ISSCC.2014.6757323.
    process="horowitz",
    # quantizer for the model input
    source_quantizers=reference_internal,
    is_inference=False,
    # absolute path (including filename) of the model weights; not supplied here
    weights_path=None,
    # quantizer for weight/bias in un-quantized keras layers
    keras_quantizer=reference_internal,
    # quantizer for the MAC accumulator in un-quantized keras layers
    keras_accumulator=reference_accumulator,
    # also calculate the baseline energy
    for_reference=True,
)

# Calculate the energy of the derived data type map.
energy_dict = q.pe(
    # parameters may live in dram, sram, or be fixed
    weights_on_memory="sram",
    # activations may live in dram or sram
    activations_on_memory="dram",
    # minimum sram size in bits; a 16MB SRAM is assumed
    min_sram_size=8*16*1024*1024,
    # False: data is assumed to be in SRAM already instead of being loaded
    # from dram (sram acting as a cache for dram)
    rd_wr_on_io=False,
)

# Energy-accounting rule taken from the qtools settings.
include_energy = qtools_settings.cfg.include_energy

# Per-layer energy distribution, and the sum over layers under the same rule.
energy_profile = q.extract_energy_profile(include_energy, energy_dict)
total_energy = q.extract_energy_sum(include_energy, energy_dict)

pprint.pprint(energy_profile)
print()
print("Total energy: {:.2f} uJ".format(total_energy / 1000000.0))

In [None]:
# --- int8: TFLite conversion with dynamic range quantization ---
converter_int8 = tf.lite.TFLiteConverter.from_keras_model(model)
converter_int8.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_int8_model = converter_int8.convert()

# --- float16: same optimisation, with float16 as the supported type ---
converter_float16 = tf.lite.TFLiteConverter.from_keras_model(model)
converter_float16.optimizations = [tf.lite.Optimize.DEFAULT]
converter_float16.target_spec.supported_types = [tf.float16]
tflite_float16_model = converter_float16.convert()

# Persist both flatbuffers next to the notebook.
with open('model_int8.tflite', 'wb') as f:
    f.write(tflite_int8_model)

with open('model_float16.tflite', 'wb') as f:
    f.write(tflite_float16_model)

In [None]:
import os

BYTES_PER_MB = 1024 * 1024

# On-disk size of each model file, in MB.
# NOTE(review): the float32 size is read from 'model_float32.h5', while the
# model above is loaded from '/content/model_float32_complete.h5' - confirm
# that this is the intended file.
original_size_mb = os.path.getsize('model_float32.h5') / BYTES_PER_MB
quantized_size_int8_mb = os.path.getsize('model_int8.tflite') / BYTES_PER_MB
quantized_size_float16_mb = os.path.getsize('model_float16.tflite') / BYTES_PER_MB

print(f"Original model size (float32): {original_size_mb:.2f} MB")
print(f"Quantized model size (float16): {quantized_size_float16_mb:.2f} MB")
print(f"Quantized model size (int8): {quantized_size_int8_mb:.2f} MB")

In [None]:
# Evaluate accuracy on test set

def evaluate_model(interpreter):
    """Return the top-1 accuracy of a TFLite interpreter on the test split.

    Feeds the notebook-level ``X_test`` through ``interpreter`` one sample at
    a time and compares the arg-max of each output with the arg-max of the
    matching row of ``Y_test``.

    Args:
        interpreter: TFLite interpreter whose tensors are already allocated.

    Returns:
        Fraction of test samples predicted correctly.
    """
    input_details = interpreter.get_input_details()[0]
    input_index = input_details['index']
    output_index = interpreter.get_output_details()[0]['index']
    num_samples = len(X_test)
    correct_predictions = 0

    for i in range(num_samples):
        # Add the batch axis and cast to the dtype the interpreter expects;
        # set_tensor rejects inputs of any other dtype.
        input_data = np.expand_dims(X_test[i], axis=0).astype(input_details['dtype'])
        interpreter.set_tensor(input_index, input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_index)
        predicted_label = np.argmax(output_data)
        if predicted_label == np.argmax(Y_test[i]):
            correct_predictions += 1

    accuracy = correct_predictions / num_samples
    return accuracy

# Evaluate float32 model accuracy on test set using model.evaluate
score = model.evaluate(X_test, Y_test, verbose=1, batch_size=4096)
print("Test accuracy (float32):", (score[1] * 100))

# Build the interpreters from the flatbuffers converted above; they are not
# created anywhere else in the notebook.
interpreter_float16 = tf.lite.Interpreter(model_content=tflite_float16_model)
interpreter_float16.allocate_tensors()
interpreter_int8 = tf.lite.Interpreter(model_content=tflite_int8_model)
interpreter_int8.allocate_tensors()

# Evaluate float16 model
accuracy_float16 = evaluate_model(interpreter_float16)
print(f"Test set accuracy (float16): {accuracy_float16:.4f}")

# Evaluate int8 model
accuracy_int8 = evaluate_model(interpreter_int8)
print(f"Test set accuracy (int8): {accuracy_int8:.4f}")

### Pruning


In [None]:
# Constant-sparsity schedule for magnitude pruning.
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(
        0.5, begin_step=2000, frequency=100),  # Sparsity Levels: 0.25, 0.5, 0.75
}

tf.keras.backend.clear_session()

# Reload the float32 model and wrap it for pruning on the first GPU.
with tf.device('/device:GPU:0'):
  model = tf.keras.models.load_model('/content/model_float32_complete.h5')
  model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
  model_for_pruning.compile(
      optimizer='adam',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )

# Fine-tune with pruning enabled; UpdatePruningStep is the tfmot callback that
# advances the pruning step during training.
pruning_callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]
history = model_for_pruning.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=5,
    verbose=2,
    validation_data=(X_val, Y_val),
    callbacks=pruning_callbacks,
)

In [None]:
# Test-set metrics of the pruned (still wrapped) model; index 1 is the accuracy
# metric requested at compile time.
score = model_for_pruning.evaluate(X_test, Y_test, batch_size=4096, verbose=0)
model_for_pruning_accuracy = score[1]

print(model_for_pruning_accuracy * 100)

In [None]:
# Strip the pruning wrappers from the fine-tuned model before exporting it.
model_pruned = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

In [None]:


# Convert the stripped, pruned model to TFLite (no quantization) and save it.
converter = tf.lite.TFLiteConverter.from_keras_model(model_pruned)
pruned_tflite_model = converter.convert()

pruned_tflite_path = "model_pruned.tflite"
with open(pruned_tflite_path, 'wb') as f:
  f.write(pruned_tflite_model)

# Report the location that was actually written, not a hard-coded directory.
print('Saved pruned TFLite model to:', os.path.abspath(pruned_tflite_path))

In [None]:
import tempfile

def get_gzipped_model_size(file):
  """Return the size, in MB, of ``file`` after DEFLATE compression in a zip archive.

  The archive is written to a temporary file that is deleted before returning.

  Args:
    file: Path of the file to compress.

  Returns:
    Size of the compressed archive in megabytes (bytes / 1024**2).
  """
  import os
  import zipfile

  fd, zipped_file = tempfile.mkstemp('.zip')
  # mkstemp hands back an open descriptor; close it so it does not leak.
  os.close(fd)
  try:
    with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
      archive.write(file)
    return os.path.getsize(zipped_file) / (1024 * 1024)
  finally:
    # The archive is only needed for its size.
    os.remove(zipped_file)

In [None]:
# Measure the file at the same relative path the export cell wrote it to.
print("Size of gzipped pruned TFlite model: %.2f MB" % (get_gzipped_model_size("model_pruned.tflite")))

In [None]:
# Convert the pruned model again, this time with TFLite's default optimisation
# (quantization) enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(model_pruned)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
#converter.target_spec.supported_types = [tf.float16] #FP16
quantized_and_pruned_tflite_model = converter.convert()

# One path variable for both the write and the size measurement, so the two
# cannot drift apart when the working directory is not /content.
quantized_and_pruned_tflite_path = "model_pruned_quantized.tflite"
with open(quantized_and_pruned_tflite_path, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)

print("Size of gzipped pruned and quantized TFlite model: %.2f MB" % (get_gzipped_model_size(quantized_and_pruned_tflite_path)))

In [None]:
def evaluate_model(interpreter):
    """Return the top-1 accuracy of a TFLite interpreter on the test split.

    Feeds the notebook-level ``X_test`` through ``interpreter`` one sample at
    a time and compares the arg-max of each output with the arg-max of the
    matching row of ``Y_test``.

    Args:
        interpreter: TFLite interpreter whose tensors are already allocated.

    Returns:
        Fraction of test samples predicted correctly.
    """
    input_details = interpreter.get_input_details()[0]
    input_index = input_details['index']
    output_index = interpreter.get_output_details()[0]['index']
    num_samples = len(X_test)
    correct_predictions = 0

    for i in range(num_samples):
        # Add the batch axis and cast to the dtype the interpreter expects;
        # set_tensor rejects inputs of any other dtype.
        input_data = np.expand_dims(X_test[i], axis=0).astype(input_details['dtype'])
        interpreter.set_tensor(input_index, input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_index)
        predicted_label = np.argmax(output_data)
        if predicted_label == np.argmax(Y_test[i]):
            correct_predictions += 1

    accuracy = correct_predictions / num_samples
    return accuracy

In [None]:
# Run the pruned + quantized flatbuffer through a TFLite interpreter.
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

# evaluate_model returns a fraction; it is scaled to percent when printed.
test_accuracy = evaluate_model(interpreter)

print('Pruned and quantized TFLite test_accuracy: {:.4f}'.format(test_accuracy * 100))

# Accuracy measured earlier on the pruned Keras model, for comparison.
print('Pruned TF test accuracy: {:.4f}'.format(model_for_pruning_accuracy * 100))

### TensorRT (You Need a Linux-Based Kernel for This Part)

In [None]:
!sudo apt-get install tensorrt

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.compiler.tensorrt import trt_convert as trt

In [None]:
from tensorflow.python.client import device_lib

def check_tensor_core_gpu_present():
    """Return True if a local device reports compute capability >= 7.0.

    The capability is parsed from the text following "compute capability: "
    in each device's ``physical_device_desc``. 7.0 is the threshold this
    notebook uses to decide that a Tensor Core GPU is present.

    Returns:
        True when such a device exists, otherwise False.
    """
    for device in device_lib.list_local_devices():
        if "compute capability" not in str(device):
            continue
        compute_capability = float(device.physical_device_desc.split("compute capability: ")[-1])
        if compute_capability >= 7.0:
            return True
    # Explicit False instead of falling through with an implicit None.
    return False

# Probe the devices once and reuse the result for both the flag and the message.
tensor_core_gpu = check_tensor_core_gpu_present()
print("Tensor Core GPU Present:", tensor_core_gpu)

In [None]:
# Write the pruned model to the 'saved_model' path (the TF-TRT converters below
# read it from there), then rebind `model` to the copy loaded back from disk.
model_pruned.save('saved_model')
model = tf.keras.models.load_model('saved_model')

In [None]:
# NOTE: this rebinds the notebook-wide `batch_size` (256 during training) to
# the benchmark batch size.
batch_size = 32
num_samples, num_channels, length = X_test.shape[0], X_test.shape[1], X_test.shape[2]

# Take the first `batch_size` test samples, wrapping around if the test split
# is smaller than one batch, and store them as float32.
sample_indices = np.arange(batch_size) % num_samples
batched_input = np.zeros((batch_size, num_channels, length), dtype=np.float32)
batched_input[:] = X_test[sample_indices]

batched_input = tf.constant(batched_input)
print('batched_input shape: ', batched_input.shape)


In [None]:
# Benchmarking throughput of the Keras model via model.predict
N_warmup_run = 50
N_run = 1000

# Untimed warm-up calls.
for _ in range(N_warmup_run):
  preds = model.predict(batched_input)

# Timed calls; every 50th step reports the mean latency of the last 50 runs.
latencies = []
for step in range(N_run):
  tic = time.time()
  preds = model.predict(batched_input)
  latencies.append(time.time() - tic)
  if step % 50 == 0:
    print('Step {}: {:4.1f}ms'.format(step, np.mean(latencies[-50:]) * 1000))

elapsed_time = np.array(latencies)
print('Throughput: {:.0f} images/s'.format(N_run * batch_size / elapsed_time.sum()))

In [None]:
from tensorflow.python.saved_model import tag_constants

def benchmark_tftrt(input_saved_model, n_warmup_run=50, n_run=1000):
    """Measure the throughput of a SavedModel's ``serving_default`` signature.

    Uses the notebook globals ``batched_input`` (the batch fed on every call)
    and ``batch_size`` (used to turn call latency into throughput).

    Args:
        input_saved_model: Directory of the SavedModel to load.
        n_warmup_run: Number of untimed calls made before measuring.
        n_run: Number of timed calls; must be at least 1.

    Returns:
        Throughput, ``n_run * batch_size / total_elapsed_seconds``.

    Raises:
        ValueError: If ``n_run`` is smaller than 1.
    """
    if n_run < 1:
        raise ValueError("n_run must be at least 1")

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # The signature is called by keyword, so look up its (first) input name.
    input_name = list(infer.structured_input_signature[1].keys())[0]

    # Append a trailing axis to batched_input to match the input shape that
    # the model expects.
    reshaped_input = tf.expand_dims(batched_input, axis=-1)

    for _ in range(n_warmup_run):
        infer(**{input_name: reshaped_input})

    elapsed_time = []
    for i in range(n_run):
        # perf_counter is monotonic and high resolution, which suits short timings.
        start_time = time.perf_counter()
        infer(**{input_name: reshaped_input})
        elapsed_time.append(time.perf_counter() - start_time)
        if i % 50 == 0:
            print('Step {}: {:4.1f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

    throughput = n_run * batch_size / np.sum(elapsed_time)
    print('Throughput: {:.0f} images/s'.format(throughput))
    return throughput


In [None]:
print('Converting to TF-TRT FP32...')

# Convert the SavedModel in 'saved_model' with TF-TRT at FP32 precision and
# write the result to its own directory.
fp32_output_dir = 'saved_model_TFTRT_FP32'
converter = trt.TrtGraphConverterV2(
    input_saved_model_dir='saved_model',
    precision_mode=trt.TrtPrecisionMode.FP32,
    max_workspace_size_bytes=8_000_000_000,
)
converter.convert()
converter.save(output_saved_model_dir=fp32_output_dir)

print('Done Converting to TF-TRT FP32')

In [None]:
# Sum the throughput of 10 benchmark passes over the FP32 engine, then report
# the mean.
FP_32_rt = sum(benchmark_tftrt('saved_model_TFTRT_FP32') for _ in range(10))

print('Average Throughput: {:.0f} images/s'.format(FP_32_rt / 10))

In [None]:
print('Converting to TF-TRT FP16...')

# Convert the SavedModel in 'saved_model' with TF-TRT at FP16 precision and
# write the result to its own directory.
fp16_output_dir = 'saved_model_TFTRT_FP16'
converter = trt.TrtGraphConverterV2(
    input_saved_model_dir='saved_model',
    precision_mode=trt.TrtPrecisionMode.FP16,
    max_workspace_size_bytes=8_000_000_000,
)
converter.convert()
converter.save(output_saved_model_dir=fp16_output_dir)

print('Done Converting to TF-TRT FP16')

In [None]:
# Sum the throughput of 10 benchmark passes over the FP16 engine, then report
# the mean.
FP_16_rt = sum(benchmark_tftrt('saved_model_TFTRT_FP16') for _ in range(10))

print('Average Throughput: {:.0f} images/s'.format(FP_16_rt / 10))

In [None]:
def calibration_input_fn():
    """Yield the single calibration batch: ``batched_input`` with a trailing axis added."""
    calibration_batch = tf.expand_dims(batched_input, axis=-1)
    yield (calibration_batch, )

print('Converting to TF-TRT INT8...')

# Convert the SavedModel in 'saved_model' with TF-TRT at INT8 precision; the
# conversion is given the calibration generator defined above.
int8_output_dir = 'saved_model_TFTRT_INT8'
converter = trt.TrtGraphConverterV2(
    input_saved_model_dir='saved_model',
    precision_mode=trt.TrtPrecisionMode.INT8,
    max_workspace_size_bytes=8_000_000_000,
)

converter.convert(calibration_input_fn=calibration_input_fn)
converter.save(output_saved_model_dir=int8_output_dir)

print('Done Converting to TF-TRT INT8')

In [None]:
# Sum the throughput of 10 benchmark passes over the INT8 engine, then report
# the mean.
INT_8_rt = sum(benchmark_tftrt('saved_model_TFTRT_INT8') for _ in range(10))

print('Average Throughput: {:.0f} images/s'.format(INT_8_rt / 10))