## **Install Protobuf**

In [0]:
%%capture
!apt-get install autoconf automake libtool curl make g++ unzip
!git clone https://github.com/protocolbuffers/protobuf.git
%cd protobuf
!git submodule update --init --recursive
!./autogen.sh
!./configure
!make -j8
!make check
!sudo make install
!sudo ldconfig 
%cd ..

## **Downloading the Files**

In [0]:
import shutil
import os 
%cd /content/
if os.path.isdir('/content/HP3-CNN-Inferencing'):
  shutil.rmtree('/content/HP3-CNN-Inferencing')

/content


In [0]:
import os
import subprocess
import urllib.parse  # a bare `import urllib` does not guarantee the `parse` submodule is loaded
from getpass import getpass

# Clone the private repository over HTTPS using interactively entered credentials.
# NOTE: GitHub no longer accepts account passwords for git over HTTPS; enter a
# personal access token at the password prompt.
user = input('User name: ')
password = getpass('Password: ')

# Percent-encode both fields with safe='' so that characters such as '/', '@'
# or ':' cannot break the URL structure (quote() leaves '/' unescaped by default).
quoted_user = urllib.parse.quote(user, safe='')
password = urllib.parse.quote(password, safe='')

repo_location = 'github.com/prajwal1210/HP3-CNN-Inferencing.git'
clone_url = 'https://{0}:{1}@{2}'.format(quoted_user, password, repo_location)

try:
  # Argument list instead of a shell string: nothing typed at the prompts is
  # ever interpreted by a shell.
  clone_status = subprocess.run(['git', 'clone', clone_url]).returncode
finally:
  # Drop the secret from the kernel namespace whether or not the clone worked.
  # `cmd_string` is kept (empty) because the cell has always defined that name.
  clone_url, cmd_string, password = "", "", ""

if clone_status != 0:
  raise RuntimeError('git clone failed with exit status {0}'.format(clone_status))

# `git clone` stores the full URL, credentials included, as the origin remote
# in .git/config. Replace it with the credential-free URL.
subprocess.run(
    ['git', '-C', 'HP3-CNN-Inferencing', 'remote', 'set-url', 'origin',
     'https://' + repo_location],
    check=True,
)

User name: prajwal1210
Password: ··········


In [0]:
# Make the cloned repository the working directory and list its top-level
# contents as a quick check that the clone succeeded.
%cd /content/HP3-CNN-Inferencing
!ls

/content/HP3-CNN-Inferencing
ConvertToSpecification.py  kernels	      profiling  README.md
forward			   pretrained-models  proto


## **Compiling the Proto, Downloading the Pretrained Models, and Saving Them in Protobuf Format**

In [0]:
%cd proto
!protoc -I=. --cpp_out=. ./network.proto
%cd ..

/content/HP3-CNN-Inferencing/proto
/content/HP3-CNN-Inferencing


In [0]:
# Run the conversion script; it must be started from the repository root.
# The recorded output shows it downloading the VGG-19 and AlexNet checkpoints
# from download.pytorch.org; presumably it then writes the .pb files under
# pretrained-models/ that the later test cells load (confirm in the script).
!python ConvertToSpecification.py

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/checkpoints/vgg19-dcbb9e9d.pth
100% 548M/548M [00:27<00:00, 21.0MB/s]
Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth
100% 233M/233M [00:06<00:00, 37.5MB/s]


In [0]:
%%capture
%cd forward/data
!unzip MiniImageNet.zip
%cd ../../

## **Forward Test**

#### **Single Image Test**

In [0]:
# Switch to the single-image forward test directory; the make targets in the
# following cells are run from here.
%cd /content/HP3-CNN-Inferencing/forward/cnn_forward_test/
!ls

/content/HP3-CNN-Inferencing/forward/cnn_forward_test
cnn_forward_test.cc  cnn_forward_test.py  Makefile


In [0]:
# Build the single-image test binary. The recorded nvcc command compiles
# cnn_forward_test.cc together with the forward/, proto/ and kernels/ sources.
!make

nvcc -I /content/HP3-CNN-Inferencing -L /usr/local/cuda-10.1/man/man7/libcublas.so.7  -std=c++14 cnn_forward_test.cc /content/HP3-CNN-Inferencing/forward/operations.cc /content/HP3-CNN-Inferencing/forward/cnn_forward.cc /content/HP3-CNN-Inferencing/forward/data_util.cc /content/HP3-CNN-Inferencing/proto/translator.cc /content/HP3-CNN-Inferencing/proto/network.pb.cc /content/HP3-CNN-Inferencing/kernels/FFT/fft_fast.cu /content/HP3-CNN-Inferencing/kernels/Direct/direct_conv.cu /content/HP3-CNN-Inferencing/kernels/winograd/winograd_fast.cu /content/HP3-CNN-Inferencing/kernels/im2col/im2col.cu -I/usr/include/opencv -lopencv_shape -lopencv_stitching -lopencv_superres -lopencv_videostab -lopencv_aruco -lopencv_bgsegm -lopencv_bioinspired -lopencv_ccalib -lopencv_datasets -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy -lopencv_hdf -lopencv_line_descriptor -lopencv_optflow -lopencv_video -lopencv_plot -lopencv_reg -lopencv_saliency -lopencv_stereo -lopencv_structured_light -lopenc

In [0]:
# Direct convolution: per the recorded output, runs ./test on vgg19.pb and
# alexnet.pb with "DIRECT", then cnn_forward_test.py prints the average
# output-layer difference for each model.
!make run_direct

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "DIRECT" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x5577c6608000 @  0x7f8cf1705887 0x7f8cc9e9b07d 0x7f8cc9e8eca8 0x7f8cc9e994ab 0x7f8cc9e224e5 0x7f8cc9e89518 0x557783c38ad3 0x557783c3242f 0x7f8cc978eb97 0x557783c32b3a
python cnn_forward_test.py vgg
Average difference between two values of the output layer - 0.000001

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "DIRECT" 1>/dev/null
python cnn_forward_test.py alex 
Average difference between two values of the output layer - 0.000001


In [0]:
# im2col convolution: per the recorded output, runs ./test on vgg19.pb and
# alexnet.pb with "IM2COL", then cnn_forward_test.py prints the average
# output-layer difference for each model.
!make run_im2col

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "IM2COL" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x56083381a000 @  0x7ff554272887 0x7ff52ca0807d 0x7ff52c9fbca8 0x7ff52ca064ab 0x7ff52c98f4e5 0x7ff52c9f6518 0x5607f1a86ad3 0x5607f1a8042f 0x7ff52c2fbb97 0x5607f1a80b3a
python cnn_forward_test.py vgg
Average difference between two values of the output layer - 0.000001

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "IM2COL" 1>/dev/null
python cnn_forward_test.py alex 
Average difference between two values of the output layer - 0.000001


In [0]:
# FFT convolution: per the recorded output, runs ./test on vgg19.pb and
# alexnet.pb with "FFT", then cnn_forward_test.py prints the average
# output-layer difference for each model.
!make run_fft

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "FFT" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x564d10c80000 @  0x7fcc8c755887 0x7fcc64eeb07d 0x7fcc64edeca8 0x7fcc64ee94ab 0x7fcc64e724e5 0x7fcc64ed9518 0x564ccdb3fad3 0x564ccdb3942f 0x7fcc647deb97 0x564ccdb39b3a
python cnn_forward_test.py vgg
Average difference between two values of the output layer - 0.000004

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "FFT" 1>/dev/null
python cnn_forward_test.py alex 
Average difference between two values of the output layer - 0.000002


In [0]:
# Winograd convolution: the recorded output shows only the vgg19.pb run with
# "WINOGRAD", followed by the cnn_forward_test.py difference report; no
# AlexNet run appears for this target.
!make run_winograd

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "WINOGRAD" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x555d215a0000 @  0x7fbd817fb887 0x7fbd59f9107d 0x7fbd59f84ca8 0x7fbd59f8f4ab 0x7fbd59f184e5 0x7fbd59f7f518 0x555cdea09ad3 0x555cdea0342f 0x7fbd59884b97 0x555cdea03b3a
python cnn_forward_test.py vgg
Average difference between two values of the output layer - 0.000001


#### **Batch Test**

In [0]:
# Switch to the batch test directory; the make targets in the following cells
# are run from here.
%cd /content/HP3-CNN-Inferencing/forward/batch_test/

/content/HP3-CNN-Inferencing/forward/batch_test


In [0]:
# Build the batch test binary. The recorded nvcc command compiles batch_test.cc
# together with the forward/, proto/ and kernels/ sources.
!make

nvcc -I /content/HP3-CNN-Inferencing -L /usr/local/cuda-10.1/man/man7/libcublas.so.7  -std=c++14 batch_test.cc /content/HP3-CNN-Inferencing/forward/operations.cc /content/HP3-CNN-Inferencing/forward/cnn_forward.cc /content/HP3-CNN-Inferencing/forward/data_util.cc /content/HP3-CNN-Inferencing/proto/translator.cc /content/HP3-CNN-Inferencing/proto/network.pb.cc /content/HP3-CNN-Inferencing/kernels/FFT/fft_fast.cu /content/HP3-CNN-Inferencing/kernels/Direct/direct_conv.cu /content/HP3-CNN-Inferencing/kernels/winograd/winograd_fast.cu /content/HP3-CNN-Inferencing/kernels/im2col/im2col.cu -I/usr/include/opencv -lopencv_shape -lopencv_stitching -lopencv_superres -lopencv_videostab -lopencv_aruco -lopencv_bgsegm -lopencv_bioinspired -lopencv_ccalib -lopencv_datasets -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy -lopencv_hdf -lopencv_line_descriptor -lopencv_optflow -lopencv_video -lopencv_plot -lopencv_reg -lopencv_saliency -lopencv_stereo -lopencv_structured_light -lopencv_phas

In [0]:
# Direct convolution, batch variant: per the recorded output, runs ./test on
# vgg19.pb and alexnet.pb with "DIRECT", then batch_test.py prints the average
# output-layer difference for each model.
!make run_direct

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "DIRECT" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x564a36d4a000 @  0x7fb4a63e8887 0x7fb47eb7e07d 0x7fb47eb71ca8 0x7fb47eb7c4ab 0x7fb47eb054e5 0x7fb47eb6c518 0x5649f5162bb3 0x5649f515c42f 0x7fb47e471b97 0x5649f515cc1a
python batch_test.py vgg
Average difference between two values of the output layer - 0.000001

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "DIRECT" 1>/dev/null
python batch_test.py alex 
Average difference between two values of the output layer - 0.000001


In [0]:
# im2col convolution, batch variant: per the recorded output, runs ./test on
# vgg19.pb and alexnet.pb with "IM2COL", then batch_test.py prints the average
# output-layer difference for each model.
!make run_im2col

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "IM2COL" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x55af4358c000 @  0x7f3c4a91e887 0x7f3c230b407d 0x7f3c230a7ca8 0x7f3c230b24ab 0x7f3c2303b4e5 0x7f3c230a2518 0x55af01dbdbb3 0x55af01db742f 0x7f3c229a7b97 0x55af01db7c1a
python batch_test.py vgg
Average difference between two values of the output layer - 0.000001

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "IM2COL" 1>/dev/null
python batch_test.py alex 
Average difference between two values of the output layer - 0.000001


In [0]:
# FFT convolution, batch variant: per the recorded output, runs ./test on
# vgg19.pb and alexnet.pb with "FFT", then batch_test.py prints the average
# output-layer difference for each model.
!make run_fft

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "FFT" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x55b5db632000 @  0x7f6192d35887 0x7f616b4cb07d 0x7f616b4beca8 0x7f616b4c94ab 0x7f616b4524e5 0x7f616b4b9518 0x55b598ff7bb3 0x55b598ff142f 0x7f616adbeb97 0x55b598ff1c1a
python batch_test.py vgg
Average difference between two values of the output layer - 0.000004

./test "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "FFT" 1>/dev/null
python batch_test.py alex
Average difference between two values of the output layer - 0.000002


In [0]:
# Winograd convolution, batch variant: the recorded output shows only the
# vgg19.pb run with "WINOGRAD", followed by the batch_test.py difference
# report; no AlexNet run appears for this target.
!make run_winograd

./test "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "WINOGRAD" 1>/dev/null
tcmalloc: large alloc 1073750016 bytes == 0x562d645b2000 @  0x7fbc1703e887 0x7fbbef7d407d 0x7fbbef7c7ca8 0x7fbbef7d24ab 0x7fbbef75b4e5 0x7fbbef7c2518 0x562d21821bb3 0x562d2181b42f 0x7fbbef0c7b97 0x562d2181bc1a
python batch_test.py vgg
Average difference between two values of the output layer - 0.000001


## **Profiling**


In [0]:
# Switch to the profiling directory; the profiler is built and run from here.
%cd /content/HP3-CNN-Inferencing/profiling

/content/HP3-CNN-Inferencing/profiling


In [0]:
# Build the profiler binary. The recorded nvcc command compiles profiler.cc
# together with the forward/, proto/ and kernels/ sources.
!make

nvcc -I /content/HP3-CNN-Inferencing -L /usr/local/cuda-10.1/man/man7/libcublas.so.7  -std=c++14 profiler.cc /content/HP3-CNN-Inferencing/forward/operations.cc /content/HP3-CNN-Inferencing/forward/cnn_forward.cc /content/HP3-CNN-Inferencing/forward/data_util.cc /content/HP3-CNN-Inferencing/proto/translator.cc /content/HP3-CNN-Inferencing/proto/network.pb.cc /content/HP3-CNN-Inferencing/kernels/FFT/fft_fast.cu /content/HP3-CNN-Inferencing/kernels/Direct/direct_conv.cu /content/HP3-CNN-Inferencing/kernels/winograd/winograd_fast.cu /content/HP3-CNN-Inferencing/kernels/im2col/im2col.cu -I/usr/include/opencv -lopencv_shape -lopencv_stitching -lopencv_superres -lopencv_videostab -lopencv_aruco -lopencv_bgsegm -lopencv_bioinspired -lopencv_ccalib -lopencv_datasets -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy -lopencv_hdf -lopencv_line_descriptor -lopencv_optflow -lopencv_video -lopencv_plot -lopencv_reg -lopencv_saliency -lopencv_stereo -lopencv_structured_light -lopencv_phase_

In [0]:
# Profile VGG-19. Per the recorded output, the target first removes logVGG.txt,
# then runs ./test "VGG" on vgg19.pb (starting with "DIRECT"), reading the image
# list from ../forward/data/MiniImageNet/ImageLists.txt and printing per-layer
# shapes and "TIME ELAPSED" lines.
!make run_vgg

rm -f logVGG.txt
./test "VGG" "/content/HP3-CNN-Inferencing/pretrained-models/vgg19.pb" "DIRECT"
tcmalloc: large alloc 1073750016 bytes == 0x55a5227f2000 @  0x7f0d69c93887 0x7f0d4242907d 0x7f0d4241cca8 0x7f0d424274ab 0x7f0d423b04e5 0x7f0d42417518 0x55a4df57eea3 0x55a4df57785f 0x7f0d4197eb97 0x55a4df577fba
FOR THE ALGORITHM : DIRECT
../forward/data/MiniImageNet/ImageLists.txt
Running for Batchsize - 1
Processing the Layer Type : CONV(64, 3, kernel_size = (3, 3), stride = (1 ,1), padding = (1 ,1)) --> (1, 64, 256, 256)
USING DIRECT CONVOLUTION
Input - ( 1, 3, 256, 256 )
TIME ELAPSED - 17.6247 = 5.64346 + 0.382944
Output - ( 1, 64, 256, 256 )
Processing the Layer Type : ACTIVATION(RELU) --> (1, 64, 256, 256)
Input - ( 1, 64, 256, 256 )
Output - ( 1, 64, 256, 256 )
Processing the Layer Type : CONV(64, 64, kernel_size = (3, 3), stride = (1 ,1), padding = (1 ,1)) --> (1, 64, 256, 256)
USING DIRECT CONVOLUTION
Input - ( 1, 64, 256, 256 )
TIME ELAPSED - 142.394 = 107.574 + 7.93094
Output - ( 1

In [0]:
# Profile AlexNet. Per the recorded output, the target runs ./test "ALEX" on
# alexnet.pb (starting with "DIRECT") and prints per-layer shapes and
# "TIME ELAPSED" lines.
# NOTE(review): the recorded output shows this target running `rm -f logVGG.txt`,
# the same file the run_vgg target removes. Confirm in the Makefile whether the
# AlexNet log was meant here instead.
!make run_alex

rm -f logVGG.txt
./test "ALEX" "/content/HP3-CNN-Inferencing/pretrained-models/alexnet.pb" "DIRECT"
FOR THE ALGORITHM : DIRECT
../forward/data/MiniImageNet/ImageLists.txt
Running for Batchsize - 1
Processing the Layer Type : CONV(64, 3, kernel_size = (11, 11), stride = (4 ,4), padding = (2 ,2)) --> (1, 64, 63, 63)
USING DIRECT CONVOLUTION
Input - ( 1, 3, 256, 256 )
TIME ELAPSED - 5.4753 = 2.78774 + 0.38192
Output - ( 1, 64, 63, 63 )
Processing the Layer Type : ACTIVATION(RELU) --> (1, 64, 63, 63)
Input - ( 1, 64, 63, 63 )
Output - ( 1, 64, 63, 63 )
Processing the Layer Type : POOL(MAX, kernel_size = (3, 3), stride = (2 ,2), padding = (0 ,0)) --> (1, 64, 31, 31)
Input - ( 1, 64, 63, 63 )
Output - ( 1, 64, 31, 31 )
Kernel Size - ( 3, 3 )
Stride - ( 2, 2 )
Processing the Layer Type : CONV(192, 64, kernel_size = (5, 5), stride = (1 ,1), padding = (2 ,2)) --> (1, 192, 31, 31)
USING DIRECT CONVOLUTION
Input - ( 1, 64, 31, 31 )
TIME ELAPSED - 10.976 = 9.67824 + 0.150688
Output - ( 1, 192, 31,