# Before:

In [20]:
print(classification_report(y_test, preds))
print(fps)
print('mAP: ' + mAP)
print ("Total number of parameters: " + params)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.98      1.00      0.99       300
           2       1.00      0.98      0.99       300

   micro avg       0.99      0.99      0.99       900
   macro avg       0.99      0.99      0.99       900
weighted avg       0.99      0.99      0.99       900
 samples avg       0.99      0.99      0.99       900

FPS: 16.01 fps
mAP: 0.9940740740740741
Total number of parameters: 5969344


# After:

In [32]:
print(classification_report(y_test, preds_adam))
print(fps_adam)
print('mAP: ' + mAP_adam)
print ("Total number of parameters: " + params_adam)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.99      0.99      0.99       300
           2       1.00      0.99      0.99       300

   micro avg       0.99      0.99      0.99       900
   macro avg       0.99      0.99      0.99       900
weighted avg       0.99      0.99      0.99       900
 samples avg       0.99      0.99      0.99       900

FPS: 45.76 fps
mAP: 0.9948148148148148
Total number of parameters: 659136


In [1]:
import caffe
import glob
from numpy import prod
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import LabelBinarizer

In [2]:
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

model_def = 'human_car_deploy.prototxt'
model_weights = '_iter_800000.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)

In [3]:
caffe_root = '/home/vadim/caffe/'

In [4]:
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print ('mean-subtracted values:', zip('BGR', mu))

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

mean-subtracted values: <zip object at 0x7f7727b0ab90>


In [5]:
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
#net.blobs['data'].reshape(50,        # batch size
#                          3,         # 3-channel (BGR) images
#                          224, 224)  # image size is 224
net.blobs['data'].reshape(104,        # batch size
                          3,         # 3-channel (BGR) images
                          224, 224)

In [6]:
categories=[x.strip() for x in open('labels.txt').readlines()]
print (categories)

['noise', 'car', 'human']


In [7]:
preds = []

In [8]:
%%time
from timeit import default_timer as timer
time_start = timer()
#this is very ineffective O(n^2)+complexity of network
for i, x in enumerate(categories):
    for j, y in enumerate(sorted(glob.glob('Data/test/{}/*'.format(x)))):
        net.blobs['data'].data[...] = transformer.preprocess('data',caffe.io.load_image(y))
        net.reshape()
        pred = net.forward()
        preds.append(np.argmax(pred['prob']))
        #print(y, categories[np.argmax(pred['prob'])])

time_end = timer()
fps=('FPS: %.2f fps' % (1000/(time_end-time_start)))
print('FPS: %.2f fps' % (1000/(time_end-time_start)))

FPS: 16.01 fps
CPU times: user 1min 2s, sys: 58.9 ms, total: 1min 2s
Wall time: 1min 2s


In [9]:
# convert the data and labels to NumPy arrays
preds = np.array(preds)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
preds = lb.fit_transform(preds)
#preds = to_categorical(preds)
print(preds)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [10]:
params = str(sum([prod(v[0].data.shape) for k, v in net.params.items()]))
print ("Total number of parameters: " + str(sum([prod(v[0].data.shape) for k, v in net.params.items()])))

Total number of parameters: 5969344


In [11]:
BS = 32

In [12]:
CURRENT_DIR = '/home/vadim/testovoe/dev/'

In [13]:
test_df = pd.read_csv(CURRENT_DIR+"Data/text.txt", sep="\s+", header=None, names=["name", "category"])

In [14]:
def get_pad_width(im, new_shape, is_rgb=True):
    pad_diff = new_shape - im.shape[0], new_shape - im.shape[1]
    t, b = math.floor(pad_diff[0]/2), math.ceil(pad_diff[0]/2)
    l, r = math.floor(pad_diff[1]/2), math.ceil(pad_diff[1]/2)
    if is_rgb:
        pad_width = ((t,b), (l,r), (0, 0))
    else:
        pad_width = ((t,b), (l,r))
    return pad_width

def preprocess_image(image_path, desired_size=224):
    im = Image.open(image_path)
    im = im.resize((desired_size, )*2, resample=Image.LANCZOS)
    
    return im

In [15]:
N = test_df.shape[0]
x_test = np.empty((N, 224, 224, 3), dtype=np.uint8)

for i, image_id in enumerate(tqdm(test_df['name'])):
    x_test[i, :, :, :] = preprocess_image(
         f'/home/vadim/testovoe/dev/{image_id}'
    )

100%|██████████| 900/900 [00:00<00:00, 1100.35it/s]


In [16]:
y_test = pd.get_dummies(test_df['category']).values

In [17]:
print(y_test)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [None]:
#categories=[x.strip() for x in open('labels.txt').readlines()]
#labels = pd.factorize(labels)[0]

In [18]:
# make predictions on the testing set
#print("[INFO] evaluating network...")
#predIdxs = model.predict(testX, batch_size=BS)

# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
#predIdxs = np.argmax(predIdxs, axis=1)


# show a nicely formatted classification report
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.98      1.00      0.99       300
           2       1.00      0.98      0.99       300

   micro avg       0.99      0.99      0.99       900
   macro avg       0.99      0.99      0.99       900
weighted avg       0.99      0.99      0.99       900
 samples avg       0.99      0.99      0.99       900



In [19]:
mAP = str(average_precision_score(y_test, preds, average="samples"))
print(average_precision_score(y_test, preds, average="samples"))

0.9940740740740741


# Adam

In [21]:
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

#model_def = 'deploy_3module_adam.prototxt'
#model_weights = '3module_iter_960.caffemodel'
#model_weights = '3module_iter_928.caffemodel'
model_def = 'deploy_1module_head.prototxt'
model_weights = '1module_iter_960.caffemodel'
#model_def = 'deploy_1module_head_reduce.prototxt'
#model_weights = '1module_reduce_iter_960.caffemodel'


net_adam = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)

In [22]:
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print ('mean-subtracted values:', zip('BGR', mu))

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net_adam.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

mean-subtracted values: <zip object at 0x7f7720143cd0>


In [23]:
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
#net.blobs['data'].reshape(50,        # batch size
#                          3,         # 3-channel (BGR) images
#                          224, 224)  # image size is 224
net_adam.blobs['data'].reshape(64,        # batch size
                          3,         # 3-channel (BGR) images
                          224, 224)


In [24]:
# for each layer, show the output shape
for layer_name, blob in net_adam.blobs.items():
    print (layer_name + '\t' + str(blob.data.shape))

data	(64, 3, 224, 224)
conv1/7x7_s2	(1, 64, 112, 112)
pool1/3x3_s2	(1, 64, 56, 56)
pool1/norm1	(1, 64, 56, 56)
conv2/3x3_reduce	(1, 64, 56, 56)
conv2/3x3	(1, 192, 56, 56)
conv2/norm2	(1, 192, 56, 56)
pool2/3x3_s2	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_0	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_1	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_2	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_3	(1, 192, 28, 28)
inception_3a/1x1	(1, 64, 28, 28)
inception_3a/3x3_reduce	(1, 96, 28, 28)
inception_3a/3x3	(1, 128, 28, 28)
inception_3a/5x5_reduce	(1, 16, 28, 28)
inception_3a/5x5	(1, 32, 28, 28)
inception_3a/pool	(1, 192, 28, 28)
inception_3a/pool_proj	(1, 32, 28, 28)
inception_3a/output	(1, 256, 28, 28)
pool5/7x7_s1	(1, 256, 22, 22)
loss3/classifier_new	(1, 3)
softmax	(1, 3)


In [25]:
categories=[x.strip() for x in open('labels_adam.txt').readlines()]
print (categories)

['car', 'human', 'noise']


In [26]:
%%time
preds_adam = []
from timeit import default_timer as timer
time_start = timer()
#this is very ineffective O(n^2)+complexity of network
for i, x in enumerate(categories):
    for j, y in enumerate(sorted(glob.glob('Data/test/{}/*'.format(x)))):
        net_adam.blobs['data'].data[...] = transformer.preprocess('data',caffe.io.load_image(y))
        net_adam.reshape()
        pred = net_adam.forward()
        preds_adam.append(np.argmax(pred['softmax']))
        #print(y, categories[np.argmax(pred['softmax'])])

time_end = timer()
fps_adam=('FPS: %.2f fps' % (1000/(time_end-time_start)))
print('FPS: %.2f fps' % (1000/(time_end-time_start)))

FPS: 45.76 fps
CPU times: user 21.8 s, sys: 43.9 ms, total: 21.9 s
Wall time: 21.9 s


In [27]:
# convert the data and labels to NumPy arrays
preds_adam = np.array(preds_adam)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
preds_adam = lb.fit_transform(preds_adam)
#preds = to_categorical(preds)
print(preds_adam)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [28]:
params_adam = str(sum([prod(v[0].data.shape) for k, v in net_adam.params.items()]))
print ("Total number of parameters: " + str(sum([prod(v[0].data.shape) for k, v in net_adam.params.items()])))

Total number of parameters: 659136


In [29]:
print(y_test)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [30]:
print(classification_report(y_test, preds_adam))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.99      0.99      0.99       300
           2       1.00      0.99      0.99       300

   micro avg       0.99      0.99      0.99       900
   macro avg       0.99      0.99      0.99       900
weighted avg       0.99      0.99      0.99       900
 samples avg       0.99      0.99      0.99       900



In [31]:
mAP_adam = str(average_precision_score(y_test, preds_adam, average="samples"))
print(average_precision_score(y_test, preds_adam, average="samples"))

0.9948148148148148


# BN

In [42]:
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

model_def = 'human_car_deploy.prototxt'
#model_weights = '_iter_800000.caffemodel'
model_weights = 'snapshot/_iter_272000.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)

In [34]:
caffe_root = '/home/vadim/caffe/'
categories=[x.strip() for x in open('labels.txt').readlines()]
print (categories)

['noise', 'car', 'human']


In [35]:
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print ('mean-subtracted values:', zip('BGR', mu))

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

mean-subtracted values: <zip object at 0x7f7720152a50>


In [36]:
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
#net.blobs['data'].reshape(50,        # batch size
#                          3,         # 3-channel (BGR) images
#                          224, 224)  # image size is 224
net.blobs['data'].reshape(32,        # batch size
                          3,         # 3-channel (BGR) images
                          224, 224)


In [37]:
# for each layer, show the output shape
for layer_name, blob in net.blobs.items():
    print (layer_name + '\t' + str(blob.data.shape))

data	(32, 3, 224, 224)
conv1/7x7_s2	(10, 64, 112, 112)
pool1/3x3_s2	(10, 64, 56, 56)
pool1/norm1	(10, 64, 56, 56)
conv2/3x3_reduce	(10, 64, 56, 56)
conv2/3x3	(10, 192, 56, 56)
conv2/norm2	(10, 192, 56, 56)
pool2/3x3_s2	(10, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_0	(10, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_1	(10, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_2	(10, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_3	(10, 192, 28, 28)
inception_3a/1x1	(10, 64, 28, 28)
inception_3a/3x3_reduce	(10, 96, 28, 28)
inception_3a/3x3	(10, 128, 28, 28)
inception_3a/5x5_reduce	(10, 16, 28, 28)
inception_3a/5x5	(10, 32, 28, 28)
inception_3a/pool	(10, 192, 28, 28)
inception_3a/pool_proj	(10, 32, 28, 28)
inception_3a/output	(10, 256, 28, 28)
inception_3a/output_inception_3a/output_0_split_0	(10, 256, 28, 28)
inception_3a/output_inception_3a/output_0_split_1	(10, 256, 28, 28)
inception_3a/output_inception_3a/output_0_split_2	(10, 256, 28, 28)
inception_3a/output_inception_3a/output_0_sp

In [38]:
params = str(sum([prod(v[0].data.shape) for k, v in net.params.items()]))
print ("Total number of parameters: " + str(sum([prod(v[0].data.shape) for k, v in net.params.items()])))

Total number of parameters: 5969344


In [39]:
categories=[x.strip() for x in open('labels.txt').readlines()]
print (categories)

['noise', 'car', 'human']


In [40]:
preds = []

In [43]:
%%time
from timeit import default_timer as timer
time_start = timer()
#this is very ineffective O(n^2)+complexity of network
for i, x in enumerate(categories):
    for j, y in enumerate(sorted(glob.glob('Data/test/{}/*'.format(x)))):
        net.blobs['data'].data[...] = transformer.preprocess('data',caffe.io.load_image(y))
        net.reshape()
        pred = net.forward()
        preds.append(np.argmax(pred['prob']))
        print(y, categories[np.argmax(pred['prob'])])

time_end = timer()
fps=('FPS: %.2f fps' % (1000/(time_end-time_start)))
print('FPS: %.2f fps' % (1000/(time_end-time_start)))

Data/test/noise/100093.JPEG noise
Data/test/noise/100132.JPEG noise
Data/test/noise/100386.JPEG noise
Data/test/noise/100422.JPEG noise
Data/test/noise/100426.JPEG noise
Data/test/noise/100508.JPEG noise
Data/test/noise/100517.JPEG noise
Data/test/noise/100598.JPEG noise
Data/test/noise/100627.JPEG noise
Data/test/noise/100750.JPEG noise
Data/test/noise/100762.JPEG noise
Data/test/noise/100901.JPEG noise
Data/test/noise/100923.JPEG noise
Data/test/noise/100931.JPEG noise
Data/test/noise/100946.JPEG noise
Data/test/noise/101035.JPEG noise
Data/test/noise/101153.JPEG noise
Data/test/noise/101264.JPEG noise
Data/test/noise/101305.JPEG noise
Data/test/noise/101411.JPEG noise
Data/test/noise/101500.JPEG noise
Data/test/noise/101533.JPEG noise
Data/test/noise/101605.JPEG noise
Data/test/noise/101609.JPEG noise
Data/test/noise/101733.JPEG noise
Data/test/noise/101767.JPEG noise
Data/test/noise/101800.JPEG noise
Data/test/noise/101849.JPEG noise
Data/test/noise/101855.JPEG noise
Data/test/nois

Data/test/noise/112545.JPEG noise
Data/test/noise/112640.JPEG noise
Data/test/noise/112702.JPEG noise
Data/test/noise/112725.JPEG noise
Data/test/noise/112782.JPEG noise
Data/test/noise/112869.JPEG noise
Data/test/noise/112912.JPEG noise
Data/test/noise/112953.JPEG noise
Data/test/noise/113106.JPEG noise
Data/test/noise/113140.JPEG noise
Data/test/noise/113149.JPEG noise
Data/test/noise/113219.JPEG noise
Data/test/noise/113240.JPEG noise
Data/test/noise/113254.JPEG noise
Data/test/noise/113309.JPEG noise
Data/test/noise/113314.JPEG noise
Data/test/noise/113337.JPEG noise
Data/test/noise/113443.JPEG noise
Data/test/noise/113485.JPEG noise
Data/test/noise/113751.JPEG noise
Data/test/noise/113797.JPEG noise
Data/test/noise/113904.JPEG noise
Data/test/noise/113974.JPEG noise
Data/test/noise/114086.JPEG noise
Data/test/noise/114156.JPEG noise
Data/test/noise/114163.JPEG noise
Data/test/noise/114205.JPEG noise
Data/test/noise/114271.JPEG noise
Data/test/noise/114371.JPEG noise
Data/test/nois

Data/test/car/110315.JPEG noise
Data/test/car/110327.JPEG noise
Data/test/car/110497.JPEG noise
Data/test/car/110499.JPEG noise
Data/test/car/110685.JPEG noise
Data/test/car/110714.JPEG noise
Data/test/car/110776.JPEG noise
Data/test/car/110791.JPEG noise
Data/test/car/110880.JPEG noise
Data/test/car/110899.JPEG noise
Data/test/car/110941.JPEG noise
Data/test/car/110959.JPEG noise
Data/test/car/111010.JPEG noise
Data/test/car/111012.JPEG noise
Data/test/car/111112.JPEG noise
Data/test/car/111141.JPEG noise
Data/test/car/111186.JPEG noise
Data/test/car/111217.JPEG noise
Data/test/car/111259.JPEG noise
Data/test/car/111283.JPEG noise
Data/test/car/111313.JPEG noise
Data/test/car/111346.JPEG noise
Data/test/car/111390.JPEG noise
Data/test/car/111600.JPEG noise
Data/test/car/111657.JPEG noise
Data/test/car/111679.JPEG noise
Data/test/car/111727.JPEG noise
Data/test/car/111758.JPEG noise
Data/test/car/111833.JPEG noise
Data/test/car/112096.JPEG noise
Data/test/car/112109.JPEG noise
Data/tes

Data/test/human/108495.JPEG noise
Data/test/human/108555.JPEG noise
Data/test/human/108612.JPEG noise
Data/test/human/108704.JPEG noise
Data/test/human/108723.JPEG noise
Data/test/human/108774.JPEG noise
Data/test/human/108841.JPEG noise
Data/test/human/108863.JPEG noise
Data/test/human/108915.JPEG noise
Data/test/human/108985.JPEG noise
Data/test/human/109025.JPEG noise
Data/test/human/109092.JPEG noise
Data/test/human/109159.JPEG noise
Data/test/human/109165.JPEG noise
Data/test/human/109223.JPEG noise
Data/test/human/109234.JPEG noise
Data/test/human/109267.JPEG noise
Data/test/human/109308.JPEG noise
Data/test/human/109365.JPEG noise
Data/test/human/109377.JPEG noise
Data/test/human/109422.JPEG noise
Data/test/human/109514.JPEG noise
Data/test/human/109541.JPEG noise
Data/test/human/109605.JPEG noise
Data/test/human/109617.JPEG noise
Data/test/human/109659.JPEG noise
Data/test/human/109695.JPEG noise
Data/test/human/109782.JPEG noise
Data/test/human/109784.JPEG noise
Data/test/huma

In [None]:
# convert the data and labels to NumPy arrays
preds = np.array(preds)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
preds = lb.fit_transform(preds)
#preds = to_categorical(preds)
print(preds)

In [None]:
print(classification_report(y_test, preds))