In [23]:
print(classification_report(y_test, preds_adam))
print(fps_adam)
print('mAP: ' + mAP_adam)
print ("Total number of parameters: " + params_adam)

              precision    recall  f1-score   support

           0       1.00      0.99      0.99       300
           1       0.96      0.99      0.98       300
           2       0.99      0.98      0.98       300

   micro avg       0.98      0.98      0.98       900
   macro avg       0.98      0.98      0.98       900
weighted avg       0.98      0.98      0.98       900
 samples avg       0.98      0.98      0.98       900

FPS: 43.75 fps
mAP: 0.9896296296296297
Total number of parameters: 659136


In [1]:
import caffe
import glob
from numpy import prod
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import LabelBinarizer

In [2]:
caffe_root = '/home/vadim/caffe/'

In [3]:
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

model_def = 'deploy_1module_head.prototxt'
model_weights = '1module_iter_1116.caffemodel'
#model_def = 'deploy_1module_head_reduce.prototxt'
#model_weights = '1module_iter_1584.caffemodel'


net_adam = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)

In [4]:
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print ('mean-subtracted values:', zip('BGR', mu))

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net_adam.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

mean-subtracted values: <zip object at 0x7fb9ac38d1e0>


In [5]:
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
#net.blobs['data'].reshape(50,        # batch size
#                          3,         # 3-channel (BGR) images
#                          224, 224)  # image size is 224
net_adam.blobs['data'].reshape(64,        # batch size
                          3,         # 3-channel (BGR) images
                          224, 224)


In [6]:
# for each layer, show the output shape
for layer_name, blob in net_adam.blobs.items():
    print (layer_name + '\t' + str(blob.data.shape))

data	(64, 3, 224, 224)
conv1/7x7_s2	(1, 64, 112, 112)
pool1/3x3_s2	(1, 64, 56, 56)
pool1/norm1	(1, 64, 56, 56)
conv2/3x3_reduce	(1, 64, 56, 56)
conv2/3x3	(1, 192, 56, 56)
conv2/norm2	(1, 192, 56, 56)
pool2/3x3_s2	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_0	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_1	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_2	(1, 192, 28, 28)
pool2/3x3_s2_pool2/3x3_s2_0_split_3	(1, 192, 28, 28)
inception_3a/1x1	(1, 64, 28, 28)
inception_3a/3x3_reduce	(1, 96, 28, 28)
inception_3a/3x3	(1, 128, 28, 28)
inception_3a/5x5_reduce	(1, 16, 28, 28)
inception_3a/5x5	(1, 32, 28, 28)
inception_3a/pool	(1, 192, 28, 28)
inception_3a/pool_proj	(1, 32, 28, 28)
inception_3a/output	(1, 256, 28, 28)
pool5/7x7_s1	(1, 256, 22, 22)
loss3/classifier_new	(1, 3)
softmax	(1, 3)


In [7]:
categories=[x.strip() for x in open('labels_adam.txt').readlines()]
print (categories)

['car', 'human', 'noise']


In [8]:
%%time
preds_adam = []
from timeit import default_timer as timer
time_start = timer()
#this is very ineffective O(n^2)+complexity of network
for i, x in enumerate(categories):
    for j, y in enumerate(sorted(glob.glob('Data/test/{}/*'.format(x)))):
        net_adam.blobs['data'].data[...] = transformer.preprocess('data',caffe.io.load_image(y))
        net_adam.reshape()
        pred = net_adam.forward()
        preds_adam.append(np.argmax(pred['softmax']))
        #print(y, categories[np.argmax(pred['softmax'])])

time_end = timer()
fps_adam=('FPS: %.2f fps' % (1000/(time_end-time_start)))
print('FPS: %.2f fps' % (1000/(time_end-time_start)))

FPS: 43.75 fps
CPU times: user 22.1 s, sys: 67.3 ms, total: 22.2 s
Wall time: 22.9 s


In [9]:
# convert the data and labels to NumPy arrays
preds_adam = np.array(preds_adam)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
preds_adam = lb.fit_transform(preds_adam)
#preds = to_categorical(preds)
print(preds_adam)

[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [0 0 1]
 [0 0 1]
 [0 0 1]]


In [10]:
params_adam = str(sum([prod(v[0].data.shape) for k, v in net_adam.params.items()]))
print ("Total number of parameters: " + str(sum([prod(v[0].data.shape) for k, v in net_adam.params.items()])))

Total number of parameters: 659136


In [11]:
CURRENT_DIR = '/home/vadim/testovoe/dev/'

In [12]:
test_df = pd.read_csv(CURRENT_DIR+"Data/text.txt", sep="\s+", header=None, names=["name", "category"])

In [13]:
def get_pad_width(im, new_shape, is_rgb=True):
    pad_diff = new_shape - im.shape[0], new_shape - im.shape[1]
    t, b = math.floor(pad_diff[0]/2), math.ceil(pad_diff[0]/2)
    l, r = math.floor(pad_diff[1]/2), math.ceil(pad_diff[1]/2)
    if is_rgb:
        pad_width = ((t,b), (l,r), (0, 0))
    else:
        pad_width = ((t,b), (l,r))
    return pad_width

def preprocess_image(image_path, desired_size=224):
    im = Image.open(image_path)
    im = im.resize((desired_size, )*2, resample=Image.LANCZOS)
    
    return im

In [14]:
N = test_df.shape[0]
x_test = np.empty((N, 224, 224, 3), dtype=np.uint8)

for i, image_id in enumerate(tqdm(test_df['name'])):
    x_test[i, :, :, :] = preprocess_image(
         f'/home/vadim/testovoe/dev/{image_id}'
    )

100%|██████████| 900/900 [00:00<00:00, 1076.77it/s]


In [15]:
y_test = pd.get_dummies(test_df['category']).values

In [21]:
labels_file = 'labels_adam.txt'

labels = np.loadtxt(labels_file, str, delimiter='\t')

print(labels)

['car' 'human' 'noise']


In [16]:
print(classification_report(y_test, preds_adam))

              precision    recall  f1-score   support

           0       1.00      0.99      0.99       300
           1       0.96      0.99      0.98       300
           2       0.99      0.98      0.98       300

   micro avg       0.98      0.98      0.98       900
   macro avg       0.98      0.98      0.98       900
weighted avg       0.98      0.98      0.98       900
 samples avg       0.98      0.98      0.98       900



In [22]:
mAP_adam = str(average_precision_score(y_test, preds_adam, average="samples"))
print(average_precision_score(y_test, preds_adam, average="samples"))

0.9896296296296297
