In [1]:
import matplotlib.pyplot as plt
%matplotlib

import numpy as np
from PIL import Image
import caffe
import os
import sys
from tqdm import tqdm

import lmdb
from caffe.proto import caffe_pb2
import h5py

Using matplotlib backend: TkAgg


In [2]:
# Index of the CUDA device used for inference; change this to run on another GPU.
gpu_id = 5

# Select the device first, then switch Caffe to GPU mode. This is the order
# used in the pycaffe examples.
# NOTE(review): switching mode before selecting the device is believed to
# initialise Caffe's CUDA handles on the default device (GPU 0) first -- confirm.
caffe.set_device(gpu_id)
caffe.set_mode_gpu()

In [3]:
# Per-channel mean subtracted from every image before it is fed to the net.
# Shaped (3, 1, 1) so it broadcasts over a (3, H, W) image.
# NOTE(review): despite the name, the values are ordered like the usual Caffe
# BGR ImageNet mean (B~104, G~117, R~124) -- confirm the LMDB stores BGR images.
rgb_mean = np.array([103.86496098, 116.78539062, 123.68693434]).reshape(3, 1, 1)

In [4]:
# Load the network in TEST phase from its definition and weight files
# (ResNet-50, going by the file names).
model_def = 'ResNet-50-deploy.prototxt'        # network definition
model_weights = 'ResNet-50-model.caffemodel'   # trained parameters
net = caffe.Net(model_def, model_weights, caffe.TEST)

In [5]:
# --- batch sizes -----------------------------------------------------------
# Number of images pushed through the network per forward pass.
net_batch_size = 32
# Number of forward passes collected into one output file. Deriving
# out_batch_size from it keeps it a whole multiple of net_batch_size, which
# the extraction loop relies on.
net_batches_per_file = 256
out_batch_size = net_batches_per_file * net_batch_size

# --- input / output locations ----------------------------------------------
out_dir = '/imagenet/SegCompress'
lmdb_env = lmdb.open('/imagenet/lmdb_scaled/training.lmdb/', readonly=True)

# Resize the input blob to hold one full batch of 3x224x224 crops.
net.blobs['data'].reshape(net_batch_size, 3, 224, 224)

In [None]:
# Fail fast: make sure the output directory exists before spending GPU time
# on a whole output batch that could then not be written.
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)


def _save_batch(batch_idx, batch_images, batch_scores):
    """Write one output batch to ``<out_dir>/batch_<batch_idx>.h5``.

    batch_images is stored as dataset 'data' (uint8) and batch_scores as
    dataset 'label', both gzip-compressed.
    """
    path = os.path.join(out_dir, "batch_%04d.h5" % batch_idx)
    with h5py.File(path, "w") as f:
        f.create_dataset('data', data=batch_images, compression='gzip', dtype=np.uint8)
        f.create_dataset('label', data=batch_scores, compression='gzip', dtype=batch_scores.dtype)


# Stream every image of the LMDB through the network and store, per output
# batch, the raw images together with the network's 'fc1000' outputs.
with lmdb_env.begin() as txn:
    # Buffers for one output batch: network outputs and the uncropped images.
    out = np.empty((out_batch_size, net.blobs['fc1000'].data.shape[1]),
                   dtype=net.blobs['fc1000'].data.dtype)
    images = np.empty((out_batch_size, 3, 256, 256), dtype=np.uint8)
    datum = caffe_pb2.Datum()

    # Central 224x224 window of a 256x256 image.
    center_slice = slice(16, 256-16)
    i = 0        # fill level of the current network batch
    offset = 0   # number of images of the current output batch already predicted
    cnt = 0      # index of the next HDF5 file

    n_total = txn.stat()['entries']
    for key, value in tqdm(txn.cursor(), total=n_total, desc='img'):
        # Decode the image and keep the uncropped version for the output file.
        datum.ParseFromString(value)
        img = caffe.io.datum_to_array(datum)
        images[i+offset] = img

        # Place the mean-subtracted centre crop into slot i of the input blob.
        net.blobs['data'].data[i] = img[:, center_slice, center_slice] - rgb_mean
        i += 1

        # Forward the net every time a network batch is full.
        if i == net_batch_size:
            net.forward()
            out[offset:(offset+net_batch_size)] = net.blobs['fc1000'].data[:]
            offset += net_batch_size
            i = 0

        # Save an HDF5 file every time an output batch is fully predicted.
        if offset == out_batch_size:
            _save_batch(cnt, images, out)
            cnt += 1
            offset = 0

    # Flush the tail. Without this, images left in a partially filled network
    # batch or output batch would be dropped whenever the database size is not
    # a multiple of out_batch_size.
    if i > 0:
        # Slots i.. of the input blob still hold images from the previous
        # batch; their outputs are ignored.
        net.forward()
        out[offset:(offset+i)] = net.blobs['fc1000'].data[:i]
        offset += i
        i = 0
    if offset > 0:
        # The last file holds fewer than out_batch_size samples.
        _save_batch(cnt, images[:offset], out[:offset])
        cnt += 1
        offset = 0
        

