This file uses a Python 2 library from GitHub (https://github.com/vadimkantorov/caffemodel2pytorch) to convert Caffe models into PyTorch. The original model was trained with Caffe. Remarkably, you never have to install Caffe for it to work!

I converted it to work on Python3.


Run `merge_nps` to combine the files created in this script. I break the computation into pieces because the script sometimes fails partway through.

In [1]:
# Star import from the converter library; presumably this is where `Net`
# (used in the next cell) comes from.
from caffemodel2pytorch import *
import torch
# Report whether a CUDA device is usable; the result is shown as the cell output.
torch.cuda.is_available()

True

In [2]:
# Build the network from the deploy definition and the trained Caffe weights.
# `caffe_proto` is given as a URL to the caffe.proto schema in the BVLC/caffe repo.
model = Net(
    prototxt='predicting_poverty_deploy.prototxt',
    weights='predicting_poverty_trained.caffemodel',
    caffe_proto='https://raw.githubusercontent.com/BVLC/caffe/master/src/caffe/proto/caffe.proto',
)

caffemodel2pytorch: loading model from [predicting_poverty_trained.caffemodel] in HDF5 format failed [No module named 'h5py'], falling back to caffemodel format
caffemodel2pytorch: loaded model from [predicting_poverty_trained.caffemodel] in caffemodel format


In [3]:
# Display the converted network so its layers can be inspected in the cell output.
model

Net(
  (conv1): Convolution(3, 64, kernel_size=(11, 11), stride=(4, 4))
  (relu1): ReLU()
  (norm1): LocalResponseNorm(5, alpha=0.0005, beta=0.75, k=1.0)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Convolution(64, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (norm2): LocalResponseNorm(5, alpha=0.0005, beta=0.75, k=1.0)
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (conv4): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU()
  (conv5): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU()
  (pool5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv6): Convolution(256, 4096, kernel_size=(6, 6), stride=(6, 6))
  (relu6): ReLU()
  (conv7): Convolution(4096, 4

In [4]:
# Strip the final layers so that the network's output is whatever the conv7
# layer produces; the paper uses the data at that layer as the image "features".
# The layers are removed one at a time, in the same order as before.
del model.prob
del model.pool6
del model.conv8
del model.relu7

In [5]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Put the module in evaluation mode, then move it to the chosen device.
# The last expression is left bare so the notebook displays the network.
model.eval()
model.to(device)

Net(
  (conv1): Convolution(3, 64, kernel_size=(11, 11), stride=(4, 4))
  (relu1): ReLU()
  (norm1): LocalResponseNorm(5, alpha=0.0005, beta=0.75, k=1.0)
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Convolution(64, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (norm2): LocalResponseNorm(5, alpha=0.0005, beta=0.75, k=1.0)
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (conv4): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU()
  (conv5): Convolution(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU()
  (pool5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv6): Convolution(256, 4096, kernel_size=(6, 6), stride=(6, 6))
  (relu6): ReLU()
  (conv7): Convolution(4096, 4

In [6]:
from torchvision import datasets, models, transforms

In [7]:
# Transform pipeline (a single ToTensor step) that turns image data into a
# PyTorch tensor; looked up by the key 'transform'.
data_transforms = {'transform': transforms.Compose([transforms.ToTensor()])}

In [8]:
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

def filename_to_im_tensor(file):
    """Load an image file and return it as a batched tensor on `device`.

    Preprocessing: keep the first three channels, bring pixel values to the
    0-255 range, reorder the channels from RGB to BGR, subtract the
    per-channel BGR mean once, and move the channel axis first.

    The previous implementation computed this array but then discarded it,
    returning instead an image whose mean had been subtracted in RGB order,
    wrapped through a uint8 cast, and rescaled to [0, 1].

    Args:
        file: Path of the image, read with `plt.imread`.

    Returns:
        A float32 tensor of shape (1, 3, H, W) on the module-level `device`.
    """
    # Drop any alpha channel; only the first three channels are used.
    raw = plt.imread(file)[:, :, :3]
    pixels = raw.astype(np.float32)
    if not np.issubdtype(raw.dtype, np.integer):
        # Float images from imread are in [0, 1] (the PNG case per the
        # Matplotlib docs); integer images are already 0-255.
        # NOTE(review): the factor 256 is kept from the original code;
        # confirm whether 255 was intended.
        pixels *= 256

    # convert to BGR, their Github code says to do this
    bgr = pixels[:, :, [2, 1, 0]]
    # subtract the mean to normalize the image, their Github code says to do this
    mean_bgr = np.array([103.334, 107.8797, 107.4072], dtype=np.float32)
    bgr = bgr - mean_bgr

    # convert to D,H,W and add the leading batch dimension
    chw = np.ascontiguousarray(np.transpose(bgr, [2, 0, 1]))
    return torch.from_numpy(chw)[None].to(device)

In [9]:
import os
import glob

In [10]:
# Find every per-country image directory; the trailing '/' in the pattern
# restricts the matches to directories.
dirlist = glob.glob('../process_data/data/ims_*/')
# The country tag is the text after the first 'ims_' up to, but not
# including, the final character (the trailing path separator).
countries = [
    path[path.find('ims_') + len('ims_'):-1]
    for path in dirlist
]
dirlist, countries

(['../process_data/data/ims_nigeria_2013/',
  '../process_data/data/ims_tanzania_2011/',
  '../process_data/data/ims_malawi_2016/',
  '../process_data/data/ims_uganda_2011/'],
 ['nigeria_2013', 'tanzania_2011', 'malawi_2016', 'uganda_2011'])

In [15]:
# For every country, run all of its images through the network in small
# batches and save the conv7 activations as '<country>_forward_feats.npy'.
for c in countries:
    image_dir = '../process_data/data/ims_{}/'.format(c)
    print(image_dir)
    # NOTE(review): rows of `feats` follow os.listdir order, which is not
    # guaranteed to be sorted; whatever consumes the .npy file must use the
    # same ordering to map rows back to images.
    ims = os.listdir(image_dir)
    feats = np.zeros((len(ims), 4096))
    batch_size = 4
    pre = image_dir + '{}'
    print(pre)

    # Batching several images per forward pass is, per the original author,
    # nearly 10x faster than one image at a time (5-7 minutes vs 45+).
    # no_grad() avoids building an autograd graph that is never used.
    with torch.no_grad():
        # A single strided loop covers full batches and the shorter final one.
        for start in range(0, len(ims), batch_size):
            batch_names = ims[start:start + batch_size]
            stop = start + len(batch_names)
            ims_as_tensors = torch.cat(
                [filename_to_im_tensor(pre.format(name)) for name in batch_names], 0)
            conv7 = model(ims_as_tensors)['conv7'].cpu().detach().numpy()
            # Flatten each image's activations to one row; unlike squeeze,
            # this keeps the row dimension when the batch holds one image.
            feats[start:stop, :] = conv7.reshape(len(batch_names), -1)
            # Progress marker every 100 images (not after the final batch,
            # matching the previous output).
            if stop % 100 == 0 and stop < len(ims):
                print(stop, end=', ')
    np.save('{}_forward_feats.npy'.format(c), feats)

../process_data/data/ims_uganda_2011/
../process_data/data/ims_uganda_2011/{}
100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800, 8900, 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11700, 11800, 11900, 12000, 12100, 12200, 12300, 12400, 12500, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13700, 13800, 13900, 14000, 14100, 14200, 14300, 14400, 14500, 14600, 14700, 1