## Get DenseNet bottleneck features with pretrained model in PyTorch

In [5]:
import os
from sklearn.datasets import load_files  
from keras.preprocessing import image 
import numpy as np
from keras.utils import np_utils

import torch
from torch.autograd import Variable
from torch import Tensor
import torch.nn as nn
from torchvision import transforms
from torchvision.models import densenet161
from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Convert image paths to torch Variables in channels-first layout,
# i.e., 'channel' is the 2nd dimension, not the last

def load_dataset(directory, num_classes=133):
    """Return the image file paths and one-hot targets found under a folder.

    The folder is scanned with ``sklearn.datasets.load_files``, which treats
    each sub-folder as one category.

    Args:
        directory: Root folder of the dataset.
        num_classes: Width of the one-hot target vectors. Defaults to 133,
            the value that used to be hard-coded here.

    Returns:
        A ``(file_paths, file_targets)`` tuple: a NumPy array of file names
        and the matching one-hot encoded target matrix.
    """
    # Only the file names and labels are used, so skip reading every image's
    # raw bytes into memory (load_files does that by default).
    data = load_files(directory, load_content=False)
    file_paths = np.array(data['filenames'])
    file_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return file_paths, file_targets

def path_to_tonsor(img_path):
    """Load one image file as a normalized, batched torch tensor.

    The image is resized to 224x224, converted to a channels-first tensor,
    normalized with the per-channel mean/std below, and given a leading
    batch dimension.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A tensor of shape (1, 3, 224, 224).
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # Normalize is configured for exactly three channels, so grayscale or
    # RGBA files must be converted first. convert() also loads the pixel
    # data, which lets the context manager close the file right away.
    with Image.open(img_path) as img:
        rgb_img = img.convert('RGB')
    return preprocess(rgb_img).unsqueeze(0)

def paths_to_variables(img_paths):
    """Turn a sequence of image paths into a single batched Variable.

    Each path is preprocessed with ``path_to_tonsor`` and the resulting
    single-image tensors are concatenated along dimension 0.

    Args:
        img_paths: Iterable of image file paths.

    Returns:
        A ``Variable`` wrapping the batch, with ``requires_grad=False``.
    """
    batch = torch.cat([path_to_tonsor(path) for path in img_paths], 0)
    return Variable(batch, requires_grad=False)

def paths_to_features(img_paths):
    """Compute DenseNet-161 bottleneck features for a batch of images.

    A pretrained ``densenet161`` is loaded, its classifier is replaced by an
    identity mapping, and the images are passed through it in eval mode.

    Args:
        img_paths: Iterable of image file paths; all of them are processed
            as one batch, so keep it small enough to fit in memory.

    Returns:
        A NumPy array with one row of features per input image.
    """
    variables = paths_to_variables(img_paths)

    densenet = densenet161(pretrained=True)
    # torchvision's DenseNet classifier is a single nn.Linear without child
    # modules, so slicing its children always yielded an empty Sequential.
    # An empty Sequential passes its input through unchanged; say so directly.
    densenet.classifier = nn.Sequential()
    densenet.eval()
    # Inference only: disabling autograd avoids building a graph over the
    # model parameters, which noticeably reduces memory use per batch.
    with torch.no_grad():
        # Call the module itself rather than .forward() so hooks are honored.
        features = densenet(variables)
    return features.numpy()


### Pass forward the images to get the bottleneck feature maps

Due to the limited capacity of my laptop, passing too many images to the DenseNet network at once would stop the kernel. So I send a batch of 100 images each time, save the features in a temporary folder, and repeat until all features are obtained. After doing this 67 times for the training set (and 9 times each for the validation and test sets), I had all the feature maps needed; concatenating them per set gives the three *.npy* files in the *bottleneck_features* directory.

## Now do the same *without* PyTorch, since PyTorch cannot be used on a Windows machine

I found the following implementations of DenseNet in Keras:

* The model in [repository](https://github.com/flyyufelix/DenseNet-Keras.git) stopped at the weight-loading stage with errors saying that the dimensions do not match.
* The model in [repository](https://github.com/titu1994/DenseNet.git) can successfully return bottleneck feature maps after modifications:
    - was: ~~require_flatten = include_top~~
    + is : include_top = include_top
    - was: ~~default_size = 32~~
    + is : default_size = 224

In [8]:
import densenet

# Channels-last input shape: 224x224 pixels with 3 channels.
image_dim = (224, 224, 3)
# Build the 161-layer ImageNet DenseNet without its top, so that predict()
# can be used to obtain bottleneck features.
DenseNet_second = densenet.DenseNetImageNet161(
    input_shape=image_dim,
    include_top=False,
)

Weights for the model were loaded successfully


In [None]:
from keras.preprocessing import image                  
from tqdm import tqdm
from sklearn.datasets import load_files  
import numpy as np
from keras.utils import np_utils
from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

def load_dataset(directory, num_classes=133):
    """Return the image file paths and one-hot targets found under a folder.

    The folder is scanned with ``sklearn.datasets.load_files``, which treats
    each sub-folder as one category.

    Args:
        directory: Root folder of the dataset.
        num_classes: Width of the one-hot target vectors. Defaults to 133,
            the value that used to be hard-coded here.

    Returns:
        A ``(file_paths, file_targets)`` tuple: a NumPy array of file names
        and the matching one-hot encoded target matrix.
    """
    # Only the file names and labels are used, so skip reading every image's
    # raw bytes into memory (load_files does that by default).
    data = load_files(directory, load_content=False)
    file_paths = np.array(data['filenames'])
    file_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return file_paths, file_targets
# Collect the file paths and one-hot targets of the images under
# 'dogImages/train'; the paths are consumed by the feature-extraction loop.
train_file_paths, train_file_targets = load_dataset('dogImages/train')

def path_to_tensor(img_path):
    """Read an image file and return it as a preprocessed batch of one.

    The image is loaded at 224x224, converted to a 3D array of shape
    (224, 224, 3), given a leading batch axis to become (1, 224, 224, 3),
    and finally passed through ``densenet.preprocess_input``.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis: (224, 224, 3) -> (1, 224, 224, 3).
    single_batch = pixels[np.newaxis, ...]
    return densenet.preprocess_input(single_batch)

In [None]:
# Forward the training images through the network to get the bottleneck
# features, in batches of `batch_size`, saving one .npy file per batch.
# The batch count is derived from the number of paths (rounding up) so the
# final, partial batch is included; the previous fixed range(1, 67) stopped
# after 66 batches and never processed the images past index 6600.
batch_size = 100
num_batches = (len(train_file_paths) + batch_size - 1) // batch_size
for i in range(1, num_batches + 1):
    path_slice = train_file_paths[(i-1)*batch_size:i*batch_size]
    # One predict() call per image keeps peak memory low.
    DenseNet_output = [DenseNet_second.predict(path_to_tensor(path)) for path in path_slice]
    np.save('bottleneck_features/train/train_batch_'+str(i)+'.npy', DenseNet_output)

In [8]:
import numpy as np
# Read the nine saved test batches (numbered 1 through 9), join them along
# the first axis in a single concatenate call, and store the combined array.
test_batches = []
for i in range(1, 10):
    test_batches.append(np.load('bottleneck_features/test/test_batch_'+str(i)+'.npy'))
DenseNetFeature_test = np.concatenate(test_batches)
np.save('bottleneck_features/DenseNetFeature_test.npy', DenseNetFeature_test)

In [23]:
# Flatten the combined training features to a 2D (6680, 2208) array and save
# it. DenseNetFeature_train is not built in the cells shown here; presumably
# it is the concatenation of the saved train batches, one row per image
# (TODO confirm). reshape() raises if the element count is not 6680 * 2208.
np.save('bottleneck_features/train_DenseNet.npy', DenseNetFeature_train.reshape(6680, 2208))