@christofhenkel's kernel on image feature extraction with pretrained models in Keras (https://www.kaggle.com/christofhenkel/extract-image-features-from-pretrained-nn) inspired me to do the same thing with PyTorch. In this kernel I demonstrate how to extract features with a pretrained Inception_v3 model in PyTorch. Previously, @pvlima posted a great kernel on image classification with pretrained models in PyTorch (https://www.kaggle.com/pvlima/use-pretrained-pytorch-models). But, unlike in Keras, pretrained models in PyTorch contain the last FC layer, which is unnecessary for feature extraction. Also, the trick with Inception_v3 is that it has two outputs: there is an auxiliary branch in the network (https://arxiv.org/abs/1409.4842, https://arxiv.org/abs/1512.00567) which helps with the classification task but is useless for our goal. So, in order to get rid of all the unnecessary parts, I subclassed the Inception_v3 class and overrode its forward method. I also added an extra 1D average pooling layer to reduce the number of features (originally, the output is a 2048D vector).

In [None]:
import time
import cv2
import pandas as pd
import numpy as np
import os
from tqdm import tqdm, tqdm_notebook
from PIL import Image
from os.path import isfile, join, abspath, exists, isdir, expanduser
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision.models.inception import *
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim import lr_scheduler

import random

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Side length (in pixels) of the square images produced by resize_to_square.
img_size = 299 # for Inception V3
# Number of images per batch handed to the DataLoader in extract_features.
BATCH_SIZE = 128
# NOTE(review): not referenced anywhere in the visible code.
N_EPOCHS = 5

In [None]:
# Image folders; load_image builds "<folder><PetID>-1.jpg" from these, so the
# trailing slash is kept.
train_path = "../input/petfinder-adoption-prediction/train_images/"
test_path = "../input/petfinder-adoption-prediction/test_images/"
# Tabular data; extract_features later merges the image features into these
# frames on the 'PetID' column.
train_df = pd.read_csv('../input/petfinder-adoption-prediction/train/train.csv')
test_df = pd.read_csv('../input/petfinder-adoption-prediction/test/test.csv')

In [None]:
# Replace missing names with an explicit placeholder token.
train_df.Name = train_df.Name.fillna('GOTNONAME')
test_df.Name = test_df.Name.fillna('GOTNONAME')

# Same for missing descriptions.
train_df.Description = train_df.Description.fillna('GOTNODESC')
test_df.Description = test_df.Description.fillna('GOTNODESC')

### Copy the pretrained model weights into ~/.torch, the default directory for model weights

In [None]:
# https://www.kaggle.com/pvlima/use-pretrained-pytorch-models

# Per-user cache directory that the pretrained weights are copied into.
cache_dir = expanduser(join('~', '.torch'))
models_dir = join(cache_dir, 'models')
# makedirs creates any missing parents (cache_dir included); exist_ok=True
# replaces the separate exists() checks and their check-then-create race.
os.makedirs(models_dir, exist_ok=True)

In [None]:
!ls ../input/

In [None]:
!cp ../input/pytorch-pretrained-models/* ~/.torch/models/

In [None]:
!ls ~/.torch/models

### Seed everything for deterministic results

In [None]:
# https://www.kaggle.com/bminixhofer/deterministic-neural-networks-using-pytorch
def seed_everything(seed=1234):
    """Seed every random number generator used here and pin cuDNN behaviour.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels between runs.
    torch.backends.cudnn.benchmark = False


seed_everything()

### Image preprocessing

In [None]:
# https://www.kaggle.com/christofhenkel/extract-image-features-from-pretrained-nn
def resize_to_square(im):
    """Scale *im* so its longer side is ``img_size`` and zero-pad it to a square.

    The aspect ratio is preserved; the shorter side is centred and the
    remaining border is filled with black pixels.

    Args:
        im: Image array whose first two dimensions are (height, width).

    Returns:
        An ``img_size`` x ``img_size`` image.
    """
    height, width = im.shape[:2]
    ratio = float(img_size) / max(height, width)
    # Truncate as before, but never below one pixel: a very elongated image
    # would otherwise get a zero-sized side and make cv2.resize fail.
    new_height = max(1, int(height * ratio))
    new_width = max(1, int(width * ratio))
    # cv2.resize takes the target size as (width, height).
    im = cv2.resize(im, (new_width, new_height))
    delta_w = img_size - new_width
    delta_h = img_size - new_height
    # Split the padding evenly; the odd pixel (if any) goes to bottom/right.
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)
    color = [0, 0, 0]
    return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def load_image(path, pet_id):
    """Load the first photo of a pet as a square RGB image.

    Args:
        path: Folder containing the images.
        pet_id: Pet identifier; the file read is ``<pet_id>-1.jpg``.

    Returns:
        The image resized/padded by ``resize_to_square`` and converted from
        OpenCV's BGR channel order to RGB.

    Raises:
        OSError: If the file is missing or cannot be decoded.
    """
    file_name = os.path.join(path, f'{pet_id}-1.jpg')
    image = cv2.imread(file_name)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which previously surfaced as an obscure AttributeError downstream.
        raise OSError(f'Could not read image: {file_name}')
    new_image = resize_to_square(image)
    return cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)

### Define a Dataset that loads and preprocesses images

In [None]:
class ImageLoader(Dataset):
    """Dataset that yields the preprocessed first photo of each pet.

    Args:
        list_IDs: Sequence of pet IDs, one sample per ID.
        labels: Optional sequence of targets; assumed to be indexable by
            position and aligned with ``list_IDs``.
        dir_name: Folder passed to ``load_image`` for every sample.
        transform: Optional callable applied to the loaded image.
        return_id: If true, the pet ID is appended to each returned sample.
    """

    def __init__(self, list_IDs, labels=None, dir_name=None, transform=None, return_id=False):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs
        self.dir_name = dir_name
        self.transform = transform
        self.return_id = return_id

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Generate one sample.

        Returns ``X``, ``(X, y)``, ``(X, ID)`` or ``(X, y, ID)`` depending on
        whether labels were given and whether ``return_id`` is set.
        """
        ID = self.list_IDs[index]

        try:
            X = load_image(self.dir_name, ID)
        except Exception as err:
            # Best effort: an unreadable image must not abort the whole run,
            # but (unlike a bare ``except``) KeyboardInterrupt still propagates.
            import warnings
            warnings.warn(f'Could not load image for ID {ID!r}; using a blank placeholder ({err})')
            # uint8 matches what a successfully decoded image looks like, so
            # the transformed placeholder has the same dtype as real samples.
            X = np.zeros((img_size, img_size, 3), dtype=np.uint8)

        if self.transform is not None:
            X = self.transform(X)

        # ``if self.labels`` is ambiguous for arrays/Series; test explicitly.
        has_labels = self.labels is not None and len(self.labels) > 0
        if has_labels:
            # The label was never looked up before, so any labelled use
            # failed with a NameError.
            y = self.labels[index]
            if self.return_id:
                return X, y, ID
            return X, y
        if self.return_id:
            return X, ID
        return X

In [None]:
# Per-channel mean/std normalisation applied after ToTensor
# (presumably the usual ImageNet statistics -- confirm against the weights used).
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
# Transform pipeline handed to ImageLoader in extract_features: convert the
# image array to a tensor, then normalise it. Resizing is already done by
# load_image, so the resize/crop transforms stay disabled.
ds_trans = transforms.Compose([
                               #transforms.Scale(224),
                               #transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               normalize])

We keep the Inception constructor as is, so that all the layers of the network are instantiated, even those that we won't use later. We add an extra field *final_pooling* that holds the kernel size of the last 1D average pooling layer.

In [None]:
class CustomInception3(Inception3):
    """Inception v3 used as a feature extractor.

    The forward pass stops after the global average pooling: the auxiliary
    branch, dropout and the final fully connected layer are skipped.

    Args:
        num_classes: Accepted for signature compatibility; not forwarded.
        aux_logits: Accepted for signature compatibility; not forwarded.
        transform_input: Forwarded to ``Inception3``; when true, ``forward``
            re-scales the input channels before the first convolution.
        final_pooling: Optional kernel size of an extra 1D average pooling
            applied over the pooled feature vector; ``None`` disables it.
    """

    def __init__(self, num_classes=1000, aux_logits=False, transform_input=False, final_pooling=None):
        # num_classes and aux_logits are deliberately NOT forwarded: the base
        # class is built with its own defaults (as the original no-argument
        # call did) so that all layers exist when pretrained weights are loaded.
        # transform_input used to be silently ignored; it is now honoured.
        # NOTE(review): torchvision's own pretrained loader appears to enable
        # transform_input for the Google weights -- confirm the intended
        # normalisation before changing the default here.
        super(CustomInception3, self).__init__(transform_input=transform_input)
        # Set after the base initialiser so nn.Module is fully set up first.
        self.final_pooling = final_pooling

    def forward(self, x):
        """Return pooled features for a batch of images.

        Returns a ``(batch, channels, 1, 1)`` tensor, or
        ``(batch, 1, channels // final_pooling)`` when ``final_pooling`` is set.
        The fixed 8x8 pooling assumes 299x299 inputs.
        """
        if self.transform_input:
            x = x.clone()
            x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        # 299 x 299 x 3
        x = self.Conv2d_1a_3x3(x)
        # 149 x 149 x 32
        x = self.Conv2d_2a_3x3(x)
        # 147 x 147 x 32
        x = self.Conv2d_2b_3x3(x)
        # 147 x 147 x 64
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # 73 x 73 x 64
        x = self.Conv2d_3b_1x1(x)
        # 73 x 73 x 80
        x = self.Conv2d_4a_3x3(x)
        # 71 x 71 x 192
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # 35 x 35 x 192
        x = self.Mixed_5b(x)
        # 35 x 35 x 256
        x = self.Mixed_5c(x)
        # 35 x 35 x 288
        x = self.Mixed_5d(x)
        # 35 x 35 x 288
        x = self.Mixed_6a(x)
        # 17 x 17 x 768
        x = self.Mixed_6b(x)
        # 17 x 17 x 768
        x = self.Mixed_6c(x)
        # 17 x 17 x 768
        x = self.Mixed_6d(x)
        # 17 x 17 x 768
        x = self.Mixed_6e(x)
        # 17 x 17 x 768 -- the auxiliary classifier would branch off here; it
        # is skipped because only the main trunk's features are needed.
        x = self.Mixed_7a(x)
        # 8 x 8 x 1280
        x = self.Mixed_7b(x)
        # 8 x 8 x 2048
        x = self.Mixed_7c(x)
        # 8 x 8 x 2048
        x = F.avg_pool2d(x, kernel_size=8)  # size (batch_size, 2048, 1, 1)
        # 1 x 1 x 2048 -- dropout and the final FC layer are intentionally
        # not applied: we want features, not class scores.

        if self.final_pooling:
            # Lay the channels out along the last axis, (batch, 1, channels),
            # so avg_pool1d averages groups of neighbouring features.
            x = x.view(x.size(0), x.size(1), 1).permute(0, 2, 1)
            x = F.avg_pool1d(x, kernel_size=self.final_pooling)

        return x

### Create Custom Inception instance and load weights

In [None]:
# Build the path from models_dir (the ~/.torch/models folder the weights are
# copied into) instead of hard-coding one particular home directory.
inception_weights = join(models_dir, "inception_v3_google-1a9a5a14.pth")

We apply an extra 1D average pooling to the final 1 x 1 x 2048 activations. <br>
Here we use a kernel of size 8, so the final output will have 256 features.

In [None]:
# Feature extractor with an extra 1D average pooling of kernel size 8.
InceptionModel = CustomInception3(final_pooling=8)

# Load the pretrained weights from the file referenced by inception_weights.
InceptionModel.load_state_dict(torch.load(inception_weights))

### Extract features with CustomInception and concat them to the original dataframe

In [None]:
def find_ids_w_images(df, image_folder):
    """Return the IDs of all pets that have a first photo in *image_folder*.

    Args:
        df: Unused; kept so existing callers keep working.
        image_folder: Folder scanned for files named ``<PetID>-1.jpg``.

    Returns:
        Sorted list of pet IDs (file names with the ``-1.jpg`` suffix removed).
    """
    suffix = "-1.jpg"
    # Strip only the trailing suffix (a split would cut at the first
    # occurrence) and sort, because os.listdir returns an arbitrary order.
    return sorted(
        name[:-len(suffix)]
        for name in os.listdir(image_folder)
        if name.endswith(suffix)
    )

In [None]:
def extract_features(df, image_folder, model):
    """Run *model* over every pet's first photo and merge the features into *df*.

    Args:
        df: DataFrame with a ``PetID`` column.
        image_folder: Folder containing the ``<PetID>-1.jpg`` images.
        model: Network mapping an image batch to one feature tensor per image.
            It is switched to eval mode and moved to the GPU when available.

    Returns:
        ``df`` outer-merged on ``PetID`` with one ``img_<n>`` column per
        feature; every missing value in the result is replaced by 0.

    Raises:
        ValueError: If *image_folder* contains no ``*-1.jpg`` image.
    """
    # Fall back to the CPU so the function also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    model = model.to(device)

    img_ids = find_ids_w_images(df, image_folder)
    if not img_ids:
        # Previously this ended in an AttributeError on ``None.view``.
        raise ValueError(f'No "*-1.jpg" images found in {image_folder!r}')

    data_ds = ImageLoader(img_ids, dir_name=image_folder, transform=ds_trans)
    data_dl = DataLoader(data_ds, batch_size=BATCH_SIZE, shuffle=False)

    # Collect per-batch results on the CPU and concatenate once: repeated
    # torch.cat on a growing GPU tensor copies all earlier features each time.
    batches = []
    with torch.no_grad():
        for x in tqdm(data_dl, disable=True):
            output = model(x.to(device))
            batches.append(output.reshape(output.size(0), -1).cpu())
    features = torch.cat(batches, 0)

    columns = [f'img_{n}' for n in range(features.size(-1))]
    feat_df = pd.DataFrame(features.numpy(), columns=columns)
    # shuffle=False keeps the rows in the same order as img_ids.
    feat_df = pd.concat((pd.DataFrame({'PetID': img_ids}), feat_df), axis=1)

    # Outer join: pets without a photo keep their row (features become 0
    # below); images whose ID is not in df add a row.
    feat_df = df.merge(feat_df, on='PetID', how='outer')

    return feat_df.fillna(0)

In [None]:
# Add the image-feature columns for the training images to the training frame.
train_df = extract_features(train_df, train_path, InceptionModel)

In [None]:
# Extract features for the TEST set: the original passed train_df/train_path
# here, which overwrote test_df with a second copy of the training data.
test_df = extract_features(test_df, test_path, InceptionModel)