# Image Encoding Extraction Using ResNet & a Nested Dictionary!

**March 2022**

**If you use parts of this notebook in your scripts/notebooks, giving  some kind of credit would be very much appreciated :)  You can for instance link back to this `notebook`, and `upvote it`. Thanks!**



This notebook has three key parts:
1.  Use of ``ImageFolder`` and ``DataLoader`` with a transform pipeline that resizes and scales the whole dataset ( >100k images ) **within 1 minute**.
2.  Use of ``Resnet18`` ( or any pretrained model ) to encode every image into a feature vector of 512 dimensions.
3. Use of a ``nested dictionary`` that mirrors the structure of the image folder contents. This allows you to select the feature vector of any image within a second and use it directly in your model through the keys of the dictionary ( more details in the last section ).

**What makes this work different is that it lets you use the information contained in the images directly in your model as complementary features: unlike notebooks that only provide a resized version of the images, this notebook provides the encoding produced by any pretrained model, stored in a convenient structure thanks to a nested dictionary.**

In [None]:
import numpy as np
import torch 
import random
import torch.nn as nn
import torch.optim as optim 
import os
import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torchvision import transforms

import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# ImageFolder & DataLoader

In [None]:
# Root directory of the H&M competition images (Kaggle input dataset).
# NOTE(review): presumably holds one sub-folder per article-id prefix ('010', '011', ...) — confirm.
images_path = "../input/h-and-m-personalized-fashion-recommendations/images"

In [None]:
def load_data(images_path, batch_size, image_size=(224, 224), num_workers=4):
    """Build a non-shuffled ``DataLoader`` over all images under ``images_path``.

    Args:
        images_path: Root directory laid out as ``root/<subfolder>/<image>``,
            the layout ``torchvision.datasets.ImageFolder`` reads.
        batch_size: Number of images per batch.
        image_size: ``(height, width)`` every image is resized to. Defaults to
            ``(224, 224)``.
        num_workers: Number of worker processes used to load and transform
            the images. Defaults to 4.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(images, labels)``
        batches in dataset order (``shuffle=False``); the last, possibly
        smaller, batch is kept (``drop_last=False``).
    """
    transformer = transforms.Compose(
        [
            # Bring every image to the same spatial size
            transforms.Resize(image_size),
            # Convert the loaded PIL image to a torch tensor
            transforms.ToTensor(),
            # Normalize images using the standardization used for Resnet training
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]
    )
    # Index every image found in the sub-folders of images_path
    data = torchvision.datasets.ImageFolder(root=images_path, transform=transformer)
    return torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=num_workers,
    )


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [None]:
# Creation of the dataloader
# batch_size is the number of images loaded per batch.
batch_size = 32
imgs_dataloader = load_data(images_path, batch_size)

# Resnet18

In [None]:
from torchvision.models import resnet18

# Pretrained ResNet-18 used purely as a feature extractor.
model = resnet18(pretrained=True)
resnet = torch.nn.Sequential(
    # Extract only the encoding part of the Resnet and omit the classifier part
    # (every child module except the last one)
    nn.Sequential(*list(model.children())[:-1]),
    # Flatten the remaining feature map into one vector per image
    torch.nn.Flatten(),
).eval()
# .eval() puts the encoder in inference mode: BatchNorm layers then use their
# stored running statistics instead of per-batch statistics, so the encoding of
# an image does not depend on the other images that share its batch.

In [None]:
# This function infers the encoding size of any pretrained model (vgg, resnet,
# mobilenet, ...) so that the placeholder array "encoding" can be sized to match.
def get_output_shape(model_, image_dim=(1,3,224,224)):
    """Return the size of dimension 1 of ``model_``'s output for a dummy input.

    A random tensor of shape ``image_dim`` is passed through ``model_`` once.
    The probe runs in evaluation mode and without gradient tracking, so it
    neither updates BatchNorm running statistics nor builds an autograd graph.
    The training/eval flag of every sub-module is restored afterwards.

    Args:
        model_: Callable (typically a ``torch.nn.Module``) mapping a tensor of
            shape ``image_dim`` to a tensor with at least two dimensions.
        image_dim: Shape of the dummy input. Defaults to a single 3x224x224
            image.

    Returns:
        ``int``: the length of the output along dimension 1.
    """
    # Remember each sub-module's own mode so it can be restored exactly, even
    # when parent and children were put in different modes by the caller.
    if isinstance(model_, torch.nn.Module):
        saved_modes = [(module, module.training) for module in model_.modules()]
        model_.eval()
    else:
        saved_modes = []
    try:
        with torch.no_grad():
            dummy_input = torch.rand(*image_dim)
            return model_(dummy_input).shape[1]
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

# Width of one feature vector; used to size the encoding placeholder.
# For example, the output of this function for Resnet18 is 512.
encoding_dim =  get_output_shape(resnet) 


In [None]:
print("initialize placeholder ....") 

# Pre-allocate the result: one row per image of the dataset, one column per encoding dimension.
# Because of GPU memory limits, the placeholder is kept on the CPU.
# NOTE: np.zeros defaults to float64.

encoding = np.zeros((len(imgs_dataloader.dataset),encoding_dim))
print("Finish !")
# A numpy array is used so that the encoding can be consumed from both PyTorch and TensorFlow


In [None]:
## Extract the resnet encoding for each image
def resnet_encoding(dataloader, encoding, model_):
    """Encode every image served by ``dataloader`` and store the vectors in ``encoding``.

    Batches are consumed in the order the dataloader yields them, and each
    batch's output is written to the next free rows of ``encoding``, so row
    ``i`` holds the vector of the ``i``-th image served.

    Args:
        dataloader: Iterable of ``(images, labels)`` batches; labels are ignored.
        encoding: Pre-allocated 2-D array with one row per image and one column
            per encoding dimension. It is filled in place.
        model_: ``torch.nn.Module`` mapping an image batch to a 2-D tensor of
            encodings. It is moved to the global ``device`` and left in
            evaluation mode.

    Returns:
        The same ``encoding`` array, filled.
    """
    print("Resnet18 inference begins ...")
    model_ = model_.to(device)
    # Inference mode: BatchNorm must use its stored statistics, otherwise an
    # image's encoding would depend on the other images in its batch.
    model_.eval()

    row = 0
    # No gradients are needed for feature extraction; skipping autograd saves memory.
    with torch.no_grad():
        # Iterating the dataloader directly stops exactly after the last batch,
        # including when the dataset size is a multiple of the batch size.
        for x, _ in tqdm(dataloader, position=0, leave=True):
            # The placeholder lives on the CPU, so bring each result back before storing it
            features = model_(x.to(device)).cpu().numpy()
            # Advance by the actual batch length (the last batch may be smaller)
            next_row = row + len(features)
            encoding[row:next_row, :] = features
            row = next_row
    print("Finish !")

    return encoding


In [None]:
# Run the encoder over the whole dataloader; the placeholder is filled in place and returned.
encoding = resnet_encoding(imgs_dataloader,encoding,resnet)

# Nested Dictionaries for the encoding ( Same structure as the image folder )

In [None]:

# This function outputs a nested dictionary that contains the subfolder name, the image
# file name, and its corresponding encoding.
# For example:
# final_dict = { '010' : { '0108775015.jpg': encoding_of_0108775015,
#                          '0108775044.jpg': encoding_of_0108775044,
#                           ...
#                        },
#                '011': {....},
#                '012': {....},
#                ...,
#                ...,
#
#               '095': {....},
#
#              }

def encoding_dictionnary(images_path, encoding):
    """Map ``subfolder name -> image file name -> encoding row``.

    Sub-folders of ``images_path`` are visited in sorted order and, inside
    each one, files are visited in sorted order. Rows of ``encoding`` are
    handed out consecutively in that same order, using a running offset so
    that each folder receives its own slice.

    NOTE(review): this assumes ``encoding`` rows follow the same order (sorted
    folders, then sorted file names) and that every file in a sub-folder has a
    row — confirm against how the encoding was produced.

    Args:
        images_path: Root directory laid out as ``root/<subfolder>/<file>``.
        encoding: Sliceable sequence (e.g. a 2-D numpy array) with one entry
            per image.

    Returns:
        ``dict`` keyed by sub-folder name (sorted); each value is a ``dict``
        keyed by file name (sorted) whose value is the matching entry of
        ``encoding``, or ``None`` if ``encoding`` ran out of entries.
    """
    ## EXTRACT NAMES OF EVERY SUBDIRECTORY OF THE IMAGE ROOT DIRECTORY. (ex : 010,011,....042,043)
    print("Creation of the first dictionnary ..")
    subdir_names = sorted(
        name
        for name in os.listdir(images_path)
        if os.path.isdir(os.path.join(images_path, name))
    )
    dict_imgs = {}
    print("Finish !")

    ## EXTRACT IMAGE FILE NAMES OF EACH SUBDIRECTORY AS KEYS AND ATTACH THEIR ENCODING
    print("Extract Images ID of each subdirectory and encoding vectors.. ")
    start = 0  # index of the first encoding row belonging to the current sub-folder
    for subdir in subdir_names:
        subdir_path = os.path.join(images_path, subdir)
        file_names = sorted(
            name
            for name in os.listdir(subdir_path)
            if os.path.isfile(os.path.join(subdir_path, name))
        )
        stop = start + len(file_names)

        # Every file gets a key; files beyond the end of `encoding` keep None
        per_file = dict.fromkeys(file_names)
        per_file.update(zip(file_names, encoding[start:stop]))
        dict_imgs[subdir] = per_file

        start = stop

    # Keys were inserted in sorted order, so the dict already mirrors the folder tree
    return dict_imgs

In [None]:
# Build the nested lookup: sub-folder name -> image file name -> encoding vector.
encoded_dataset_dict = encoding_dictionnary(images_path,encoding)

#### Dump the encoding & the nested dictionary  

In [None]:
import pickle

# Each file is opened in a `with` block so it is flushed and closed even if
# pickling fails part-way.

### Resnet Encoding dump
with open("H&M_IMAGES_ENCODING_Resnet18.pkl", "wb") as encoding_file:
    pickle.dump(encoding, encoding_file)


### Nested dictionary dump
with open("H&M_IMAGES_ENCODING_dict.pkl", "wb") as dict_file:
    pickle.dump(encoded_dataset_dict, dict_file)


# Read the encoding back as a sanity check and display its shape
with open("H&M_IMAGES_ENCODING_Resnet18.pkl", "rb") as a_file:
    output = pickle.load(a_file)
output.shape

## Test 

I wrote this section to check that the vector stored under each subkey of the dictionary matches the encoding produced by the model.

In [None]:
# Re-encode the first batch served by the dataloader so it can be compared
# with what was stored in the dictionary.
x,y = next(iter(imgs_dataloader))
x = x.to(device)  # the model was moved to `device` during encoding
encode_x = resnet(x)

In [None]:
# Check for correspondence
# You can try it for any item
# The comparison is element-wise and exact, so the displayed result is a boolean
# array with one entry per encoding dimension.
# NOTE(review): assumes '0108775015.jpg' is the first image served by the dataloader — confirm.
encode_x[0].cpu().detach().numpy() == encoded_dataset_dict['010']['0108775015.jpg']

You can use ``PCA``, ``Kernel PCA``, or any other dimensionality reduction technique to further compress the feature vector of the Resnet18 encoding...