# Neural Network Embeddings as Feature Vectors

In this exercise, you will load the weights of a pre-trained network and pass an image through the network to get the resulting embedding vector. This output can then be used as a feature vector for any classifier.

We are going to use the [ResNet50](https://pytorch.org/vision/stable/models.html) pre-trained network, which has been loaded for you below.

In [5]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
import torch
import torchvision.models as models
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from torch import nn
from torchsummary import summary
from torchvision import transforms

# Part 1 -- Extract Embedding Features From A Single Image

In [6]:
# Path to Dataset
root_path = '../image_classification/Intel Training Dataset/'

# Each class lives in its own subfolder. Sorting fixes the class order, which
# matters because the integer label assigned later is the subfolder's index.
# Only directories are kept so a stray file next to the class folders is not
# mistaken for a class.
subfolders = sorted(p for p in glob(root_path + '*') if os.path.isdir(p))

# The class name is the last path component of each subfolder
# (os.path.basename handles the platform's path separator).
label_names = [os.path.basename(p) for p in subfolders]

print(label_names)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [7]:
# load the model
# Builds torchvision's ResNet50 and requests its pre-trained weights.
# NOTE(review): newer torchvision releases appear to deprecate `pretrained=`
# in favour of a `weights=` argument -- confirm against the installed version.
resnet50 = models.resnet50(pretrained=True)

# get layers
def slice_model(original_model, from_layer=None, to_layer=None):
    """Chain a contiguous range of a model's direct children into a Sequential.

    Args:
        original_model: module whose direct children (as returned by
            ``children()``) are to be sliced.
        from_layer: start index of the slice (``None`` = first child).
        to_layer: end index of the slice, exclusive (``None`` = through the
            last child). Negative indices follow normal Python slicing.

    Returns:
        ``nn.Sequential`` wrapping the selected child modules. The children
        are passed through as-is (no copy is made here), and only the child
        modules are chained -- nothing from the original ``forward`` is reused.
    """
    child_modules = list(original_model.children())
    selected = child_modules[from_layer:to_layer]
    return nn.Sequential(*selected)

# Keep every child of ResNet50 except the last one (to_layer=-1), so the
# network outputs an embedding rather than the output of its final layer.
model_conv_features = slice_model(resnet50, to_layer=-1).to("cpu")

# summary() pushes a random batch through the model. Switch to eval mode first
# so that pass cannot update the BatchNorm running statistics of the
# pre-trained weights, and pin the summary to the CPU to match the model
# (torchsummary otherwise defaults to CUDA when it is available).
model_conv_features.eval()
summary(model_conv_features, input_size=(3,224,224), device="cpu")



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

# Part 2 -- Extract Features From A Few Images

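The cells below first build the preprocessing pipeline and embed a single image as a sanity check (the Part 1 task). Then, using the first 20 images per class (in sorted filename order), extract and store embedding features for each image.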



In [8]:
# preprocess
# Turns a PIL image into the normalized 3x224x224 tensor fed to the network
# (the same input size passed to summary()).
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        # PIL image -> float tensor
        transforms.ToTensor(),
        # Per-channel standardization.
        # NOTE(review): presumably the ImageNet statistics the pre-trained
        # weights expect -- confirm against the torchvision model docs.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def retype_image(in_img):
    """Convert an image array to ``uint8`` so it can be handed to PIL.

    Integer-typed input is assumed to already be on the 0-255 scale and is
    only cast. Other input (e.g. floats) is scaled by 255 when its maximum is
    at most 1, and cast directly otherwise.

    Args:
        in_img: array-like image data (anything ``np.asarray`` accepts).

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with the same shape as the input.
    """
    in_img = np.asarray(in_img)

    # Decide by dtype first: a very dark integer image (max <= 1) must not be
    # mistaken for a [0, 1] float image and blown up to 0/255.
    if np.issubdtype(in_img.dtype, np.integer):
        return in_img.astype(np.uint8)

    # np.max raises on empty input; there is nothing to scale anyway.
    if in_img.size == 0:
        return in_img.astype(np.uint8)

    if np.max(in_img) > 1:
        return in_img.astype(np.uint8)

    # Values in [0, 1]: rescale to the 0-255 range (truncating, as before).
    return (in_img * 255.0).astype(np.uint8)

In [9]:
# put the nn in evaluation mode
# model_conv_features was built from resnet50's own child modules (see
# slice_model), so switching resnet50 to eval mode also switches the layers
# used for feature extraction.
resnet50.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [20]:
# Sanity check on a single image: the first .jpg (in sorted order) of the
# first class folder.
fname = sorted(glob(subfolders[0] + "/*.jpg"))[0]
test_img = plt.imread(fname)

proc_img = preprocess(Image.fromarray(retype_image(test_img)))

# Inference only: no_grad() skips autograd bookkeeping, so the result can be
# converted to NumPy directly. unsqueeze(0) adds the batch dimension and
# squeeze() drops the size-1 dimensions of the output again.
with torch.no_grad():
    emb = model_conv_features(proc_img.unsqueeze(0).to("cpu")).squeeze().numpy()
print(emb.shape)

(2048,)


In [22]:
# train model on new features produced by pretrained network
num_per_class = 20

features = []
labels = []

# The class label is the index of the class folder in `subfolders`.
for i, class_dir in enumerate(subfolders):
    fnames = sorted(glob(class_dir + "/*.jpg"))

    # Slicing (instead of indexing 0..num_per_class-1) keeps this working for
    # classes that contain fewer than num_per_class images.
    for fname in fnames[:num_per_class]:
        img = plt.imread(fname)
        proc_img = preprocess(Image.fromarray(retype_image(img)))

        # Inference only: no_grad() avoids building an autograd graph for
        # every image.
        with torch.no_grad():
            feat = model_conv_features(proc_img.unsqueeze(0).to("cpu")).squeeze().numpy()

        labels.append(i)
        features.append(feat)


In [23]:
# Turn the per-image lists into arrays: one row of `features` per image, with
# the matching class index at the same position in `labels`.
features, labels = np.array(features), np.array(labels)

print(features.shape)

(120, 2048)


In [25]:
# Hold out 20% of the samples for testing. Stratifying on the labels keeps the
# class proportions equal in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=0
)

# Standardize each embedding dimension, then fit a support vector classifier.
# fit() returns the fitted pipeline itself, so it can be chained.
clf = make_pipeline(StandardScaler(), SVC(gamma="auto")).fit(X_train, y_train)

# Predictions are kept in y_pred; the reported accuracy comes from score().
y_pred = clf.predict(X_test)
print(f"Accuracy: {clf.score(X_test, y_test)}")

Accuracy: 0.75
