In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image


In [14]:
class EncoderCNN(nn.Module):
    """Image encoder: a frozen, pretrained ResNet-50 trunk plus a linear projection.

    The trunk is every child module of torchvision's ``resnet50`` except the
    last one, wrapped in an ``nn.Sequential``. All backbone parameters have
    ``requires_grad`` switched off, so only ``self.embed`` remains trainable.

    Args:
        embed_size: Number of output features produced by the projection layer.
    """

    def __init__(self, embed_size):
        super().__init__()
        # NOTE(review): recent torchvision releases prefer the `weights=` argument
        # over `pretrained=True` — confirm against the installed version.
        backbone = models.resnet50(pretrained=True)

        # Freeze the backbone so its weights are never updated by an optimiser.
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Drop the final child of the ResNet and keep the rest as the trunk.
        trunk_layers = list(backbone.children())[:-1]
        self.resnet = nn.Sequential(*trunk_layers)
        # The projection consumes as many features as the original `fc` layer did.
        self.embed = nn.Linear(backbone.fc.in_features, embed_size)

    def forward(self, images):
        """Encode a batch of images.

        Args:
            images: Tensor accepted by the trunk; the first dimension is the batch.

        Returns:
            Tensor of shape ``(batch, embed_size)``.
        """
        pooled = self.resnet(images)
        batch = pooled.size(0)
        # Collapse everything after the batch dimension before projecting.
        flat = pooled.view(batch, -1)
        return self.embed(flat)

In [15]:
# Load the sample photo and convert it into a single-image batch for the encoder.
path="IMG_20240603_143057.jpg"

# `Image.open` is lazy and keeps the underlying file open; `convert` returns an
# independent in-memory copy, so the file can be closed as soon as it is done.
with Image.open(path) as source_image:
    image=source_image.convert('RGB')

# Resize the shorter side to 256, take the central 224x224 crop, convert to a
# float tensor and normalise each channel with the mean/std values commonly
# used for ImageNet-pretrained torchvision models.
preprocess=transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

# Add a leading batch dimension: (3, 224, 224) -> (1, 3, 224, 224).
image_tensor=preprocess(image).unsqueeze(0)



In [16]:
# Build the encoder and print the tensor shape after every backbone stage,
# then project the pooled features down to the embedding size.
size=256

# `encoder.embed` is randomly initialised, so fix the seed to make the printed
# embedding identical across "Restart & Run All".
torch.manual_seed(0)
encoder=EncoderCNN(size)

device=torch.device("cpu")
encoder.to(device)

# Evaluation mode: BatchNorm layers use their stored running statistics.
encoder.eval()

image_tensor=image_tensor.to(device)

# Inference only: disable autograd so the trainable projection layer does not
# build a graph and `output` is a plain tensor without a grad_fn.
with torch.no_grad():
    output=image_tensor

    for name,module in encoder.resnet.named_children():
        output=module(output)
        print(f"Output after {name}:  {output.shape}")

    # Same flatten + projection steps as `EncoderCNN.forward`.
    output=output.view(output.size(0), -1)
    output=encoder.embed(output)

print(f"Final feature vector shape: {output.shape} ")


Output after 0:  torch.Size([1, 64, 112, 112])
Output after 1:  torch.Size([1, 64, 112, 112])
Output after 2:  torch.Size([1, 64, 112, 112])
Output after 3:  torch.Size([1, 64, 56, 56])
Output after 4:  torch.Size([1, 256, 56, 56])
Output after 5:  torch.Size([1, 512, 28, 28])
Output after 6:  torch.Size([1, 1024, 14, 14])
Output after 7:  torch.Size([1, 2048, 7, 7])
Output after 8:  torch.Size([1, 2048, 1, 1])
Final feature vector shape: torch.Size([1, 256]) 


In [17]:
# Show the raw values of the embedding computed in the previous cell.
print("Embedding vector: ", output)

Embedding vector:  tensor([[-0.3779,  0.5733, -0.5492, -0.2237, -0.3551,  0.3335,  0.0900,  0.0774,
          0.0672, -0.2696, -0.4487, -0.6492, -0.1582,  0.1932, -0.0279,  0.1868,
          0.1633, -0.0349,  0.4709,  0.5265, -0.0363,  0.5123,  0.0569, -0.1499,
          0.9710,  0.0807, -0.0119,  0.0489,  0.1893,  0.2421,  0.3596,  0.3336,
          0.4300, -0.0028, -0.0322,  0.2212, -0.3928, -0.1116, -0.2976,  0.0805,
          0.2259,  0.0778, -0.1085, -0.0791,  0.4387,  0.0132,  0.1238, -0.0898,
          0.0500,  0.0041,  0.5562, -0.3675,  0.4827,  0.1344,  0.4219,  0.3893,
         -0.3230,  0.2005, -0.3687, -0.0360,  0.1939,  0.2200,  0.9519,  0.1055,
          0.4922,  0.5174, -0.0664,  0.4541,  0.0397,  0.0821,  0.0066, -0.0552,
          0.1160,  0.0132, -0.0238,  0.1272, -0.1236, -0.0730,  0.0911, -0.0130,
          0.1063,  0.0208, -0.1251,  0.0986, -0.3862, -0.0934, -0.2279,  0.0674,
          0.0103,  0.2360,  0.6793,  0.2747,  0.0046,  0.3297, -0.2134, -0.0567,
         

In [None]:
# Collect the activation produced by every backbone stage, then draw the
# channels of the first stage as an 8x8 grid of heat maps.
image_tensor=image_tensor.to(device)

output=image_tensor
feature_maps=[]

# Inference only, so skip autograd bookkeeping while running the stages.
with torch.no_grad():
    for module in encoder.resnet.children():
        output=module(output)
        if isinstance(output,torch.Tensor):
            # Store an independent copy so later stages cannot alias it.
            feature_maps.append(output.detach().clone())

first_stage=feature_maps[0]
num_features=first_stage.shape[1]

# `figsize` must be passed as a keyword argument to `plt.subplots`.
fig, axs=plt.subplots(8,8,figsize=(16,16))

# `axs.flat` walks the grid row by row, so `index` equals row*8 + column.
for index,ax in enumerate(axs.flat):
    ax.axis('off')
    # Leave the cell blank when the stage has fewer channels than grid cells.
    if index<num_features:
        ax.imshow(first_stage[0,index].cpu().numpy(),cmap='viridis')

plt.show()