In [253]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import vgg16
from PIL import Image
import os
from tqdm import tqdm
import h5py
import torch.nn.functional as F
import pandas as pd
from sklearn.model_selection import train_test_split

In [254]:
# Load features from the HDF5 file
input_file = 'features.h5'  # Adjust the filename as needed
with h5py.File(input_file, 'r') as hf:
    features_numpy = hf['features'][:]

In [255]:
indexes = list(range(0, len(features_numpy)))

In [256]:
features_tensor = torch.from_numpy(features_numpy)

In [257]:
len([tensor for _,tensor in enumerate(features_tensor)])

1124

In [258]:
features_tensor = features_tensor.cuda()

In [259]:
train_data,test_data,train_index,test_index = train_test_split(features_tensor,indexes,test_size=0.20,random_state=50)

In [260]:
# Load pre-trained VGG16 model and move it to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16_model = vgg16(pretrained=True).to(device)
vgg16_model = nn.Sequential(*list(vgg16_model.features.children()))

# Set the model to evaluation mode
vgg16_model.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [261]:
def extract_vgg_features(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = preprocess(image)
    image = torch.unsqueeze(image, 0).to(device)  # Add batch dimension and move to GPU

    # Forward pass to get features
    with torch.no_grad():
        features = vgg16_model(image)

    # Return the features
    return features

In [262]:
def find_most_similar(input_tensor, features_tensor):

    input_tensor_normalized = F.normalize(input_tensor,dim=0)
    features_tensor_normalized = F.normalize(features_tensor, dim=1)
    similarity_scores = []
    for tensor in features_tensor_normalized:
        tensor1_flat = input_tensor_normalized.view(1, -1)
        tensor2_flat = tensor.view(1, -1)
        cosine_similarity = F.cosine_similarity(tensor1_flat, tensor2_flat)
        similarity_scores.append(cosine_similarity.item())
    
    return similarity_scores.index(max(similarity_scores))

In [263]:
se = 0
for real_index,tensor in tqdm(zip(test_index,test_data)):
    max_score_index = find_most_similar(tensor, train_data)
    se = (max_score_index - real_index)**2

mse = se/len(test_index)

225it [00:31,  7.20it/s]


In [264]:
import math
rmse = math.sqrt(mse)
rmse

8.8

In [265]:
input_tensor.shape

torch.Size([1124, 512, 7, 7])

In [266]:
input_tensor

tensor([[[[1.6377, 1.5557, 1.4047,  ..., 1.3248, 0.0000, 0.0000],
          [0.1310, 2.9797, 6.4550,  ..., 7.7023, 4.2924, 2.1559],
          [0.0000, 0.0000, 5.3688,  ..., 3.9664, 0.6383, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.4846]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [2.4525, 0.0000, 0.1672,  ..., 2.0567, 2.7525, 1.4417],
          [5.0432, 4.3111, 3.7603,  ..., 4.2742, 4.6709, 2.8333]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.5583, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0

In [None]:
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image

model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
image_path = 'path/to/your/image.jpg'
image = Image.open(image_path).convert("RGB")
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)

with torch.no_grad():
	features = model.backbone(image_tensor)