In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from PIL import Image
import numpy as np
import io
from torchvision import models, transforms
import torch
import logging

In [39]:
# Lazily-built VGG16 shared by every extract_features() call, so the
# pre-trained weights are loaded once per kernel instead of once per image.
_VGG16_MODEL = None

# Default number of seconds to wait on the image server before giving up.
_REQUEST_TIMEOUT_S = 10


def _get_vgg16():
    """Return the shared pre-trained VGG16 in evaluation mode, building it on first use."""
    global _VGG16_MODEL
    if _VGG16_MODEL is None:
        # NOTE: `pretrained=True` is kept for compatibility with the torchvision
        # version this notebook was written against; newer releases prefer the
        # `weights=` argument.
        net = models.vgg16(pretrained=True)  # Example: VGG16
        net.eval()  # Set the model to evaluation mode
        _VGG16_MODEL = net
    return _VGG16_MODEL


def extract_features(image_url, timeout=_REQUEST_TIMEOUT_S):
    """Download an image and run it through the pre-trained VGG16.

    The image is fetched over HTTP with a browser-like User-Agent, converted to
    RGB, resized/cropped to 224x224, normalized, and passed through the whole
    network (classifier head included). The value returned is therefore the
    model's final output, not an intermediate-layer embedding.

    Args:
        image_url: URL of the image to download.
        timeout: Seconds to wait for the HTTP request before it is abandoned.

    Returns:
        A 2-D NumPy array with a leading batch dimension of 1, or ``None`` if
        any step (model loading, download, decoding, inference) raises. The
        failure is logged rather than propagated.
    """
    try:
        model = _get_vgg16()

        # Define the transformation to preprocess the image. The mean/std are
        # the values commonly used with torchvision's ImageNet-pretrained models.
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # Load the image from the URL with a User-Agent header
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Without a timeout a stalled server would block the kernel indefinitely.
        response = requests.get(image_url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses

        # Force three channels so grayscale/RGBA/palette images fit the model input.
        img = Image.open(io.BytesIO(response.content)).convert('RGB')
        img_t = preprocess(img)
        batch_t = torch.unsqueeze(img_t, 0)  # add the batch dimension

        # Get the feature vector; no gradients are needed for inference.
        with torch.no_grad():
            features = model(batch_t)

        logging.info(f"Successfully extracted features for {image_url}")
        return features.numpy()

    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller decide.
        logging.error(f"Error extracting features from {image_url}: {e}")
        return None  # Return None if extraction fails

In [40]:
# Images to compare, listed in the order they are bound to f_1 … f_9 below.
IMAGE_URLS = [
    'https://i.pinimg.com/564x/e2/7d/ac/e27dacb3d16cae459e0e13f51c3cd99f.jpg',
    'https://i.pinimg.com/736x/04/51/4b/04514b6528074b8f3f53e7f8bfb88967.jpg',
    'https://i.pinimg.com/enabled/564x/b4/95/ee/b495ee1c44ec456d9988abeadda2fd36.jpg',
    'https://i.pinimg.com/enabled/564x/6d/27/e3/6d27e380c4378578ad8b0150ad4572cb.jpg',
    'https://i.pinimg.com/enabled/564x/7e/3d/9a/7e3d9aa54c0193335cee21abe42af3a7.jpg',
    'https://i.pinimg.com/564x/b2/8b/87/b28b87111fe07cb007926d3ee84914d9.jpg',
    'https://i.pinimg.com/564x/5f/ec/d0/5fecd0453c81bd46c3a9b39e41292b9f.jpg',
    'https://i.pinimg.com/564x/80/67/8e/80678edf3ede3c94b98d7ea81ed41981.jpg',
    'https://i.pinimg.com/enabled/564x/e0/09/f6/e009f69030b0e9727d800bc05f9e2de5.jpg',
]

# One extraction per URL. extract_features() returns None when it fails.
image_features = [extract_features(url) for url in IMAGE_URLS]

# Keep the original per-image names so the comparison cells below still work.
f_1, f_2, f_3, f_4, f_5, f_6, f_7, f_8, f_9 = image_features

# Surface failures here, with the offending URLs, instead of letting a None
# reach cosine_similarity() in a later cell.
failed_urls = [url for url, feats in zip(IMAGE_URLS, image_features) if feats is None]
if failed_urls:
    raise RuntimeError(f"Feature extraction failed for: {failed_urls}")

In [48]:
# Cosine similarity between the extracted outputs for images 1 and 2.
# cosine_similarity is already imported in the notebook's first cell.
cosine_similarity(f_1, f_2)

array([[0.8704705]], dtype=float32)

In [49]:
# Cosine similarity between the extracted outputs for images 1 and 3.
cosine_similarity(f_1, f_3)

array([[0.812181]], dtype=float32)

In [50]:
# Cosine similarity between the extracted outputs for images 1 and 4.
cosine_similarity(f_1, f_4)

array([[0.52790296]], dtype=float32)

In [51]:
# Cosine similarity between the extracted outputs for images 1 and 5.
cosine_similarity(f_1, f_5)

array([[0.81082374]], dtype=float32)

In [52]:
# Cosine similarity between the extracted outputs for images 6 and 7.
# cosine_similarity is already imported in the notebook's first cell.
cosine_similarity(f_6, f_7)

array([[0.6615258]], dtype=float32)

In [55]:
# Cosine similarity between the extracted outputs for images 8 and 9.
# cosine_similarity is already imported in the notebook's first cell.
cosine_similarity(f_8, f_9)

array([[0.86329114]], dtype=float32)