In [1]:
!pip install facenet-pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m64.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
import pandas as pd
merged_df = pd.read_csv('drive/MyDrive/merged.csv')

In [None]:
import requests
from PIL import Image, UnidentifiedImageError
from io import BytesIO
import numpy as np
from skimage import metrics
import torch
import torchvision.transforms as transforms
from facenet_pytorch import MTCNN, InceptionResnetV1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize face detection model
mtcnn = MTCNN(keep_all=True, device=device)

# Initialize face recognition model
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Define function to get facial features from image URL
def get_face_features(url):
    
    # Download image from URL and convert to PyTorch tensor
    img = transforms.ToTensor()(Image.open(BytesIO(requests.get(url).content)).convert('RGB'))
    try:
        # Use MTCNN to detect face in image
        faces, _ = mtcnn(img)
    except (RuntimeError, IndexError):
        return None
    
    # Return None if no face detected
    if faces is None:
        return None
    
    # Use FaceNet to extract facial features from detected face
    face = faces[0].to(device)
    with torch.no_grad():
        features = resnet(face.unsqueeze(0)).cpu().numpy()
    
    return features.flatten()
   

# Define function to compare facial features and determine if images match
def compare_faces(url1, url2):
    # Get facial features from each image URL
    features1 = get_face_features(url1)
    features2 = get_face_features(url2)
    
    # Return False if face not detected in one or both images
    if features1 is None or features2 is None:
        return None
    
    # Calculate cosine similarity between facial features
    similarity = np.dot(features1, features2) / (np.linalg.norm(features1) * np.linalg.norm(features2))
    
    # Return True if similarity is above threshold, False otherwise
    return similarity

merged_df['image_match_ssim'] = None
merged_df['facial_recognition'] = None
size = (550, 723)
for index, row in merged_df.iterrows():
    url1 = row['first_photo_url'] # yelp image
    url2 = row['user_image_url'] # tinder image
    if pd.isna(url1) or pd.isna(url2):
        continue
    try:
        img1 = Image.open(BytesIO(requests.get(url1).content)).resize(size)
        img2 = Image.open(BytesIO(requests.get(url2).content)).resize(size)
        arr1 = np.array(img1)
        arr2 = np.array(img2)
        if arr1.shape != arr2.shape:
            continue
        ssim_score = metrics.structural_similarity(arr1, arr2, win_size=3, data_range=arr1.max() - arr1.min(), multichannel=True)
        merged_df.at[index, 'image_match'] = ssim_score
        merged_df.at[index, 'facial_recognition'] = compare_faces(url1, url2)
    except (OSError, UnidentifiedImageError, IndexError):
        continue

  ssim_score = metrics.structural_similarity(arr1, arr2, win_size=3, data_range=arr1.max() - arr1.min(), multichannel=True)


In [None]:
merged_df.to_csv('drive/MyDrive/image_similarity.csv')