In [14]:
from transformers import ViTImageProcessor, ViTModel
from PIL import Image
import pandas as pd
import os

In [None]:
#load processor and model
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')

In [12]:
#load image
image = Image.open("./images/Abboud Abdul Latif Hassan_AL-ZOMOR.jpg")
inputs= processor(images=image, return_tensors="pt")
print(inputs)
outputs=model(**inputs)
last_hidden_states = outputs.last_hidden_state
print(last_hidden_states)
print(outputs)


{'pixel_values': tensor([[[[ 0.9843,  0.9843,  0.9843,  ...,  0.9765,  0.9765,  0.9765],
          [ 0.9843,  0.9843,  0.9843,  ...,  0.9765,  0.9765,  0.9765],
          [ 0.9843,  0.9843,  0.9843,  ...,  0.9765,  0.9765,  0.9765],
          ...,
          [-0.4431, -0.5059, -0.5529,  ..., -0.0510, -0.0588, -0.0667],
          [-0.8980, -0.8824, -0.8353,  ..., -0.1137, -0.1137, -0.1216],
          [-0.8510, -0.8196, -0.8039,  ..., -0.1765, -0.1843, -0.1922]],

         [[ 0.9843,  0.9843,  0.9843,  ...,  0.9922,  0.9922,  0.9922],
          [ 0.9843,  0.9843,  0.9843,  ...,  0.9922,  0.9922,  0.9922],
          [ 0.9843,  0.9843,  0.9843,  ...,  0.9922,  0.9922,  0.9922],
          ...,
          [-0.4431, -0.5059, -0.5529,  ..., -0.4118, -0.4196, -0.4275],
          [-0.8980, -0.8824, -0.8353,  ..., -0.4745, -0.4745, -0.4824],
          [-0.8510, -0.8196, -0.8039,  ..., -0.5373, -0.5451, -0.5529]],

         [[ 0.9843,  0.9843,  0.9843,  ...,  0.9686,  0.9686,  0.9686],
          [ 0

In [17]:


# Directory containing images
image_dir = "./images"

# List to store image data
data = []

# Iterate through all files in the directory
for file in os.listdir(image_dir):
    if file.endswith(".jpg"):
        name_part = file.rsplit(".", 1)[0]  # Remove file extension
        first_name, last_name = name_part.split("_", 1)  # Split by underscore
        data.append({"first_name": first_name, "last_name": last_name, "file_name": file})

df = pd.DataFrame(data)

In [18]:
df.head()

Unnamed: 0,first_name,last_name,file_name
0,Abboud Abdul Latif Hassan,AL-ZOMOR,Abboud Abdul Latif Hassan_AL-ZOMOR.jpg
1,Abd Al Aziz,AWDA,Abd Al Aziz_AWDA.jpg
2,Abd al-Hadi,AL-IRAQI,Abd al-Hadi_AL-IRAQI.jpg
3,Abdullah Ahmed,ABDULLAH,Abdullah Ahmed_ABDULLAH.jpg
4,Abu Hafs,AL-MASRI,Abu Hafs_AL-MASRI.jpg


In [30]:
#embedding function
def get_image_embedding(file_path):
    """Loads an image and returns its embedding."""
    try:
        image = Image.open(file_path)  # Load image
        inputs = processor(images=image, return_tensors="pt")  # Process image
        outputs = model(**inputs)  # Get model output
        return outputs.last_hidden_state.mean(dim=1).detach().numpy().flatten().tolist() # Return mean pooled embedding
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [31]:
# Apply embedding function to each file
df["embeddings"] = df["file_name"].apply(lambda x: get_image_embedding(os.path.join(image_dir, x)))

# Display DataFrame
print(df)

                     first_name           last_name  \
0     Abboud Abdul Latif Hassan            AL-ZOMOR   
1                   Abd Al Aziz                AWDA   
2                   Abd al-Hadi            AL-IRAQI   
3                Abdullah Ahmed            ABDULLAH   
4                      Abu Hafs            AL-MASRI   
5                           Abu               ABBAS   
6                       Alcides        RAMON MAGANA   
7             Arcangel de Jesus       HENAO MONTOYA   
8              Benjamin Alberto      ARELLANO FELIX   
9                        Chi Fu               CHANG   
10                         Daut           HARADINAJ   
11                Gilberto Jose  RODRIGUEZ OREJUELA   
12                   Hsueh Kang                 WEI   
13                       Ismael    HIGUERA GUERRERO   
14                Jose de Jesus   AMEZCUA CONTRERAS   
15                        Lased            BEN HENI   
16                 Luis Ignacio   AMEZCUA CONTRERAS   
17        

In [34]:
df.head()

Unnamed: 0,first_name,last_name,file_name,embeddings
0,Abboud Abdul Latif Hassan,AL-ZOMOR,Abboud Abdul Latif Hassan_AL-ZOMOR.jpg,"[-0.21107277274131775, 0.2780708968639374, -0...."
1,Abd Al Aziz,AWDA,Abd Al Aziz_AWDA.jpg,"[-0.11320845782756805, 0.15312974154949188, -0..."
2,Abd al-Hadi,AL-IRAQI,Abd al-Hadi_AL-IRAQI.jpg,"[-0.04537868872284889, 0.1974550485610962, -0...."
3,Abdullah Ahmed,ABDULLAH,Abdullah Ahmed_ABDULLAH.jpg,"[-0.04190438240766525, -0.03621479496359825, -..."
4,Abu Hafs,AL-MASRI,Abu Hafs_AL-MASRI.jpg,"[-0.2795782685279846, 0.10232053697109222, 0.1..."


In [33]:
df.to_csv("./sanctions_embeddings.csv")

In [35]:
print(len(df["embeddings"][0]))

768
