# Image Search

Problem: Given a image find the top n images that a user will most likely click.

In [49]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Concatenate, Flatten, Dot
# NEW: Import a pre-trained vision model
from tensorflow.keras.applications import MobileNetV2

### Create Test data

1. List of 512 users
2. Images in databse say 2_000
3. Training samples 50_000


#### Create unqiue images

In [50]:
NUM_USERS = 1_000  # Total unique users in the system.
NUM_IMAGES = 5_000  # Total unique images in your database.
# e.g., 100 different object labels like 'dog', 'car', 'beach'.
NUM_LABELS = 100
# The number of training examples (past user interactions).
NUM_SAMPLES = 200_000

# Define the dimensionality of our pre-computed embeddings.
# This would be determined by the vision model you used (e.g., MobileNetV2 outputs 1280).
EMBEDDING_DIM = 1_280

In [51]:
# --- 2. Simulate the Pre-computed "Pools" of Data ---
# In a real project, you would load this data from your database.

# A pool of pre-computed embeddings for every unique image.
unique_image_embeddings = np.random.rand(
    NUM_IMAGES, EMBEDDING_DIM).astype(np.float32)

# A pool of pre-computed metadata (e.g., a primary object label) for each image.
unique_image_labels = np.random.randint(0, NUM_LABELS, size=NUM_IMAGES)

print(
    f"Simulated {NUM_IMAGES} unique images with {EMBEDDING_DIM}-dim embeddings.")

Simulated 5000 unique images with 1280-dim embeddings.


In [52]:
# --- 3. Simulate the Training Data (User Interactions) ---
# This creates the 200,000 training samples.

# For each sample, randomly assign a user, a query image, and a candidate image.
user_ids = np.random.randint(0, NUM_USERS, size=NUM_SAMPLES)
query_image_ids = np.random.randint(0, NUM_IMAGES, size=NUM_SAMPLES)
candidate_image_ids = np.random.randint(0, NUM_IMAGES, size=NUM_SAMPLES)
# Position in search results (1-20)
positions = np.random.randint(1, 21, size=NUM_SAMPLES)

In [70]:
# --- 4. Gather the Actual Data for Training Using the IDs ---
# We use the IDs to pull the corresponding pre-computed data from our unique pools.

query_embeddings_for_training = unique_image_embeddings[query_image_ids]
candidate_embeddings_for_training = unique_image_embeddings[candidate_image_ids]
candidate_labels_for_training = unique_image_labels[candidate_image_ids]

In [77]:
# --- 5. Engineer the 'click' Label ---
# We create a logical pattern for the model to learn. A click is more likely if:
# 1. The query and candidate images are visually similar.
# 2. The candidate image appeared in a high position (e.g., top 5).
similarity = np.sum(query_embeddings_for_training *
                    candidate_embeddings_for_training, axis=1)
click_probability = (similarity > 0.1 * EMBEDDING_DIM) & (positions < 10)
clicks = np.array(click_probability, dtype=int)

In [81]:
import numpy as np

v, c = np.unique(clicks, return_counts=True)
print(v, c)

[0 1] [109588  90412]


In [83]:
# --- 6. Define the Model Architecture ---

# --- Input Layers ---
# The model expects pre-computed vectors, not raw images.
user_id_input = Input(shape=(1,), name='user_id_input')
query_embedding_input = Input(shape=(EMBEDDING_DIM,), name='query_embedding_input')
candidate_embedding_input = Input(shape=(EMBEDDING_DIM,), name='candidate_embedding_input')
candidate_label_input = Input(shape=(1,), name='candidate_label_input')
position_input = Input(shape=(1,), name='position_input')