In [5]:
import json
import numpy as np
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf
import tensorflow_hub as hub
from IPython.display import Image, display

# Function to load the Universal Sentence Encoder model
def load_model():
    """Return the Universal Sentence Encoder v4 model, loading it at most once.

    The loaded model is memoized on the function object, so repeated calls
    reuse the same instance instead of asking TensorFlow Hub to load it again
    on every call.

    Returns:
        The object returned by ``hub.load`` for the encoder URL.
    """
    cached = getattr(load_model, "_cached_model", None)
    if cached is None:
        cached = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
        # Stored as a function attribute so no module-level global is needed.
        load_model._cached_model = cached
    return cached

# Function to generate embedding for the input text
def embed_text(text, model):
    """Embed a single input with ``model`` and return its vector.

    Args:
        text: The input to embed; it is passed to the model as a one-item
            list.
        model: A callable that accepts a list of inputs and returns an object
            exposing a ``numpy()`` method.

    Returns:
        The first element of the array produced by ``numpy()``, i.e. the
        embedding of ``text``.
    """
    batch = [text]
    embeddings = model(batch).numpy()
    return embeddings[0]

# Function to load data from JSON and prepare for KNN
def load_data(json_file):
    """Load stored embeddings and their metadata from a JSON file.

    The file is expected to contain a list of dictionaries. Entries without
    an ``"embedded vector"`` key are skipped; every kept entry must also
    provide ``"Plots"``, ``"caption"``, ``"mentioned"`` and ``"web location"``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A ``(vectors, info)`` pair: ``vectors`` is a NumPy array built from
        the kept entries' ``"embedded vector"`` values, and ``info`` is a list
        of ``(Plots, caption, mentioned, web location)`` tuples in the same
        order, so ``info[i]`` describes ``vectors[i]``.

    Raises:
        KeyError: If a kept entry lacks one of the metadata keys.
    """
    # Read as UTF-8 explicitly: the platform default encoding can fail on (or
    # mis-decode) non-ASCII text in the stored captions.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Filter once so the vectors and their metadata cannot get out of step.
    records = [obj for obj in data if "embedded vector" in obj]
    vectors = [obj["embedded vector"] for obj in records]
    info = [
        (obj["Plots"], obj["caption"], obj["mentioned"], obj["web location"])
        for obj in records
    ]
    return np.array(vectors), info

# Function to perform KNN search
def knn_search(query, model, vectors, info, n_neighbors=1):
    """Return the metadata entry whose stored vector is nearest to ``query``.

    Args:
        query: Text to search for; it is embedded with ``embed_text``.
        model: Embedding model passed through to ``embed_text``.
        vectors: Stored embedding vectors, one per entry of ``info``.
        info: Metadata entries, indexed in parallel with ``vectors``.
        n_neighbors: Number of neighbours requested from the index. Only the
            single closest entry is returned, so the value is capped at the
            number of stored vectors rather than allowed to fail the search.

    Returns:
        The element of ``info`` at the index of the closest stored vector.

    Raises:
        ValueError: If ``vectors`` is empty.
    """
    # len() rather than truthiness: a multi-element NumPy array has no
    # unambiguous truth value.
    n_samples = len(vectors)
    if n_samples == 0:
        raise ValueError("knn_search requires at least one stored vector")

    # kneighbors() rejects a request for more neighbours than fitted samples.
    k = min(n_neighbors, n_samples)

    query_vector = embed_text(query, model)
    index = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(vectors)
    _, indices = index.kneighbors([query_vector])
    # Row 0 is the only query; column 0 is its closest neighbour.
    return info[indices[0][0]]

# Main function to use for search
def vector_search(query, *, json_file='EmbeddedDB.json', model=None):
    """Find the stored entry that best matches ``query``.

    Args:
        query: Text to search for.
        json_file: Path of the JSON database to search. Defaults to
            ``'EmbeddedDB.json'``, the file this function always used before
            the parameter existed.
        model: Optional embedding model to use. When omitted, the model is
            obtained from ``load_model()``; passing one lets a caller reuse a
            model across many searches.

    Returns:
        The ``(Plots, caption, mentioned, web location)`` tuple of the closest
        entry, as returned by ``knn_search``.
    """
    if model is None:
        model = load_model()
    vectors, info = load_data(json_file)
    return knn_search(query, model, vectors, info)

# Example usage
query = "Higgs boson production"
result = vector_search(query)

# The result is a (Plots, caption, mentioned, web location) tuple; name the
# fields once instead of indexing by position below.
plots, caption, mentioned, web_location = result

# Show the image referenced by the web location, then the text fields.
print("Plot Image:")
display(Image(url=web_location))
print("Caption:", caption)
print("Mentioned:", mentioned)
print("Web Location:", web_location)


Plot Image:


Caption: Figure 01a: Representative tree-level Feynman diagrams for the production of the Higgs boson in association with a top pair (ttH) and the subsequent decay of the Higgs to bb, (a) and (b), and for the main background tt+bb (c).
Mentioned: Figure 1a, b show two examples of tree-level diagrams for \(t\bar{t}H\) production with a subsequent \(H\to b\bar{b}\) decay. A search for the associated production of the Higgs boson with a top-quark pair using several Higgs decay modes (including \(H\to b\bar{b}\)) has recently been published by the CMS Collaboration [24] quoting a ratio of the measured \(t\bar{t}H\) signal cross section to the SM expectation for a Higgs boson mass of 125.6 GeV of \(\mu=2.8\pm 1.0\). The signal hypothesis is defined as a SM Higgs boson produced in association with a top-quark pair as shown in Fig. 1a, b. Hence no coupling of the Higgs boson to the \(W\) boson is accounted for in \(|\mathcal{M}_{i}|^{2}\) to allow for a consistent treatment when performing th