In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.0-py3-none-any.whl (474 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.3/474.3 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K 

In [None]:
import torch
import numpy as np
from tqdm import tqdm
from typing import List, Union, Tuple
from torch.utils.data import DataLoader
import PIL
from transformers import CLIPModel, CLIPProcessor
from datasets import Dataset, Image



class PLIP:


    def __init__(self, model_name, auth_token=None):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = model_name
        self.model, self.preprocess, self.model_hash = self._load_model(model_name, auth_token=auth_token)
        self.model = self.model.to(self.device)


    def _load_model(self,
                    name: str,
                    device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu",
                    auth_token=None):

        model = CLIPModel.from_pretrained(name, use_auth_token=auth_token)
        preprocessing = CLIPProcessor.from_pretrained(name, use_auth_token=auth_token)

        return model, preprocessing, hash

    def encode_images(self, images: Union[List[str], List[PIL.Image.Image]], batch_size: int):
        def transform_fn(el):
            imgs = el['image'] if isinstance(el['image'][0], PIL.Image.Image) else [Image().decode_example(_) for _ in
                                                                                    el['image']]
            return self.preprocess(images=imgs, return_tensors='pt')

        dataset = Dataset.from_dict({'image': images})
        dataset = dataset.cast_column('image', Image(decode=False)) if isinstance(images[0], str) else dataset
        # dataset = dataset.map(map_fn,
        #             batched=True,
        #             remove_columns=['image'])
        dataset.set_format('torch')
        dataset.set_transform(transform_fn)
        dataloader = DataLoader(dataset, batch_size=batch_size)
        image_embeddings = []
        pbar = tqdm(total=len(images) // batch_size, position=0)
        with torch.no_grad():
            for batch in dataloader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                image_embeddings.extend(self.model.get_image_features(**batch).detach().cpu().numpy())
                pbar.update(1)
            pbar.close()
        return np.stack(image_embeddings)

    def encode_text(self, text: List[str], batch_size: int):
        dataset = Dataset.from_dict({'text': text})
        dataset = dataset.map(lambda el: self.preprocess(text=el['text'], return_tensors="pt",
                                                         max_length=77, padding="max_length", truncation=True),
                              batched=True,
                              remove_columns=['text'])
        dataset.set_format('torch')
        dataloader = DataLoader(dataset, batch_size=batch_size)
        text_embeddings = []
        pbar = tqdm(total=len(text) // batch_size, position=0)
        with torch.no_grad():
            for batch in dataloader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                text_embeddings.extend(self.model.get_text_features(**batch).detach().cpu().numpy())
                pbar.update(1)
            pbar.close()
        return np.stack(text_embeddings)

    def _cosine_similarity(self, key_vectors: np.ndarray, space_vectors: np.ndarray, normalize=True):
        if normalize:
            key_vectors = key_vectors / np.linalg.norm(key_vectors, ord=2, axis=-1, keepdims=True)
        return np.matmul(key_vectors, space_vectors.T)

    def _nearest_neighbours(self, k, key_vectors, space_vectors, normalize=True, debug=False):
        if type(key_vectors) == List:
            key_vectors = np.array(key_vectors)
        if type(space_vectors) == List:
            space_vectors = np.array(space_vectors)

        cosine_sim = self._cosine_similarity(key_vectors, space_vectors, normalize=normalize)
        nn = cosine_sim.argsort()[:, -k:][:, ::-1]

        return nn

    def zero_shot_classification(self, images, text_labels: List[str], debug=False):
        """
        Perform zero-shot image classification
        :return:
        """
        # encode text
        text_vectors = self.encode_text(text_labels, batch_size=8)
        # encode images
        image_vectors = self.encode_images(images, batch_size=8)
        # compute cosine similarity
        cosine_sim = self._cosine_similarity(image_vectors, text_vectors)
        if debug:
            print(cosine_sim)
        preds = np.argmax(cosine_sim, axis=-1)
        return [text_labels[idx] for idx in preds]

    def retrieval(self, queries: List[str], top_k: int = 10):
        """
        Image retrieval from queries
        :return:
        """
        # encode text
        text_vectors = self.encode_text(queries, batch_size=8)
        # compute cosine similarity
        # cosine_sim = self._cosine_similarity(text_vectors, self.image_vectors)
        return self._nearest_neighbours(k=top_k, key_vectors=text_vectors, space_vectors=self.image_vectors)

        # return np.argmax(cosine_sim, axis=-1)
        # return cosine_sim.argsort()[:,-top_k:][:,::-1]



In [None]:
import numpy as np

plip = PLIP('vinid/plip')




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.46k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]



In [None]:
import json

with open('data/captions.json', 'r') as f:
    data = json.load(f)

# Get list of captions in the data set
# all_captions = [data[key]['caption'] for key in data.keys()]
all_captions = {data[key]['caption']: key for key in data.keys()}
captions = all_captions
print(captions)

{'ER expression in tumor tissue. IHC staining, original': '0', 'Nuclear expression of TS (brown) in a colon carcinoma': '1', 'Nuclear expression of E2F1 (brown) in a colon carcinoma. This is higher magnification of the upper portion of a core shown in an inset (lower left corner)': '2', 'Cytoplasmic immunoexpression of PD-L1 in oral squamous cell carcinomas with poorer prognosis (OSCCPP). Immunohistochemistry. Total magnification x100': '3', 'Nuclear and perinuclear immunoexpression of Foxp3 in oral squamous cell carcinomas with poorer prognosis (OSCCPP). Immunohistochemistry. Total magnification x100': '4', 'Cytoplasmic immunoexpression of PD-L1 in oral squamous cell carcinomas with better prognosis (OSCCBP). Immunohistochemistry. Total magnification x100': '5', 'Nuclear and perinuclear immunoexpression of Foxp3 in oral squamous cell carcinomas with better prognosis (OSCCBP). Immunohistochemistry. Total magnification x100': '6', 'Cytoplasmic immunoexpression of PD-L1 in control. Immun

In [None]:
def getQueryEmbedding(query):
  query_embedding = plip.encode_text(query, batch_size=32)
  query_embedding = query_embedding/np.linalg.norm(query_embedding, ord=2, axis=-1, keepdims=True)
  return query_embedding

In [None]:
query = ["Lymphoepithelial carcinoma"]
query_embedding = getQueryEmbedding(query)

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

1it [00:00,  3.78it/s]


In [None]:
# we create image embeddings and text embeddings

caption_embeddings = plip.encode_text(captions.values(), batch_size=32)

# we normalize the embeddings to unit norm (so that we can use dot product instead of cosine similarity to do comparisons)
# image_embeddings = image_embeddings/np.linalg.norm(image_embeddings, ord=2, axis=-1, keepdims=True)
caption_embeddings = caption_embeddings/np.linalg.norm(caption_embeddings, ord=2, axis=-1, keepdims=True)

caption_embeddings_pairs = list(zip(captions, caption_embeddings))

Map:   0%|          | 0/3285 [00:00<?, ? examples/s]

103it [08:36,  5.01s/it]


In [None]:
# !pip install -U torch transformers

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import CosineSimilarity
from transformers import CLIPTokenizer, CLIPModel, CLIPTextModel
import tensorflow as tf
cossim = CosineSimilarity(dim=0, eps=1e-6)

In [None]:
def cos_sim(v1, v2):
  return F.cosine_similarity(torch.tensor(v1), torch.tensor(v2))

In [None]:
def best_captions_cossim(query, captionPairs, n):
  results = [0]
  results_captions = ['']
  for j in range(len(captionPairs)):
    eval = cos_sim(query, captionPairs[j][1])
    for i in range(min(len(results), n)):
      if results[i] < eval:
        results.insert(i, eval)
        results_captions.insert(0, captionPairs[j][0])
        break
  return results[:n], results_captions[:n]

results, results_captions = best_captions_cossim(query_embedding, caption_embeddings_pairs, 5)
print(results)
print(results_captions)

[tensor([0.8022]), tensor([0.8005]), tensor([0.7984]), tensor([0.7979]), tensor([0.7958])]
['Photomicrograph showing inflammatory cell infiltrate consisting of lymphocytes (1), plasma cells (2), macrophages (3), and mast cells(4) (H&E, x10)', 'Salivary duct carcinoma  cribriform growth pattern of the neoplastic cells with surrounding hyaline sclerosis of the stroma (H and E, 10 x10)', 'Case 2: High-power photomicrograph showing anaplastic changes in the cystic epithelium (H and E, x400)', 'Photomicrograph showing less number of alpha-smooth muscle actin positive cells in the fibrous wall in dentigerous cyst (IHC stain, x100)', 'p53 positive cells in the periphery of the epithelial whorls (IHC stain, x100)']


In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [None]:
# Using fiass to evaluate dot product similarity
import faiss

# accept matrix of word embeddings
def dotProdSim(query, caption):

  query = query.reshape(1, -1)
  caption = caption.reshape(1, -1)

  dim = query.shape[1]

  # dim = len(embeddings)
  index = faiss.IndexFlatIP(dim)
  index.add(query)

  # search finds the dot product similarity (and indices); k is the number of embeddings
  results, ind = index.search(caption, k=5)
  return results[0][0]

In [None]:
def best_captions_dotProdSim(query, captionPairs, n):
  results = [0]
  results_captions = ['']
  for j in range(len(captionPairs)):
    eval = dotProdSim(query, captionPairs[j][1])
    for i in range(min(len(results), n)):
      # want smaller values
      if results[i] < eval:
        results.insert(i, eval)
        results_captions.insert(0, captionPairs[j][0])
        break
      elif i == min(len(results), n) - 1:
        results.append(eval)
        results_captions.append(captionPairs[j][0])
  return results[:n], results_captions[:n]

  results, results_captions = best_captions_dotProdSim(query_embedding, caption_embeddings_pairs, 5)
print(results)
print(results_captions)

[tensor([0.8022]), tensor([0.8005]), tensor([0.7984]), tensor([0.7979]), tensor([0.7958])]
['Photomicrograph showing inflammatory cell infiltrate consisting of lymphocytes (1), plasma cells (2), macrophages (3), and mast cells(4) (H&E, x10)', 'Salivary duct carcinoma  cribriform growth pattern of the neoplastic cells with surrounding hyaline sclerosis of the stroma (H and E, 10 x10)', 'Case 2: High-power photomicrograph showing anaplastic changes in the cystic epithelium (H and E, x400)', 'Photomicrograph showing less number of alpha-smooth muscle actin positive cells in the fibrous wall in dentigerous cyst (IHC stain, x100)', 'p53 positive cells in the periphery of the epithelial whorls (IHC stain, x100)']


In [None]:
# Using fiass to evaluate L2 similarity
import faiss

# accept matrix of word embeddings
def l2Sim(query, caption):
  query = query/np.linalg.norm(query, ord=2, axis=-1, keepdims=True)
  caption = caption/np.linalg.norm(caption, ord=2, axis=-1, keepdims=True)

  query = query.reshape(1, -1)
  caption = caption.reshape(1, -1)

  query = query.reshape(1, -1)
  caption = caption.reshape(1, -1)

  dim = query.shape[1]
  index = faiss.IndexFlatL2(dim)
  index.add(query)

  # search finds the L2 similarity (and indices); k is the number of embeddings
  results, ind = index.search(caption, k=5)
  return results[0][0]

In [None]:
def best_captions_l2(query, captionPairs, n):
  results = [0]
  results_captions = ['']
  for j in range(len(captionPairs)):
    eval = l2Sim(query, captionPairs[j][1])
    for i in range(min(len(results), n)):
      # want smaller values
      if results[i] > eval:
        results.insert(i, eval)
        results_captions.insert(0, captionPairs[j][0])
        break
      elif i == min(len(results), n) - 1:
        results.append(eval)
        results_captions.append(captionPairs[j][0])
  return results[:n], results_captions[:n]

  results, results_captions = best_captions_l2(query_embedding, caption_embeddings_pairs, 5)
print(results)
print(results_captions)

[tensor([0.8022]), tensor([0.8005]), tensor([0.7984]), tensor([0.7979]), tensor([0.7958])]
['Photomicrograph showing inflammatory cell infiltrate consisting of lymphocytes (1), plasma cells (2), macrophages (3), and mast cells(4) (H&E, x10)', 'Salivary duct carcinoma  cribriform growth pattern of the neoplastic cells with surrounding hyaline sclerosis of the stroma (H and E, 10 x10)', 'Case 2: High-power photomicrograph showing anaplastic changes in the cystic epithelium (H and E, x400)', 'Photomicrograph showing less number of alpha-smooth muscle actin positive cells in the fibrous wall in dentigerous cyst (IHC stain, x100)', 'p53 positive cells in the periphery of the epithelial whorls (IHC stain, x100)']


In [None]:
# Function to find top results based on queries

def findTopResults(query, captionPairs = caption_embeddings_pairs, n = 5):
  query_embedding = getQueryEmbedding([query])

  cossim_results, cossim_captions = best_captions_cossim(query_embedding, captionPairs, n);
  dotProdSim_results, dotProdSim_captions = best_captions_dotProdSim(query_embedding, captionPairs, n);
  l2_results, l2_captions = best_captions_l2(query_embedding, captionPairs, n);

  cossim = list(zip(cossim_captions, cossim_results))
  dotProdSim = list(zip(dotProdSim_captions, dotProdSim_results))
  l2Sim = list(zip(l2_captions, l2_results))
  print("Cosine Similarity results:")
  print(cossim)
  print("Dot Product Similarity results:")
  print(dotProdSim)
  print("L2 Similarity results:")
  print(l2Sim)

  # return cossim_results, cossim_captions, dotProdSim_results, dotProdSim_captions, l2_results, l2_captions



In [None]:
findTopResults("tumor")

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

1it [00:00,  1.87it/s]


Cosine Similarity results:
[('Histopathological image showing tumor cells arranged in solid pattern with few cells showing abnormal mitosis (H&E stain, x400)', tensor([0.8520])), ('Histopathological image of decalcified section showing intraosseous involvement of solid pattern of adenoid cystic carcinoma (H&E stain, x100)', tensor([0.8477])), ('alphaSMA positive cells present only in the wall of blood vessels, while subepithelial connective tissue shows very few scattered cells. (IHC stain x200)', tensor([0.8466])), ('Orthokeratinized odontogenic cyst: Stratified squamous epithelial lining with surface thick layer of orthokeratin (H&E stain, x100)', tensor([0.8460])), ('Vascular proliferation with thickened blood vessel walls showing large plump endothelial cells having oval nuclei with vacuolated cytoplasm (H&E stain, x400)', tensor([0.8442]))]
Dot Product Similarity results:
[('Histopathological image showing tumor cells arranged in solid pattern with few cells showing abnormal mitos

In [None]:
findTopResults("lymphoepithelial carcinoma")

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

1it [00:00,  1.93it/s]


Cosine Similarity results:
[('Photomicrograph showing inflammatory cell infiltrate consisting of lymphocytes (1), plasma cells (2), macrophages (3), and mast cells(4) (H&E, x10)', tensor([0.8022])), ('Salivary duct carcinoma  cribriform growth pattern of the neoplastic cells with surrounding hyaline sclerosis of the stroma (H and E, 10 x10)', tensor([0.8005])), ('Case 2: High-power photomicrograph showing anaplastic changes in the cystic epithelium (H and E, x400)', tensor([0.7984])), ('Photomicrograph showing less number of alpha-smooth muscle actin positive cells in the fibrous wall in dentigerous cyst (IHC stain, x100)', tensor([0.7979])), ('p53 positive cells in the periphery of the epithelial whorls (IHC stain, x100)', tensor([0.7958]))]
Dot Product Similarity results:
[('Photomicrograph showing inflammatory cell infiltrate consisting of lymphocytes (1), plasma cells (2), macrophages (3), and mast cells(4) (H&E, x10)', 0.80217755), ('Salivary duct carcinoma  cribriform growth patt

In [None]:
findTopResults("streptococcal tonsillitis")

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

1it [00:00,  5.01it/s]


Cosine Similarity results:
[('Intra-granuloma T cells. (a) High magnification (HE). Intra-granuloma lymphocytes are visible.', tensor([0.5822])), ('CD 31 expression in lesional areas (40x)', tensor([0.5702])), ('Photomicrograph showing spindle-shaped and stellate cells in a copious myxoid stroma (H & E stain, x40)', tensor([0.5689])), ('Hematoxylin and eosin-stained section (4x) showing dentigerous cyst in the mandibular left quadrant in continuity with the connective tissue stroma having cemental masses. Inset shows higher magnification (40x) of the cyst epithelium resembling the reduced enamel epithelium', tensor([0.5662])), ('Histological image of skin from the scalp showing acantholysis in the upper third of the epidermis in PF (x20 magnification Haematoxylin & Eosin)', tensor([0.5657]))]
Dot Product Similarity results:
[('Intra-granuloma T cells. (a) High magnification (HE). Intra-granuloma lymphocytes are visible.', 0.58216465), ('CD 31 expression in lesional areas (40x)', 0.5701

In [None]:
findTopResults("Microscopic features of the MFH tumors in cases 1 (A, B) and 2 (C, D). They are quite similar. The storiform-pleomorphic patterns seen in MFH cases 1 and 2 are shown in A and C, respectively. Proliferation of spindle, bizarre, pleomorphic and multi-nucleated giant cells in MFH cases 1 and 2 are shown in B and D, respectively. In comparison with case 1, the tumor from case 2 had slightly higher cellularity and more frequent multi-nucleated giant cells. (H&E staining, original magnification A, x100; B, x400; C and D, x200).")

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

1it [00:00,  4.34it/s]


Cosine Similarity results:
[('Plasma cells.', tensor([0.4160])), ('Nuclear and perinuclear immunoexpression of Foxp3 in oral squamous cell carcinomas with better prognosis (OSCCBP). Immunohistochemistry. Total magnification x100', tensor([0.3928])), ('Cytoplasmic immunoexpression of PD-L1 in oral squamous cell carcinomas with better prognosis (OSCCBP). Immunohistochemistry. Total magnification x100', tensor([0.3879])), ('Nuclear and perinuclear immunoexpression of Foxp3 in oral squamous cell carcinomas with poorer prognosis (OSCCPP). Immunohistochemistry. Total magnification x100', tensor([0.3830])), ('Cytoplasmic immunoexpression of PD-L1 in oral squamous cell carcinomas with poorer prognosis (OSCCPP). Immunohistochemistry. Total magnification x100', tensor([0.3816]))]
Dot Product Similarity results:
[('Plasma cells.', 0.41598576), ('Nuclear and perinuclear immunoexpression of Foxp3 in oral squamous cell carcinomas with better prognosis (OSCCBP). Immunohistochemistry. Total magnificat

In [None]:
def findImage(caption):
  uuid = data[captions.get(caption)]['uuid']


Query = "tumor"

Cosine Similarity results:

[('Carcinoma in situ of the urothelium.', tensor(0.8429)), ('The tumor was positive for alpha-smooth muscle actin', tensor(0.8075)), ('Clinical photograph of the patient did not reveal any facial asymmetry', tensor(0.8063)), ('At the base, the lesion boundaries tended to be well demarcated.', tensor(0.8026)), ('In the periphery of the mass, normal breast tissue is identified.', tensor(0.7996))]


Dot Product Similarity results:

[('Carcinoma in situ of the urothelium.', 0.8429143), ('The tumor was positive for alpha-smooth muscle actin', 0.8075356), ('Clinical photograph of the patient did not reveal any facial asymmetry', 0.80628717), ('At the base, the lesion boundaries tended to be well demarcated.', 0.8026052), ('In the periphery of the mass, normal breast tissue is identified.', 0.7996011)]


L2 Similarity results:

[('Carcinoma in situ of the urothelium.', 0), ('The tumor was positive for alpha-smooth muscle actin', 0.3141714), ('At the base, the lesion boundaries tended to be well demarcated.', 0.3849293), ('In the periphery of the mass, normal breast tissue is identified.', 0.38742572), ('Tumor cells strongly positive for myoglobin.', 0.39478934)]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Query = "lymphoepithelial carcinoma"

Cosine Similarity results:

[('Medium power image illustrates the plasma cells and lymphocytes surrounding the tumor.', tensor(0.8406)), ('Urothelial dysplasia.', tensor(0.8404)), ('Classic papillary carcinoma H&E.', tensor(0.8240)), ('The tumour is dispersed by lymphocytes, eosinophils, histiocytes and plasma cells (high power image).', tensor(0.8207)), ('The tumor was positive for alpha-smooth muscle actin', tensor(0.8142))]

Dot Product Similarity results:

[('Medium power image illustrates the plasma cells and lymphocytes surrounding the tumor.', 0.84060705), ('Urothelial dysplasia.', 0.84040517), ('Classic papillary carcinoma H&E.', 0.82396585), ('The tumour is dispersed by lymphocytes, eosinophils, histiocytes and plasma cells (high power image).', 0.8207497), ('The tumor was positive for alpha-smooth muscle actin', 0.8141506)]

L2 Similarity results:

[('Medium power image illustrates the plasma cells and lymphocytes surrounding the tumor.', 0), ('The tumour is dispersed by lymphocytes, eosinophils, histiocytes and plasma cells (high power image).', 0.31878614), ('The tumor was positive for alpha-smooth muscle actin', 0.31918973), ('Tumor cells strongly positive for myoglobin.', 0.35206872), ('Nearly all tumor cells are immunoreactive for smooth muscle actin.', 0.3585004)]

Query = streptococcal tonsillitis

Cosine Similarity results:

[('Negative result of patients sample', tensor(0.6884)), ('Case 1 FDCS of the tonsil, the tumor cells are immunoreactive with CD21 strongly and diffusely.', tensor(0.6716)), ('Case 1 FDCS of the tonsil, individual cells possess characteristic features, including a delicate nuclear membrane, vesicular nuclei, small but distinct nucleoli and indistinct cell borders. (H&E stain).', tensor(0.6677)), ('The tumour is dispersed by lymphocytes, eosinophils, histiocytes and plasma cells (high power image).', tensor(0.6582)), ('Odontogenic epithelial cells arranged in the form of follicles and stellate reticulum like cells in the center which are surrounded by ectomesenchymal cells (4x)', tensor(0.6381))]

Dot Product Similarity results:

[('Negative result of patients sample', 0.6884264), ('Case 1 FDCS of the tonsil, the tumor cells are immunoreactive with CD21 strongly and diffusely.', 0.67158103), ('Case 1 FDCS of the tonsil, individual cells possess characteristic features, including a delicate nuclear membrane, vesicular nuclei, small but distinct nucleoli and indistinct cell borders. (H&E stain).', 0.66772413), ('The tumour is dispersed by lymphocytes, eosinophils, histiocytes and plasma cells (high power image).', 0.6581726), ('Odontogenic epithelial cells arranged in the form of follicles and stellate reticulum like cells in the center which are surrounded by ectomesenchymal cells (4x)', 0.63807267)]

L2 Similarity results:

[('Negative result of patients sample', 0), ('Case 1 FDCS of the tonsil, the tumor cells are immunoreactive with CD21 strongly and diffusely.', 0.62314737), ('Case 1 FDCS of the tonsil, individual cells possess characteristic features, including a delicate nuclear membrane, vesicular nuclei, small but distinct nucleoli and indistinct cell borders. (H&E stain).', 0.65683794), ('Odontogenic epithelial cells arranged in the form of follicles and stellate reticulum like cells in the center which are surrounded by ectomesenchymal cells (4x)', 0.6645518), ('Photograph showing proliferating cell nuclear antigen expression in follicular area of Tonsil which was used as positive control (IHC stain, x200)x200 (positive control)', 0.683655)]

Query: "Microscopic features of the MFH tumors in cases 1 (A, B) and 2 (C, D). They are quite similar. The storiform-pleomorphic patterns seen in MFH cases 1 and 2 are shown in A and C, respectively. Proliferation of spindle, bizarre, pleomorphic and multi-nucleated giant cells in MFH cases 1 and 2 are shown in B and D, respectively. In comparison with case 1, the tumor from case 2 had slightly higher cellularity and more frequent multi-nucleated giant cells. (H&E staining, original magnification A, x100; B, x400; C and D, x200)."

Cosine Similarity results:

[('Histopathology of duodenal neuroendocrine tumors in NF1 patients. A. A low-power view of duodenal neuroendocrine tumor shows characteristic nested architecture. Hematoxylin-and-eosin, 10X. B. A high-power view shows characteristic cytologic features, including monotonous round nuclei with finely stippled chromatin, amphophilic granular cytoplasm, and psammomatous calcifications. Hematoxylin-and-eosin, 20X.', tensor(1.)), ('Tumor cells arranged in a storiform pattern and in sheets of alternating hypercellular and hypocellular areas (H&E stain, x40)', tensor(0.6282)), ('(a) Photomicrograph of tumor cells showing areas of tumor osteoid with vascular spaces and benign appearing multinucleated giant cells (H&E stain x100) (b) Photomicrograph of pleomorphic tumor cells showing areas of tumor osteoid with vascular spaces (H&E stain x400)', tensor(0.6004)), ('(a) Photomicrograph of vascular spaces separated by benign appearing osteoclastic multinucleated giant cells and strands of neoplastic malignant mononuclear cells (H&E stain x100). (b) Photomicrograph of vascular spaces separated by strands of neoplastic malignant mononuclear cells with tumor osteoid (H&E stain x400)', tensor(0.5849)), ('Photomicrograph shows pleomorphic and anaplastic spindle cells resembling pleomorphic RMS (H&E stain, x400)', tensor(0.5788))]

Dot Product Similarity results:

[('Histopathology of duodenal neuroendocrine tumors in NF1 patients. A. A low-power view of duodenal neuroendocrine tumor shows characteristic nested architecture. Hematoxylin-and-eosin, 10X. B. A high-power view shows characteristic cytologic features, including monotonous round nuclei with finely stippled chromatin, amphophilic granular cytoplasm, and psammomatous calcifications. Hematoxylin-and-eosin, 20X.', 0.99999994), ('Tumor cells arranged in a storiform pattern and in sheets of alternating hypercellular and hypocellular areas (H&E stain, x40)', 0.6282424), ('(a) Photomicrograph of tumor cells showing areas of tumor osteoid with vascular spaces and benign appearing multinucleated giant cells (H&E stain x100) (b) Photomicrograph of pleomorphic tumor cells showing areas of tumor osteoid with vascular spaces (H&E stain x400)', 0.60044014), ('(a) Photomicrograph of vascular spaces separated by benign appearing osteoclastic multinucleated giant cells and strands of neoplastic malignant mononuclear cells (H&E stain x100). (b) Photomicrograph of vascular spaces separated by strands of neoplastic malignant mononuclear cells with tumor osteoid (H&E stain x400)', 0.58493596), ('Photomicrograph shows pleomorphic and anaplastic spindle cells resembling pleomorphic RMS (H&E stain, x400)', 0.5788199)]

L2 Similarity results:

[('Histopathology of duodenal neuroendocrine tumors in NF1 patients. A. A low-power view of duodenal neuroendocrine tumor shows characteristic nested architecture. Hematoxylin-and-eosin, 10X. B. A high-power view shows characteristic cytologic features, including monotonous round nuclei with finely stippled chromatin, amphophilic granular cytoplasm, and psammomatous calcifications. Hematoxylin-and-eosin, 20X.', 0), ('Tumor cells arranged in a storiform pattern and in sheets of alternating hypercellular and hypocellular areas (H&E stain, x40)', 9.663726e-14), ('(a) Photomicrograph of tumor cells showing areas of tumor osteoid with vascular spaces and benign appearing multinucleated giant cells (H&E stain x100) (b) Photomicrograph of pleomorphic tumor cells showing areas of tumor osteoid with vascular spaces (H&E stain x400)', 0.743515), ('(a) Photomicrograph of vascular spaces separated by benign appearing osteoclastic multinucleated giant cells and strands of neoplastic malignant mononuclear cells (H&E stain x100). (b) Photomicrograph of vascular spaces separated by strands of neoplastic malignant mononuclear cells with tumor osteoid (H&E stain x400)', 0.7991197), ('Photomicrograph shows pleomorphic and anaplastic spindle cells resembling pleomorphic RMS (H&E stain, x400)', 0.8301282)]