# Getting embeddings for queries and snippets

This notebook demonstrates how to compute embeddings for queries and code snippets with a pretrained model, and how to compare them using cosine similarity.

© 2020 Nokia

Licensed under the BSD 3-Clause License

SPDX-License-Identifier: BSD-3-Clause

In [1]:
from codesearch.utils import load_model
from codesearch.data_config import MODELS

### List which pretrained models are available

In [2]:
# Show the entries registered in MODELS, sorted so the listing is stable across runs.
sorted(MODELS)

['ensemble-embedder-pacs',
 'ncs-embedder-so-ds-feb20',
 'ncs-embedder-staqc-py',
 'tnbow-embedder-so-ds-feb20',
 'use-embedder-pacs']

### Load one of the models

In [3]:
# NOTE(review): this is a relative filesystem path, not one of the names printed
# by sorted(MODELS) above; it only resolves if that directory exists relative to
# the notebook's working directory — confirm load_model accepts both forms, or
# switch to a listed model name so the notebook runs on a fresh checkout.
modelname = "../nbs/ncs/best_ncs_embedder/"
embedder = load_model(modelname)  # `embedder` is reused by the embedding cells below

codesearch.ncs.ncs_embedder.NcsEmbedder
codesearch.encoders.BasicEncoder


### Embed queries

In [4]:
# A batch containing a single natural-language query.
queries = ["hide android keyboard"]
query_embs = embedder.embed_queries(queries)

# Inspect the dimensions of the returned embedding array.
query_embs.shape

Initializing spacy nlp /
Initialized spacy nlp
Embedding sequences: 100%|██████████| 1/1 [00:04<00:00,  4.50s/it]


(1, 100)

### Embed snippets

In [5]:
language = "python"
# NOTE(review): the snippet text below looks like Java rather than Python —
# confirm which language tag the embedder expects for it.
code = """
protected final boolean hideKeyboard() {
        return SillyAndroid.hideKeyboard(this);
    }
"""
# Reuse `language` instead of repeating the literal, so editing the variable
# above actually changes what is passed to the embedder.
snippet = {
    "description": "hide keyboard",
    "code": code,
    "language": language,
}
# embed_snippets takes a list of snippet dicts; here a batch of one.
snippet_embs = embedder.embed_snippets([snippet])
snippet_embs.shape

Embedding snippets: 100%|██████████| 1/1 [00:00<00:00, 46.26it/s]


(1, 100)

### Compute the similarity between a query and snippet

In [6]:
import numpy as np


def l2_normalize(embs):
    """Scale every row of a 2-D array to unit L2 norm.

    Rows whose norm is exactly zero are returned unchanged (all zeros)
    instead of being divided by zero, which would turn them into NaN.
    """
    norms = np.linalg.norm(embs, axis=1, keepdims=True)
    # `norms` is a fresh array, so patching it in place does not touch `embs`.
    norms[norms == 0] = 1
    return embs / norms


# With unit-length rows, the dot product of two rows is their cosine similarity.
query_embs_n = l2_normalize(query_embs)
snippet_embs_n = l2_normalize(snippet_embs)

# Entry [i, j] is the similarity between query i and snippet j.
cosine_sims = np.dot(query_embs_n, snippet_embs_n.T)
cosine_sims

array([[0.8716777]], dtype=float32)