# Sentence Transformer

In [1]:
from sentence_transformers import SentenceTransformer, util
import torch
import json

In [2]:
torch.cuda.is_available()

True

In [3]:
embedder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1', device='cuda')


In [4]:
data = json.load(open('data.json', 'r'))

In [5]:
corpus = list()
hrefList = list()
for i in data:
    corpus.extend(data[i]['data'])
    hrefList.extend([data[i]['href']]*len(data[i]['data']))

In [6]:
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

In [7]:
top_k = min(5, len(corpus))

In [15]:
query = "Create a REST API"

In [16]:
query_embedding = embedder.encode(query, convert_to_tensor=True)

In [17]:
cos_scores = util.dot_score(query_embedding, corpus_embeddings)[0]
top_results = torch.topk(cos_scores, k=top_k)
print("Query:", query)
print("Top 5 most similar sentences in corpus:")
for score, idx in zip(top_results[0], top_results[1]):
    print(corpus[idx], "(Score: {:.4f}) | href:{}".format(score, hrefList[idx]))

Query: Create a REST API
Top 5 most similar sentences in corpus:
REST API (Score: 0.8741) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_publish_single.html#
Overview of Creating a REST API from Scratch (Score: 0.8539) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/co-overview_create_rest_api.html#
REST API. (Score: 0.8218) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_tagging.html#
To create an API (Score: 0.8076) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-qsg_create_api.html#
How Do I Create an API? (Score: 0.7930) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gatew

In [18]:
len(hrefList)

13855

In [19]:
semanticSearchResult = util.semantic_search(
    query_embedding, corpus_embeddings)
for i in semanticSearchResult[0]:
    # print(i)
    print(corpus[i['corpus_id']],
          "(Score: {:.4f}) | href:{}".format(i['score'], hrefList[i['corpus_id']]))


REST API (Score: 0.8741) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_publish_single.html#
Overview of Creating a REST API from Scratch (Score: 0.8539) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/co-overview_create_rest_api.html#
REST API. (Score: 0.8218) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_tagging.html#
To create an API (Score: 0.8076) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-qsg_create_api.html#
How Do I Create an API? (Score: 0.7930) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-qsg_create_api.html#
REST API Details (S

In [20]:
corpus[semanticSearchResult[0][0]['corpus_id']]

'REST API'

# FAISS

In [1]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer, util
import json
import time

In [2]:
data = json.load(open('data.json', 'r'))
corpus = list()
hrefList = list()
for i in data:
    corpus.extend(data[i]['data'])
    hrefList.extend([data[i]['href']]*len(data[i]['data']))

In [3]:
faissModel = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1', device='cuda')
faissEmbedding = faissModel.encode(corpus)

In [4]:
index = faiss.IndexIDMap(faiss.IndexFlatIP(768))
index.add_with_ids(faissEmbedding, np.array(range(0,len(faissEmbedding))))

AssertionError: 

In [None]:
faiss.write_index(index, 'faiss.index')

In [None]:
def search(query):
    t=time.time()
    query_vector = faissModel.encode([query], show_progress_bar=True)
    k = 5
    top_k = index.search(query_vector, k)
    print('totaltime: {}'.format(time.time()-t))
    return [hrefList[_id] for _id in top_k[1].tolist()[0]]

In [None]:
query=str(input())
results=search(query)
print('results :')
for result in results:
    print('\t',result)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

totaltime: 0.038728952407836914
results :
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_publish_single.html#
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta-api_tagging.html#
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta_retrieve_json_bearer_token.html#
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/gtw_manage_apis.html#
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/ta_retrieve_json_bearer_token.html#
