# Sentence Transformer

In [1]:
from sentence_transformers import SentenceTransformer, util
import torch
import json

In [2]:
torch.cuda.is_available()

True

In [3]:
embedder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1', device='cuda')


In [4]:
data = json.load(open('data.json', 'r'))

In [5]:
corpus = list()
hrefList = list()
for i in data:
    corpus.extend(data[i]['data'])
    hrefList.extend([data[i]['href']]*len(data[i]['data']))

In [6]:
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

In [7]:
top_k = min(5, len(corpus))

In [8]:
query = "how to update API by importing file"

In [9]:
query_embedding = embedder.encode(query, convert_to_tensor=True)

In [10]:
cos_scores = util.dot_score(query_embedding, corpus_embeddings)[0]
top_results = torch.topk(cos_scores, k=top_k)
print("Query:", query)
print("Top 5 most similar sentences in corpus:")
for score, idx in zip(top_results[0], top_results[1]):
    print(corpus[idx], "(Score: {:.4f}) | href:{}".format(score, hrefList[idx]))

Query: how to update API by importing file
Top 5 most similar sentences in corpus:
Updating an API by Importing an API from a File (Score: 0.9553) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
You can update an API by importing from file or from URL by clicking the Update button. (Score: 0.8579) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/re-api_details_rest.html#
You can not update an API by importing an API from a file if the API is in the active state. (Score: 0.8418) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
4. Select Update API by importing from file. (Score: 0.8008) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/ap

In [18]:
len(hrefList)

13855

In [11]:
semanticSearchResult = util.semantic_search(
    query_embedding, corpus_embeddings)
for i in semanticSearchResult[0]:
    # print(i)
    print(corpus[i['corpus_id']],
          "(Score: {:.4f}) | href:{}".format(i['score'], hrefList[i['corpus_id']]))


Updating an API by Importing an API from a File (Score: 0.9553) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
You can update an API by importing from file or from URL by clicking the Update button. (Score: 0.8579) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/re-api_details_rest.html#
You can not update an API by importing an API from a file if the API is in the active state. (Score: 0.8418) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
4. Select Update API by importing from file. (Score: 0.8008) | href:https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
API Gateway 10.11 | Using API

In [20]:
corpus[semanticSearchResult[0][0]['corpus_id']]

'REST API'

# FAISS

In [12]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer, util
import json
import time

In [13]:
data = json.load(open('data.json', 'r'))
corpus = list()
hrefList = list()
for i in data:
    corpus.extend(data[i]['data'])
    hrefList.extend([data[i]['href']]*len(data[i]['data']))

In [14]:
faissModel = SentenceTransformer(
    'distilbert-base-nli-mean-tokens', device='cuda')
faissEmbedding = faissModel.encode(corpus, show_progress_bar=True)

Batches:   0%|          | 0/652 [00:00<?, ?it/s]

In [15]:
index = faiss.IndexHNSWFlat(faissEmbedding.shape[1], 32)
index = faiss.IndexIVFPQ(index, faissEmbedding.shape[1], 10000, 16, 8)
index.train(faissEmbedding)
index.add(faissEmbedding)



In [16]:
faiss.write_index(index, 'faissHNSWFlat_IVFPQ.index')
faiss.read_index('faissHNSWFlat_IVFPQ.index')

<faiss.swigfaiss_avx2.IndexIVFPQ; proxy of <Swig Object of type 'faiss::IndexIVFPQ *' at 0x7f70163b55f0> >

In [17]:
def search(query):
    t=time.time()
    query_vector = faissModel.encode([query], show_progress_bar=True)
    k = 5
    top_k = index.search(query_vector, k)
    print('totaltime: {}'.format(time.time()-t))
    return [hrefList[_id] for _id in top_k[1].tolist()[0]]

In [18]:
# query=str(input())
results=search(query)
print('results :')
for result in results:
    print('\t',result)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

totaltime: 0.03818464279174805
results :
	 https://documentation.softwareag.com/webmethods/api_gateway/yai10-11/10-11_API_Gateway_webhelp/api-gateway-integrated-webhelp/updating_apis_file.html#
	 https://documentation.softwareag.com/webmethods/mywebmethods_server/mws10-11/10-11_MWSw/my-webmethods-server-webhelp/ta-allowing_users_to_access_the_workspace_management_page.html#
	 https://documentation.softwareag.com/webmethods/mywebmethods_server/mws10-11/10-11_MWSw/my-webmethods-server-webhelp/to-system_2.html#
	 https://documentation.softwareag.com/webmethods/mywebmethods_server/mws10-11/10-11_MWSw/my-webmethods-server-webhelp/to-system_2.html#
	 https://documentation.softwareag.com/webmethods/mywebmethods_server/mws10-11/10-11_MWSw/my-webmethods-server-webhelp/to-system_2.html#
