In [1]:
from sentence_transformers import SentenceTransformer
from scipy import spatial

  from .autonotebook import tqdm as notebook_tqdm


# Load model

In [3]:
# Two sample sentences used to sanity-check the embeddings.
sentences = [
    "Hey there, are you good ?",
    "How are you today?",
]

# Load the SentenceTransformer model from the local directory and encode
# the samples; `vectors` holds one embedding per sentence, in input order.
SBERT_model = SentenceTransformer('../model/22k_sample/22k_sample/')
vectors = SBERT_model.encode(sentences)

In [4]:
# scipy returns the cosine *distance*; similarity is its complement.
cosine_distance = spatial.distance.cosine(vectors[0], vectors[1])
similarity = 1 - cosine_distance
print(similarity)

0.5722304582595825


# Save model to directory

In [6]:
# Write the loaded model to ./model so that the archiver step further down
# can pick up the files it references under model/.
directory = './model'
SBERT_model.save(path=directory)

In [19]:
# install torch-model-archiver
!pip install torch-model-archiver

Collecting torch-model-archiver
  Obtaining dependency information for torch-model-archiver from https://files.pythonhosted.org/packages/9b/20/08047e340f7d136695eec8230eb7eef9fdf5d4d75ddedb146d2b76b5d833/torch_model_archiver-0.9.0-py3-none-any.whl.metadata
  Downloading torch_model_archiver-0.9.0-py3-none-any.whl.metadata (1.4 kB)
Collecting enum-compat (from torch-model-archiver)
  Downloading enum_compat-0.0.3-py3-none-any.whl (1.3 kB)
Downloading torch_model_archiver-0.9.0-py3-none-any.whl (14 kB)
Installing collected packages: enum-compat, torch-model-archiver
Successfully installed enum-compat-0.0.3 torch-model-archiver-0.9.0


# Create mar file

In [8]:
!torch-model-archiver --model-name sbert --version 1.0 --serialized-file model/model.safetensors --handler run_handler.py --extra-files "model/config.json,model/vocab.txt" --export-path .


```sh
docker build -t ptserve-sbert:v1 .
docker run --rm -it -p 3000:8000 ptserve-sbert:v1
```

# Test the served model

In [9]:
import requests
import json

# Base URL of the served model (host port 3000 as published by `docker run`).
sbert_api = "http://localhost:3000"
sentences = ["Hey there, are you good ?" , "How are you today?"]

# NOTE(review): the archive was created with --model-name sbert but the route
# below uses "SBERT" — confirm the name the server registers the model under.
response = requests.post(sbert_api + '/predictions/SBERT',data = {'data' : json.dumps({'queries' : sentences})})

# The previous `if response.status_code:` was always true (every HTTP status
# code is a non-zero int), so error responses were parsed as embeddings.
# Fail loudly on any 4xx/5xx instead.
response.raise_for_status()

# The endpoint returns one embedding per query, in request order.
vectors = response.json()
similarity = 1 - spatial.distance.cosine(vectors[0], vectors[1])
print(similarity)

0.5722305470510088


# Load text embeddings

In [58]:
# Read the precomputed label embeddings. The context manager closes the file
# handle deterministically (a bare open() inside json.load() leaves it to the GC).
with open('../text_ebd.json','r') as ebd_file:
    text_ebd = json.load(ebd_file)

In [59]:
# Group the embedding vectors by label: {label: [embedding, ...]}.
embeddings = {}
for record in text_ebd:
    # setdefault creates the per-label list the first time a label is seen.
    embeddings.setdefault(record['label'], []).append(record['embedding_label'])

In [69]:
import numpy as np

# Embed the free-text skill description with the served model.
my_skill = "python , machine learning , deep learning"
response = requests.post(sbert_api + '/predictions/SBERT',data = {'data' : json.dumps({'queries' : [my_skill]})})

# Reset before validating the response so a failed request never leaves a
# stale `result` from a previous run behind.
result = {}

# The previous `if response.status_code:` was always true (every HTTP status
# code is a non-zero int); raise on 4xx/5xx instead of scoring an error body.
response.raise_for_status()
my_skill_vector = response.json()[0]

# For every label, average the cosine similarity between the skill vector
# and each embedding stored under that label.
for label, label_embeddings in embeddings.items():
    similarity_scores = [
        1 - spatial.distance.cosine(my_skill_vector, embed)
        for embed in label_embeddings
    ]
    result[label] = np.mean(similarity_scores)

In [70]:
# Rank labels from most to least similar; dicts preserve insertion order,
# so rebuilding from the sorted pairs keeps the ranking.
ranked_pairs = sorted(result.items(), key=lambda pair: pair[1], reverse=True)
result = dict(ranked_pairs)
print(result)


{'AI Engineer': 0.3174099713230376, 'Data Scientist': 0.260975348115142, 'System Engineer': 0.24309589227261694, 'Data Engineer': 0.20026894576905616, 'ERP Engineer': 0.19408055682212033, 'product manager': 0.18510211040041114, 'project management': 0.18031327409847833, 'IT Consultant': 0.17627145356397053, 'Data Analyst': 0.17301229549634933, 'IT Lead': 0.16445666113500776, 'Data Architect': 0.15007072930680554, 'QA-QC': 0.14024028137735015, 'Designer': 0.13677837792050782, 'business analyst': 0.1355498116775852, 'product owner': 0.1279543394130478, 'embedded engineer': 0.12150634112031009, 'System Admin': 0.10302775103177542, 'Solution Architect': 0.09668156490867848, 'Tester': 0.09646868402118336, 'DevOps Engineer': 0.06966344768696633, 'game developer': 0.0645053750107425, 'back-end developer': 0.05440490205038706, 'full-stack developer': 0.038860542154955165, 'front-end developer': 0.015493780341919387, 'mobile developer': -0.0034063260511167925}
