In [None]:
!pip install chars2vec
!pip install -U sentence-transformers

from sentence_transformers import SentenceTransformer
import chars2vec
import pickle
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import json

def _load_pickle(path):
  """Return the object stored in the pickle file at `path`."""
  # NOTE(review): pickle.load executes arbitrary code from the file; only use
  # with data files from a trusted source.
  with open(path, 'rb') as handle:
    return pickle.load(handle)


# Character-level encoder applied to author names further down.
chars2vec_model = chars2vec.load_model('data/Auth2Vec_model')
# Sentence encoder applied to the reference title and the venue full form.
bert_model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Lookup from predicted class index to author identity.
index2auth = _load_pickle('data/index2auth.pickle')
# Lookup from venue key (as it appears in a reference) to the text that gets encoded.
venue_fullforms = _load_pickle('data/venue_fullforms')

Collecting chars2vec
[?25l  Downloading https://files.pythonhosted.org/packages/04/0a/8c327aae23e0532d239ec7b30446aca765eb5d9547b4c4b09cdd82e49797/chars2vec-0.1.7.tar.gz (8.1MB)
[K     |████████████████████████████████| 8.1MB 4.7MB/s 
[?25hBuilding wheels for collected packages: chars2vec
  Building wheel for chars2vec (setup.py) ... [?25l[?25hdone
  Created wheel for chars2vec: filename=chars2vec-0.1.7-cp36-none-any.whl size=8111096 sha256=4219de1189189b9e0edcb5c3c82155a5c8d5ca503cd8621513a69f8664310180
  Stored in directory: /root/.cache/pip/wheels/97/b6/65/d7e778ef1213ec77d315aea0f536068b96e36cc94c02abbfde
Successfully built chars2vec
Installing collected packages: chars2vec
Successfully installed chars2vec-0.1.7
Collecting sentence-transformers
[?25l  Downloading https://files.pythonhosted.org/packages/f5/5a/6e41e8383913dd2ba923cdcd02be2e03911595f4d2f9de559ecbed80d2d3/sentence-transformers-0.3.9.tar.gz (64kB)
[K     |████████████████████████████████| 71kB 3.1MB/s 
[?25hColl

100%|██████████| 245M/245M [00:33<00:00, 7.22MB/s]


In [None]:
def generateEmbeddings(reference):
  """Build the model-input vectors for every author of a reference.

  For each author (the "main author") one vector is produced per co-author by
  concatenating, in this order: the chars2vec embedding of the main author,
  the chars2vec embedding of the co-author, the sentence embedding of the
  title, and the sentence embedding of the venue's full form.

  Args:
    reference: mapping with the keys "authors" (list of name strings),
      "title" (string) and "venue" (a key of the module-level
      `venue_fullforms`).

  Returns:
    dict mapping each author's position in reference["authors"] to a numpy
    array holding one row per co-author. Co-authors are every listed name
    that differs from the main author's name, so an author with no such name
    (e.g. a single-author reference) maps to an empty array.

  Raises:
    KeyError: if `reference` lacks one of the keys above, or if the venue is
      not present in `venue_fullforms` (only looked up when there is at least
      one author).
  """
  authors_list = reference["authors"]
  title = reference["title"]
  venue = reference["venue"]

  # Nothing to embed; return before touching the encoders or the venue table.
  if len(authors_list) == 0:
    return {}

  # Title and venue are the same for every author of the reference, so they
  # are encoded once instead of once per author.
  title_emb = bert_model.encode(title)
  venue_emb = bert_model.encode(venue_fullforms[venue])

  # Each name is encoded once, with its own single-word call as before, and
  # then reused both as main author and as co-author.
  author_embs = [chars2vec_model.vectorize_words([author])[0] for author in authors_list]

  reference_embedding = {}
  for i, main_author in enumerate(authors_list):
    rows = [
        np.concatenate([author_embs[i], author_embs[j], title_emb, venue_emb])
        for j, co_author in enumerate(authors_list)
        if co_author != main_author
    ]
    reference_embedding[i] = np.array(rows)

  return reference_embedding

In [None]:
def predict_authors(reference,saved_model):
  """Predict an author identity for every author name of a reference.

  The vectors produced by `generateEmbeddings` for one author (one row per
  co-author) are L2-normalized row by row, scored by `saved_model`, and the
  scores are summed over the rows; the class with the largest summed score
  is looked up in the module-level `index2auth`.

  Args:
    reference: mapping accepted by `generateEmbeddings`; its "authors" list
      supplies the keys of the result.
    saved_model: object exposing `predict(batch)` that returns one score
      vector per input row (e.g. the loaded Keras model).

  Returns:
    dict mapping each author name of the reference to the predicted entry
    of `index2auth`. An author for which no input vector exists (no
    co-author) maps to None, because there is nothing to score; previously
    such authors were silently assigned `index2auth[0]`.
  """
  predicted_authors_dict = {}
  reference_embedding = generateEmbeddings(reference)

  for main_auth_index, main_author_embeddings in reference_embedding.items():
    author_name = reference["authors"][main_auth_index]

    # Without any row the summed score would degenerate to 0 and argmax
    # would always pick class 0, so report "no prediction" instead.
    if len(main_author_embeddings) == 0:
      predicted_authors_dict[author_name] = None
      continue

    # Normalize every row by its own L2 norm and score all rows in one call
    # instead of one predict() call per row.
    batch = tf.keras.utils.normalize(np.asarray(main_author_embeddings), axis=-1)
    main_author_predictions = np.asarray(saved_model.predict(batch))

    main_author_predictions_sum = np.sum(main_author_predictions, axis=0)
    index_of_main_author = int(np.argmax(main_author_predictions_sum))
    predicted_authors_dict[author_name] = index2auth[index_of_main_author]

  return predicted_authors_dict


In [None]:
# Example reference: abbreviated author names plus title and venue key.
ref = {
    "authors": ["B Li", "J Liu"],
    "title": "Research on Semantic-Based Web Services Registry Federation",
    "venue": "GCC",
}

# Trained classifier that scores the concatenated embeddings.
path_to_trained_model = 'data/Reference2Auth_model.h5'
saved_model = tf.keras.models.load_model(path_to_trained_model)

# Despite its name, `predicted_list` is the dict returned by predict_authors
# (author name as written in the reference -> predicted author identity).
predicted_list = predict_authors(ref, saved_model)
print(predicted_list)

{'B Li': 'Bing Li 0010', 'J Liu': 'Jin Liu'}
