In [1]:
import time
import requests
import json
import multiprocessing as mp
import mag_functions as F
from math import ceil

In [7]:
# Load the request payload used by the tests below: a JSON list holding a
# single work record (title, inverted abstract index, topics, ...).
with open("test_json_single.json", "rb") as f:
    input_json = json.loads(f.read())

In [8]:
# Display the loaded payload so its structure can be checked before it is sent.
input_json

[{'title': 'Multiplication of matrices of arbitrary shape on a data parallel computer',
  'abstract_inverted_index': {'Some': [0],
   'level-2': [1],
   'and': [2],
   'level-3': [3],
   'Distributed': [4],
   'Basic': [5],
   'Linear': [6],
   'Algebra': [7],
   'Subroutines': [8],
   '(DBLAS)': [9],
   'that': [10],
   'have': [11],
   'been': [12],
   'implemented': [13],
   'on': [14, 26],
   'the': [15, 27],
   'Connection': [16],
   'Machine': [17],
   'system': [18],
   'CM-200': [19],
   'are': [20],
   'described.': [21],
   'No': [22],
   'assumption': [23],
   'is': [24],
   'made': [25],
   'shape': [28],
   'or': [29],
   '...': [30]},
  'inverted': True,
  'topics': [10829, 10054, 11522]}]

In [48]:
# input that contains hard samples to make sure the model does not return errors
# with open("test_json_single_empty.json", "rb") as f:
#     input_json = json.load(f)

In [11]:
# input that contains a batch of 6 works to tag
# with open("test_json_batch.json", "rb") as f:
#     input_json = json.load(f)

### Testing Through SageMaker

In [3]:
import boto3
import json

In [38]:
# Runtime client used to call the deployed model endpoint.
sagemaker = boto3.client(service_name="sagemaker-runtime", region_name="us-east-1")

# NOTE(review): the endpoint name is blank in this copy of the notebook;
# presumably it must be set to the deployed endpoint before the calls below work.
endpoint_name = ""

In [41]:
%%time
response = sagemaker.invoke_endpoint(
                            EndpointName=endpoint_name, 
                            ContentType='application/json',
                            Body=bytes(json.dumps(input_json), 'utf-8'))

CPU times: user 3.14 ms, sys: 0 ns, total: 3.14 ms
Wall time: 267 ms


In [42]:
# Show the raw JSON text returned by the endpoint.
# NOTE(review): the body looks like a one-shot stream (boto3 StreamingBody), so
# re-running this cell without re-invoking the endpoint probably yields an
# empty string — confirm.
response['Body'].read().decode()

'[[{"keyword_id": "dynamic-load-balancing", "score": 0.544396}, {"keyword_id": "parallel-computing", "score": 0.527434}, {"keyword_id": "multicore-architectures", "score": 0.5102}]]'

### Testing Through API

In [98]:
%%time
# testing the call to the API one time
len(json.loads(F.get_tags(input_json, 1)[1]))

CPU times: user 36.2 ms, sys: 0 ns, total: 36.2 ms
Wall time: 956 ms


1

In [12]:
# Call the API helper again and display the parsed JSON carried in element [1]
# of its result (the predicted tags for the payload).
json.loads(F.get_tags(input_json, 1)[1])

[[{'topic_id': 10829,
   'topic_label': '829: Networks on Chip in System-on-Chip Design',
   'topic_score': 0.9978},
  {'topic_id': 10054,
   'topic_label': '54: Parallel Computing and Performance Optimization',
   'topic_score': 0.9963},
  {'topic_id': 11522,
   'topic_label': '1522: Design and Optimization of Field-Programmable Gate Arrays and Application-Specific Integrated Circuits',
   'topic_score': 0.991},
  {'topic_id': 12923,
   'topic_label': '2923: Connected Component Labeling Algorithms',
   'topic_score': 0.9897},
  {'topic_id': 12292,
   'topic_label': '2292: Graph Matching and Analysis Techniques',
   'topic_score': 0.9872}]]

### Test throughput with Locust

In [None]:
# Start Locust from the shell with the --processes option set to 4.
# NOTE(review): presumably relies on a locustfile in the working directory and
# keeps this cell running until Locust is stopped — confirm before Run All.
!locust --processes 4

### Testing API with randomly queried OpenAlex data

In [3]:
%%time
open_req = "https://api.openalex.org/works/random"
resp = requests.get(open_req).json()
print(resp['id'])

https://openalex.org/W183609487
CPU times: user 64.7 ms, sys: 0 ns, total: 64.7 ms
Wall time: 607 ms


In [7]:
%%time
open_req = "https://api.openalex.org/works/W2214219043"
resp = requests.get(open_req).json()
print(resp['id'])

if resp['primary_location']['source']:
    journal_display_name = resp['primary_location']['source']['display_name']
else:
    journal_display_name = ""


input_json = [{'title': resp['title'], 
               'abstract_inverted_index': resp['abstract_inverted_index'], 
               'journal_display_name': journal_display_name, 
               'referenced_works': resp['referenced_works'],
               'inverted': True}]

model_res = sagemaker.invoke_endpoint(
        EndpointName='',
        Body=bytes(json.dumps(input_json), 'utf-8'),
        ContentType='application/json',
        Accept='Accept')
print(input_json[0]['title'])
print("")
for i in json.loads(model_res['Body'].read().decode())[0]:
    print(i)

https://openalex.org/W2214219043
Divergent growth strategies between red algae and kelps influence biomechanical properties

{'topic_id': 10643, 'topic_label': '643: Ecological Dynamics of Marine Environments', 'topic_score': 0.9998}
{'topic_id': 10765, 'topic_label': '765: Marine Biodiversity and Ecosystem Functioning', 'topic_score': 0.9725}
{'topic_id': 14047, 'topic_label': '4047: Paleoceanography and Geology of the Black Sea', 'topic_score': 0.9641}
CPU times: user 36.1 ms, sys: 1.17 ms, total: 37.3 ms
Wall time: 484 ms
