Skip to content

Commit

Permalink
fix: chunked data in EMSI client for xblock-skills job (#182)
Browse files Browse the repository at this point in the history
* fix: chunked data at 50000 byte in EMSI client for xblock-skills job

* fix: Added unit test
  • Loading branch information
mahamakifdar19 committed Sep 13, 2023
1 parent 2c39d13 commit 50d77ac
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Change Log
Unreleased

[1.44.2] - 2023-09-11
---------------------
* fix: truncate data to 50,000 bytes in EMSI client for xblock-skills job

[1.44.1] - 2023-08-25
---------------------
* feat: add prefetch related to the whitelisted product skills
Expand Down
2 changes: 1 addition & 1 deletion taxonomy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
# 2. MINOR version when you add functionality in a backwards compatible manner, and
# 3. PATCH version when you make backwards compatible bug fixes.
# More details can be found at https://semver.org/
__version__ = '1.44.1'
__version__ = '1.44.2'

default_app_config = 'taxonomy.apps.TaxonomyConfig' # pylint: disable=invalid-name
6 changes: 6 additions & 0 deletions taxonomy/emsi/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class EMSISkillsApiClient(JwtEMSIApiClient):
"""

API_BASE_URL = urljoin(JwtEMSIApiClient.API_BASE_URL, '/skills/versions/8.9')
MAX_LIGHTCAST_DATA_SIZE = 50000 # Maximum 50,000-byte data is supported by LightCast

def __init__(self):
"""
Expand Down Expand Up @@ -229,6 +230,11 @@ def get_product_skills(self, text_data):
Returns:
dict: A dictionary containing details of all the skills.
"""

if text_data and len(text_data) > self.MAX_LIGHTCAST_DATA_SIZE:
# Truncate the text_data to 50,000 bytes since only 50,000-byte data is supported by LightCast
text_data = text_data[:self.MAX_LIGHTCAST_DATA_SIZE]

data = {
'text': text_data
}
Expand Down
17 changes: 17 additions & 0 deletions tests/emsi/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import logging
from time import time
from unittest import mock
from faker import Faker

import responses
from pytest import raises
Expand Down Expand Up @@ -161,6 +163,21 @@ def test_get_product_skills(self):

assert skills == SKILLS_EMSI_CLIENT_RESPONSE

def test_get_product_skills_large_text(self):
    """
    Validate the behavior of the client while fetching product skills for very large text.

    Text longer than ``MAX_LIGHTCAST_DATA_SIZE`` is expected to be cut down to its
    leading ``MAX_LIGHTCAST_DATA_SIZE`` characters before being posted.
    """
    api_response = mock.Mock()
    api_response.json.return_value = SKILLS_EMSI_RESPONSE
    # Report the token as not expired and replace the underlying client with a mock
    # whose ``post`` returns the canned response above.
    self.client.is_token_expired = mock.Mock(return_value=False)
    self.client.client = mock.MagicMock(post=mock.Mock(return_value=api_response))

    max_data_size = self.client.MAX_LIGHTCAST_DATA_SIZE
    # Integer arithmetic: Faker's ``text`` expects a character count, not a float.
    skill_text_data = Faker().text(max_data_size + max_data_size // 10)
    # Faker output is random and only bounded from above, so confirm the input
    # really exceeds the limit; otherwise the truncation path is never exercised.
    assert len(skill_text_data) > max_data_size

    self.client.get_product_skills(skill_text_data)

    # Keyword arguments of the first ``post`` call carry the request payload.
    posted_text = self.client.client.post.call_args_list[0][1]['json']['text']
    # Comparing against the prefix checks both the length and which part was kept.
    assert posted_text == skill_text_data[:max_data_size]

@mock_api_response(
method=responses.POST,
url=EMSISkillsApiClient.API_BASE_URL + '/extract',
Expand Down

0 comments on commit 50d77ac

Please sign in to comment.