From 0ac2db7b304ba4eefc2a3074973e97828fe8ff5e Mon Sep 17 00:00:00 2001 From: Jeremy Nelson Date: Wed, 10 Apr 2024 17:26:08 -0700 Subject: [PATCH] Tests for updated OCLC API wrapper and lint fixes --- .../plugins/data_exports/marc/oclc.py | 2 - .../plugins/data_exports/oclc_api.py | 89 ++++-- tests/data_exports/test_oclc_api.py | 295 +++++++++++------- 3 files changed, 240 insertions(+), 146 deletions(-) diff --git a/libsys_airflow/plugins/data_exports/marc/oclc.py b/libsys_airflow/plugins/data_exports/marc/oclc.py index e93fab29..2316f4a0 100644 --- a/libsys_airflow/plugins/data_exports/marc/oclc.py +++ b/libsys_airflow/plugins/data_exports/marc/oclc.py @@ -100,8 +100,6 @@ def divide(self, marc_file) -> None: case _: self.multiple_codes(record, code, record_ids) - - def multiple_codes(self, record: pymarc.Record, code: str, record_ids: list): instance_id = record['999']['i'] self.staff_notices.append((instance_id, code, record_ids)) diff --git a/libsys_airflow/plugins/data_exports/oclc_api.py b/libsys_airflow/plugins/data_exports/oclc_api.py index 99966664..ceaa6c09 100644 --- a/libsys_airflow/plugins/data_exports/oclc_api.py +++ b/libsys_airflow/plugins/data_exports/oclc_api.py @@ -9,8 +9,9 @@ from typing import List, Union from bookops_worldcat import WorldcatAccessToken, MetadataSession -from bookops_worldcat.errors import WorldcatRequestError +from bookops_worldcat.errors import WorldcatRequestError +from libsys_airflow.plugins.data_exports.marc.oclc import get_record_id from libsys_airflow.plugins.folio_client import folio_client logger = logging.getLogger(__name__) @@ -26,23 +27,20 @@ def __init__(self, **kwargs): self.oclc_token = None client_id = kwargs["client_id"] secret = kwargs["secret"] - self.httpx_client = None + self.httpx_client = httpx.Client() self.snapshot = None self.__authenticate__(client_id, secret) self.folio_client = folio_client() def __del__(self): - if self.httpx_client: - self.httpx_client.close() if self.snapshot: self.__close_snapshot__() + self.httpx_client.close() def __authenticate__(self, client_key, secret) -> None: try: self.oclc_token = WorldcatAccessToken( - key=client_key, - secret=secret, - scopes="WorldCatMetadataAPI" + key=client_key, secret=secret, scopes="WorldCatMetadataAPI" ) except Exception as e: msg = "Unable to Retrieve Worldcat Access Token" @@ -87,21 +85,41 @@ def __srs_uuid__(self, record) -> Union[str, None]: logger.error("Record Missing SRS uuid") return srs_uuid - def __update_035__(self, oclc_put_result: bytes, record: pymarc.Record) -> None: + def __update_035__( + self, oclc_put_result: bytes, record: pymarc.Record, srs_uuid: str + ) -> bool: """ - Extracts 035 field with new OCLC number adds to existing MARC21 + Extracts 035 field with new OCLC number and adds to existing MARC21 record """ oclc_record = pymarc.Record(data=oclc_put_result) # type: ignore fields_035 = oclc_record.get_fields('035') for field in fields_035: - subfields_a = field.get_subfields("a") - for subfield in subfields_a: + for subfield in field.get_subfields("a"): if subfield.startswith("(OCoLC"): record.add_ordered_field(field) break + return self.__put_folio_record__(srs_uuid, record) - def put_folio_record(self, srs_uuid: str, record: pymarc.Record) -> bool: + def __update_oclc_number__( + self, record: pymarc.Record, control_number: str, srs_uuid: str + ) -> bool: + """ + Updates 035 field if control_number has changed + """ + for field in record.get_fields('035'): + for subfield in field.get_subfields("a"): + if control_number in subfield: + return True # Control number already exists + new_035 = pymarc.Field( + tag='035', + indicators=[' ', ' '], + subfields=[pymarc.Subfield(code='a', value=f"(OCoLC){control_number}")], + ) + record.add_ordered_field(new_035) + return self.__put_folio_record__(srs_uuid, record) + + def __put_folio_record__(self, srs_uuid: str, record: pymarc.Record) -> bool: """ Updates FOLIO SRS with updated MARC record with new OCLC Number in the 035 field @@ -143,19 +161,20 @@ def new(self, marc_files: List[str]) -> dict: session.bib_validate( record=marc21, recordFormat="application/marc", - validationLevel="validateFull" + validationLevel="validateFull", ) new_record = session.bib_create( record=marc21, recordFormat="application/marc", ) - self.__update_035__(new_record, record) - except WorldcatRequestError as e: + if self.__update_035__(new_record.text, record, srs_uuid): # type: ignore + output['success'].append(srs_uuid) + else: + output['failures'].append(srs_uuid) + except WorldcatRequestError as e: logger.error(e) output['failures'].append(srs_uuid) continue - - output['success'].append(srs_uuid) return output def update(self, marc_files: List[str]): @@ -165,19 +184,25 @@ def update(self, marc_files: List[str]): return output marc_records = self.__read_marc_files__(marc_files) - for record in marc_records: - srs_uuid = self.__srs_uuid__(record) - if srs_uuid is None: - continue - post_result = self.httpx_client.post( - f"{OCLCAPIWrapper.worldcat_metadata_url}/manage/institution/holdings/set", - headers=self.oclc_headers, - data=record.as_marc21(), - ) - if post_result.status_code != 200: - logger.error(f"Failed to update record, error: {post_result.text}") - output['failures'].append(srs_uuid) - continue - # !Need to update OCLC code in 035 field - output['success'].append(srs_uuid) + with MetadataSession(authorization=self.oclc_token) as session: + for record in marc_records: + srs_uuid = self.__srs_uuid__(record) + if srs_uuid is None: + continue + oclc_id = get_record_id(record)[0] + try: + response = session.holdings_set(oclcNumber=oclc_id) + if response is None: + output['failures'].append(srs_uuid) + continue + if self.__update_oclc_number__( + record, response.json()['controlNumber'], srs_uuid + ): + output['success'].append(srs_uuid) + else: + output['failures'].append(srs_uuid) + except WorldcatRequestError as e: + logger.error(f"Failed to update record, error: {e}") + output['failures'].append(srs_uuid) + continue return output diff --git a/tests/data_exports/test_oclc_api.py b/tests/data_exports/test_oclc_api.py index f1d0d0f5..1780910d 100644 --- a/tests/data_exports/test_oclc_api.py +++ b/tests/data_exports/test_oclc_api.py @@ -1,83 +1,97 @@ -import json import httpx import pymarc import pytest +from unittest.mock import MagicMock + +from bookops_worldcat.errors import WorldcatAuthorizationError, WorldcatRequestError + from libsys_airflow.plugins.data_exports import oclc_api -def mock_metadata_session(): +def mock_metadata_session(authorization=None): + mock_response = MagicMock() + + def mock__enter__(*args): + return args[0] + + def mock__exit__(*args): + pass + + def mock_bib_create(**kwargs): + record = pymarc.Record(data=kwargs['record']) + if "001" in record and record["001"].value() == "00a": + raise WorldcatRequestError(b"400 Client Error") + record.add_field( + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC)345678')], + ) + ) + mock_response.text = record.as_marc21() + return mock_response + def mock_bib_validate(**kwargs): - - -# def mock_oclc_httpx(): -# def mock_response(request): -# response = None -# match request.method: - -# case 'PUT': -# if request.url.path.endswith('d63085c0-cab6-4bdd-95e8-d53696919ac1'): -# response = httpx.Response( -# status_code=422, text='Failed to update SRS' -# ) -# elif request.url.path.startswith('/source-storage/records'): -# response = httpx.Response(status_code=200) - -# case 'POST': -# if request.url.host.startswith('oauth.oclc.org'): -# response = httpx.Response( -# status_code=200, json={'access_token': "abcded12345"} -# ) -# if request.url.path.startswith('/worldcat/manage/bibs'): -# record = pymarc.Record(data=request.content) -# if len(record.get_fields('001')) == 1: -# response = httpx.Response( -# status_code=500, text='Internal Server Error' -# ) -# else: -# record.add_ordered_field( -# pymarc.Field( -# tag='035', -# indicators=[' ', ' '], -# subfields=[ -# pymarc.Subfield(code='a', value='(OCoLC)123489') -# ], -# ) -# ) -# response = httpx.Response( -# status_code=200, content=record.as_marc21() -# ) -# if request.url.path.startswith("/source-storage/snapshots"): -# response = httpx.Response(status_code=200) -# elif request.url.path.endswith('/manage/institution/holdings/set'): -# record = pymarc.Record(data=request.content) -# if len(record.get_fields('001')) == 1: -# response = httpx.Response( -# status_code=500, text='Internal Server Error' -# ) -# else: -# response = httpx.Response( -# status_code=200, -# json={ -# "controlNumber": "439887343", -# "requestedControlNumber": "439887343", -# "institutionCode": "158223", -# "institutionSymbol": "STF", -# "success": True, -# "message": "Holding Updated Successfully", -# "action": "Set Holdings", -# }, -# ) - -# return response -# return httpx.Client(transport=httpx.MockTransport(mock_response)) + pass + + def mock_holdings_set(**kwargs): + if kwargs['oclcNumber'] == "456907809": + raise WorldcatRequestError(b"400 Client Error") + if kwargs['oclcNumber'] == '2369001': + return + mock_response.json = lambda: { + "controlNumber": "439887343", + "requestedControlNumber": "439887343", + "institutionCode": "158223", + "institutionSymbol": "STF", + "success": True, + "message": "Holding Updated Successfully", + "action": "Set Holdings", + } + return mock_response + + mock_session = MagicMock() + # Supports mocking context manager + mock_session.__enter__ = mock__enter__ + mock_session.__exit__ = mock__exit__ + mock_session.bib_create = mock_bib_create + mock_session.bib_validate = mock_bib_validate + mock_session.holdings_set = mock_holdings_set + + return mock_session + + +def mock_worldcat_access_token(**kwargs): + if kwargs.get('key', '').startswith('n0taVal1dC1i3nt'): + raise WorldcatAuthorizationError( + b'{"code":401,"message":"No valid authentication credentials found in request"}' + ) + return "tk_6e302a204c2bfa4d266813cO647d62a77b10" -@pytest.fixture -def mock_bookops_package(mocker): - pass + +def mock_httpx_client(): + def mock_response(request): + response = None + match request.method: + + case 'PUT': + if request.url.path.endswith('d63085c0-cab6-4bdd-95e8-d53696919ac1'): + response = httpx.Response( + status_code=422, text='Failed to update SRS' + ) + elif request.url.path.startswith('/source-storage/records'): + response = httpx.Response(status_code=200) + + case 'POST': + if request.url.path.startswith("/source-storage/snapshots"): + response = httpx.Response(status_code=200) + + return response + + return httpx.Client(transport=httpx.MockTransport(mock_response)) -# @pytest.fixture def mock_folio_client(mocker): mock = mocker mock.okapi_headers = {} @@ -87,11 +101,14 @@ def mock_folio_client(mocker): @pytest.fixture def mock_oclc_api(mocker): - - mocker.patch( - "libsys_airflow.plugins.data_exports.oclc_api.WorldcatAccessToken", - return_value="tk_6e302a204c2bfa4d266813cO647d62a77b10" - ) + mock_httpx = mocker.MagicMock() + mock_httpx.Client = lambda: mock_httpx_client() + + mocker.patch.object(oclc_api, "httpx", mock_httpx) + + mocker.patch.object(oclc_api, "WorldcatAccessToken", mock_worldcat_access_token) + + mocker.patch.object(oclc_api, "MetadataSession", mock_metadata_session) mocker.patch( "libsys_airflow.plugins.data_exports.oclc_api.folio_client", @@ -104,8 +121,8 @@ def mock_oclc_api(mocker): def test_oclc_api_class_init(mock_oclc_api): oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) assert oclc_api_instance.folio_client.okapi_url == "https://okapi.stanford.edu/" @@ -115,8 +132,8 @@ def test_oclc_api_class_init(mock_oclc_api): def test_oclc_api_class_no_new_records(mock_oclc_api, caplog): oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) oclc_api_instance.new([]) @@ -153,8 +170,8 @@ def test_oclc_api_class_new_records(tmp_path, mock_oclc_api): marc_writer.write(record) oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) new_result = oclc_api_instance.new( @@ -170,13 +187,18 @@ def test_oclc_api_class_new_records(tmp_path, mock_oclc_api): def test_oclc_api_class_updated_records(tmp_path, mock_oclc_api): marc_record = pymarc.Record() marc_record.add_field( + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC-M)on455677')], + ), pymarc.Field( tag='999', indicators=['f', 'f'], subfields=[ pymarc.Subfield(code='s', value='ea5b38dc-8f96-45de-8306-a2dd673716d5') ], - ) + ), ) no_srs_record = pymarc.Record() @@ -196,8 +218,8 @@ def test_oclc_api_class_updated_records(tmp_path, mock_oclc_api): marc_writer.write(record) oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) updated_result = oclc_api_instance.update([str(marc_file.absolute())]) @@ -206,31 +228,19 @@ def test_oclc_api_class_updated_records(tmp_path, mock_oclc_api): assert updated_result['failures'] == [] -def test_oclc_api_failed_authenication(mocker): - mock_httpx = mocker.MagicMock() - mock_httpx.Client = httpx.Client( - transport=httpx.MockTransport(lambda *args: httpx.Response(500)) - ) - - mocker.patch.object(oclc_api, "httpx", mock_httpx) - - mocker.patch( - "libsys_airflow.plugins.data_exports.oclc_api.folio_client", - return_value=mock_folio_client, - ) +def test_oclc_api_failed_authentication(mock_oclc_api): - with pytest.raises(Exception, match="Unable to Retrieve Access Token"): - oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + with pytest.raises(Exception, match="Unable to Retrieve Worldcat Access Token"): + oclc_api.OCLCAPIWrapper( + client_id="n0taVal1dC1i3nt", secret="c867b1dd75e6490f99d1cd1c9252ef22" ) def test_oclc_api_missing_srs(mock_oclc_api, caplog): oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) record = pymarc.Record() @@ -260,8 +270,8 @@ def test_failed_oclc_new_record(tmp_path, mock_oclc_api): marc_writer.write(record) oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) new_response = oclc_api_instance.new([str(marc_file.absolute())]) @@ -283,8 +293,8 @@ def test_bad_srs_put_in_new_context(tmp_path, mock_oclc_api): ) oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) marc_file = tmp_path / "202403273-STF-new.mrc" @@ -301,18 +311,23 @@ def test_bad_srs_put_in_new_context(tmp_path, mock_oclc_api): def test_no_update_records(mock_oclc_api, caplog): oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) oclc_api_instance.update([]) assert "No updated marc records" in caplog.text -def test_bad_holdings_set_call(tmp_path, mock_oclc_api): +def test_bad_holdings_set_call(tmp_path, mock_oclc_api, caplog): marc_record = pymarc.Record() marc_record.add_field( pymarc.Field(tag='001', data='4566'), + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC)ocn456907809')], + ), pymarc.Field( tag='999', indicators=['f', 'f'], @@ -322,19 +337,75 @@ def test_bad_holdings_set_call(tmp_path, mock_oclc_api): ), ) + bad_srs_record = pymarc.Record() + bad_srs_record.add_field( + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC)7789932')], + ), + pymarc.Field( + tag='999', + indicators=['f', 'f'], + subfields=[ + pymarc.Subfield(code='s', value='d63085c0-cab6-4bdd-95e8-d53696919ac1') + ], + ), + ) + + no_response_record = pymarc.Record() + no_response_record.add_field( + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC)2369001')], + ), + pymarc.Field( + tag='999', + indicators=['f', 'f'], + subfields=[ + pymarc.Subfield(code='s', value='6325e8fd-101a-4972-8da7-298cd01d1a9d') + ], + ), + ) marc_file = tmp_path / "202403273-STF-update.mrc" with marc_file.open('wb+') as fo: marc_writer = pymarc.MARCWriter(fo) - for record in [marc_record]: + for record in [marc_record, bad_srs_record, no_response_record]: marc_writer.write(record) oclc_api_instance = oclc_api.OCLCAPIWrapper( - client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", - secret="c867b1dd75e6490f99d1cd1c9252ef22" + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", ) update_result = oclc_api_instance.update([str(marc_file.absolute())]) assert update_result['success'] == [] - assert update_result['failures'] == ['ea5b38dc-8f96-45de-8306-a2dd673716d5'] + assert sorted(update_result['failures']) == [ + '6325e8fd-101a-4972-8da7-298cd01d1a9d', + 'd63085c0-cab6-4bdd-95e8-d53696919ac1', + 'ea5b38dc-8f96-45de-8306-a2dd673716d5', + ] + + assert "Failed to update record" in caplog.text + + +def test_already_exists_control_number(tmp_path, mock_oclc_api): + marc_record = pymarc.Record() + marc_record.add_field( + pymarc.Field(tag='001', data='4566'), + pymarc.Field( + tag='035', + indicators=['', ''], + subfields=[pymarc.Subfield(code='a', value='(OCoLC)445667')], + ), + ) + + oclc_api_instance = oclc_api.OCLCAPIWrapper( + client_id="EDIoHuhLbdRvOHDjpEBtcEnBHneNtLUDiPRYtAqfTlpOThrxzUwHDUjMGEakoIJSObKpICwsmYZlmpYK", + secret="c867b1dd75e6490f99d1cd1c9252ef22", + ) + + assert oclc_api_instance.__update_oclc_number__(marc_record, '445667', '')