Merge pull request #144 from sul-dlss/date-cataloged
Inserts cataloged date into instance records
shelleydoljack committed Sep 16, 2022
2 parents dbdd1de + 348ef26 commit 76b6729
Showing 7 changed files with 33 additions and 13 deletions.
README.md: 5 changes (3 additions & 2 deletions)
@@ -22,7 +22,7 @@ Based on the documentation, [Running Airflow in Docker](https://airflow.apache.o
10. Bring up airflow, `docker-compose up` to run the containers in the
    foreground, use `docker-compose up -d` to run as a daemon.
1. Access Airflow locally at http://localhost
-1. Log into the worker container using `docker exec -it libsys-airflow_airflow-worker-1 /bin/bash` to view the raw work files.
+1. Log into the worker container using `docker exec -it libsys-airflow_airflow-worker_1 /bin/bash` to view the raw work files.

### For FOLIO migration loads
1. In the Airflow UI under Admin > Connections, add `bib_path` with connection type `File (Path)`.
@@ -74,7 +74,8 @@ by running `cap {stage} airflow:webserver` or ssh into the server and run `docke
to see changes in the running Airflow environment.

## Testing
-First install the FOLIO-FSE toolsm run: `pip install folioclient folio-migration-tools folio-uuid`
+First install the FOLIO-FSE tools run: `pip install folioclient folio-uuid`
+Install sul-dlss fork of folio_migration_tools: `pip install ${local_directory}/folio_migration_tools`

Then, to run the test suite, use [pytest](https://docs.pytest.org/).
`pytest`
dags/auto_load_bibs.py: 4 changes (3 additions & 1 deletion)
@@ -41,10 +41,12 @@ def create_bib_loads():

    bib_record_groups = []
    for marc_file in files_path.glob("*.*rc"):
-        record_group = {"marc": str(marc_file), "tsv": [], "tsv-base": None}
+        record_group = {"marc": str(marc_file), "tsv": [], "tsv-base": None, "tsv-dates": None}
        for tsv_file in files_path.glob(f"{marc_file.stem}*.tsv"):
            if tsv_file.name == f"{marc_file.stem}.tsv":
                record_group["tsv-base"] = str(tsv_file)
+            elif tsv_file.name == f"{marc_file.stem}.dates.tsv":
+                record_group["tsv-dates"] = str(tsv_file)
            else:
                record_group["tsv"].append(str(tsv_file))
        bib_record_groups.append(record_group)
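For a directory containing the fixture files used in `test_helpers.py` below (`sample.mrc`, `sample.tsv`, `sample.public.tsv`, `sample.circ.tsv`, `sample.dates.tsv`), the grouping loop above would yield an entry like this sketch (real paths would be absolute strings):

```python
# Shape of one record_group after the loop; the new "tsv-dates" slot holds
# the sidecar file carrying the CREATED_DATE/CATALOGED_DATE columns.
{
    "marc": "sample.mrc",
    "tsv": ["sample.public.tsv", "sample.circ.tsv"],
    "tsv-base": "sample.tsv",
    "tsv-dates": "sample.dates.tsv",
}
```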
dags/bib_records.py: 1 change (1 addition & 0 deletions)
@@ -146,6 +146,7 @@ def marc_only(*args, **kwargs):
        op_kwargs={
            "library_config": sul_config,
            "marc_stem": """{{ ti.xcom_pull('move-transform.move-marc-files') }}""",  # noqa
+            "dates_tsv": "{{ ti.xcom_pull('bib-file-groups', key='tsv-dates') }}"
        },
    )

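The new `dates_tsv` value is a Jinja template: Airflow renders it just before the task executes, substituting whatever an upstream task pushed to XCom under the key `tsv-dates`. A minimal sketch of that mechanism, assuming Airflow 2.x; the DAG id and callables are hypothetical, while the `bib-file-groups` task id comes from the diff above:

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator


def push_dates(**kwargs):
    # stands in for get_bib_files in plugins/folio/helpers.py
    kwargs["ti"].xcom_push(key="tsv-dates", value="sample.dates.tsv")


def use_dates(dates_tsv, **kwargs):
    print(dates_tsv)  # prints the rendered value: sample.dates.tsv


with DAG("xcom_template_demo", start_date=datetime(2022, 9, 1), schedule_interval=None) as dag:
    producer = PythonOperator(task_id="bib-file-groups", python_callable=push_dates)
    consumer = PythonOperator(
        task_id="use-dates",
        python_callable=use_dates,
        # op_kwargs is a templated field, so the expression is rendered at runtime
        op_kwargs={"dates_tsv": "{{ ti.xcom_pull('bib-file-groups', key='tsv-dates') }}"},
    )
    producer >> consumer
```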
plugins/folio/helpers.py: 1 change (1 addition & 0 deletions)
@@ -66,6 +66,7 @@ def get_bib_files(**kwargs):
    task_instance.xcom_push(key="marc-file", value=bib_file_load["marc"])
    task_instance.xcom_push(key="tsv-files", value=bib_file_load["tsv"])
    task_instance.xcom_push(key="tsv-base", value=bib_file_load["tsv-base"])
+    task_instance.xcom_push(key="tsv-dates", value=bib_file_load["tsv-dates"])


def move_marc_files(*args, **kwargs) -> str:
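Every key pushed here is retrievable by downstream tasks in the same DAG run, either through a template as in `bib_records.py` above or directly from the task instance inside a `python_callable`. A hedged sketch of the direct pull (the callable name is hypothetical):

```python
def downstream_callable(**kwargs):
    task_instance = kwargs["ti"]
    # returns the value pushed under 'tsv-dates' by the bib-file-groups task,
    # or None if nothing was pushed
    dates_tsv = task_instance.xcom_pull(task_ids="bib-file-groups", key="tsv-dates")
    print(dates_tsv)
```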
plugins/folio/instances.py: 17 changes (11 additions & 6 deletions)
@@ -1,5 +1,6 @@
import json
import logging
+import pandas as pd

from folio_migration_tools.migration_tasks.bibs_transformer import BibsTransformer

@@ -8,15 +9,17 @@
logger = logging.getLogger(__name__)


-def _add_version(bibs_transformer: BibsTransformer):
-    """
-    Handles Optimistic locking by assigning an initial version
-    """
+def _adjust_records(bibs_transformer: BibsTransformer, tsv_dates: str):
+    dates_df = pd.read_csv(tsv_dates, sep="\t", dtype={"CATKEY": str, "CREATED_DATE": str, "CATALOGED_DATE": str})
    records = []
    with open(bibs_transformer.processor.results_file.name) as fo:
        for row in fo.readlines():
            record = json.loads(row)
-            record["_version"] = 1
+            record["_version"] = 1  # for handling optimistic locking
+            ckey = record["hrid"].removeprefix("a")
+            matched_row = dates_df.loc[dates_df["CATKEY"] == ckey]
+            if matched_row["CATALOGED_DATE"].values[0] != "0":
+                record["catalogedDate"] = matched_row["CATALOGED_DATE"].values[0]
            records.append(record)
    with open(bibs_transformer.processor.results_file.name, "w+") as fo:
        for record in records:
@@ -54,6 +57,8 @@ def run_bibs_transformer(*args, **kwargs):

    marc_stem = kwargs["marc_stem"]

+    tsv_dates = kwargs["dates_tsv"]
+
    library_config.iteration_identifier = dag.run_id

    bibs_configuration = BibsTransformer.TaskConfiguration(
@@ -75,6 +80,6 @@ def run_bibs_transformer(*args, **kwargs):

    bibs_transformer.do_work()

-    _add_version(bibs_transformer)
+    _adjust_records(bibs_transformer, tsv_dates)

    bibs_transformer.wrap_up()
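A standalone sketch of the lookup `_adjust_records` performs, using the same column layout as the dates TSV in `test_instances.py` below; the in-memory buffer stands in for a real file path:

```python
import io

import pandas as pd

# dtype=str keeps catkeys and dates as text, preserving the "0" sentinel
# used for records that have no cataloged date
dates_df = pd.read_csv(
    io.StringIO(
        "CATKEY\tCREATED_DATE\tCATALOGED_DATE\n"
        "123456\t19900927\t19950710\n"
        "98765\t20220101\t0\n"
    ),
    sep="\t",
    dtype={"CATKEY": str, "CREATED_DATE": str, "CATALOGED_DATE": str},
)

record = {"hrid": "a123456", "_version": 1}
ckey = record["hrid"].removeprefix("a")  # FOLIO hrid "a123456" -> catkey "123456"
matched_row = dates_df.loc[dates_df["CATKEY"] == ckey]
if matched_row["CATALOGED_DATE"].values[0] != "0":
    record["catalogedDate"] = matched_row["CATALOGED_DATE"].values[0]

print(record)  # {'hrid': 'a123456', '_version': 1, 'catalogedDate': '19950710'}
```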
plugins/tests/test_helpers.py: 2 changes (2 additions & 0 deletions)
@@ -250,6 +250,7 @@ def test_get_bib_files():
"marc": "sample.mrc",
"tsv": ["sample.public.tsv", "sample.circ.tsv"],
"tsv-base": "sample.tsv",
"tsv-dates": "sample.dates.tsv"
},
}
}
@@ -262,6 +263,7 @@ def test_get_bib_files():
assert messages["marc-file"].startswith("sample.mrc")
assert len(messages["tsv-files"]) == 2
assert messages["tsv-base"].startswith("sample.tsv")
assert messages["tsv-dates"].startswith("sample.dates.tsv")
messages


plugins/tests/test_instances.py: 16 changes (12 additions & 4 deletions)
@@ -2,7 +2,7 @@
import pydantic

from plugins.folio.instances import (
-    _add_version,
+    _adjust_records,
    post_folio_instance_records,
    run_bibs_transformer,
)
@@ -24,21 +24,29 @@ class MockBibsTransformer(pydantic.BaseModel):
processor = MockBibsProcessor()


-def test_add_version(mock_file_system):  # noqa
+def test_adjust_records(mock_file_system):  # noqa
    bib_transformer = MockBibsTransformer()
    instances_file = mock_file_system[3] / "folio_srs_instances.json"
    instances_file.write_text(
-        json.dumps({"id": "3e815a91-8a6e-4bbf-8bd9-cf42f9f789e1"})
+        """{"id": "3e815a91-8a6e-4bbf-8bd9-cf42f9f789e1", "hrid": "a123456"}
+{"id": "123326dd-9924-498f-9ca3-4fa00dda6c90", "hrid": "a98765"}"""
    )
+    tsv_dates_file = mock_file_system[3] / "libr.ckeys.001.dates.tsv"
+    tsv_dates_file.write_text(
+        """CATKEY\tCREATED_DATE\tCATALOGED_DATE
+123456\t19900927\t19950710
+98765\t20220101\t0""")

    bib_transformer.processor.results_file.name = str(instances_file)

-    _add_version(bib_transformer)
+    _adjust_records(bib_transformer, str(tsv_dates_file))

    with instances_file.open() as fo:
        instance_records = [json.loads(row) for row in fo.readlines()]

    assert instance_records[0]["_version"] == 1
+    assert instance_records[0]["catalogedDate"] == "19950710"
+    assert "catalogedDate" not in instance_records[1]


def test_post_folio_instance_records():
