Skip to content

Commit

Permalink
Generate gobi list during gobi selections
Browse files Browse the repository at this point in the history
  • Loading branch information
jgreben committed May 9, 2024
1 parent 24b8444 commit cb5d4f8
Show file tree
Hide file tree
Showing 3 changed files with 523 additions and 2 deletions.
10 changes: 8 additions & 2 deletions libsys_airflow/dags/data_exports/gobi_transmission.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
archive_transmitted_data_task,
)

from libsys_airflow.plugins.data_exports.marc.gobi import gobi_list_from_marc_files

logger = logging.getLogger(__name__)

default_args = {
Expand All @@ -37,11 +39,15 @@ def send_gobi_records():

gather_files = gather_files_task(vendor="gobi")

transmit_data = transmit_data_ftp_task("ftp-ftp.ybp.com-stanford", gather_files)
generate_isbn_list = gobi_list_from_marc_files(gather_files)

transmit_data = transmit_data_ftp_task(
"ftp-ftp.ybp.com-stanford", generate_isbn_list
)

archive_data = archive_transmitted_data_task(transmit_data['success'])

start >> gather_files >> transmit_data >> archive_data >> end
start >> gather_files >> generate_isbn_list >> transmit_data >> archive_data >> end


send_gobi_records()
101 changes: 101 additions & 0 deletions libsys_airflow/plugins/data_exports/marc/gobi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import ast
import itertools
import logging
import pathlib
import pymarc
import re

from libsys_airflow.plugins.data_exports.marc.transformer import Transformer

logger = logging.getLogger(__name__)


def gobi_list_from_marc_files(marc_file_list: str):
    """Generate one Gobi list file per MARC file and return their paths.

    Args:
        marc_file_list: String holding a Python literal (parsed with
            ``ast.literal_eval``) that evaluates to an iterable of MARC
            file paths.

    Returns:
        A list with the value returned by ``GobiTransformer.generate_list``
        for each MARC file, in input order.
    """
    # The transformer is built before the argument is parsed, and a single
    # instance is shared across all files.
    transformer = GobiTransformer()
    return [
        transformer.generate_list(marc_file=marc_file)
        for marc_file in ast.literal_eval(marc_file_list)
    ]


class GobiTransformer(Transformer):
    """Transformer that writes a pipe-delimited Gobi ISBN list for a MARC file."""

    def generate_list(self, marc_file) -> pathlib.Path:
        """Write the Gobi ISBN list for ``marc_file`` and return its path.

        Every 020 field of every record is inspected. For each ISBN that
        passes the 10/13-digit check, FOLIO holdings are queried; for each
        holding returned:

        * the ISBN is added to the ebook list when the holding's type
          resolves to ``'Electronic'`` via ``self.holdings_type`` and the
          record carries neither a ``subscribed``/``gobi`` note in 856/956
          subfield x nor an 035 whose first subfield a starts with
          ``gls<digits>``;
        * the ISBN is added to the print list when the holding has at least
          one item in FOLIO.

        An ISBN is appended once per qualifying holding, so the output may
        contain repeated lines.

        Args:
            marc_file: Path (str or path-like) of the binary MARC file.

        Returns:
            Path of the written list, ``stf.<marc file stem>.txt``, located
            two directory levels above the MARC file's name (the parent of
            the MARC file's directory).
        """
        marc_path = pathlib.Path(marc_file)
        gobi_list_name = marc_path.stem
        gobi_path = pathlib.Path(marc_path.parent.parent) / f"stf.{gobi_list_name}.txt"

        with marc_path.open('rb') as fo:
            marc_records = list(pymarc.MARCReader(fo))

        logger.info(
            f"Reading {len(marc_records):,} record(s) for gobi data export list"
        )

        # Compiled once; accepts only digits and hyphens with exactly 10 or
        # 13 digits in total.
        isbn_pattern = re.compile(r"^(?=(?:\D*\d){10}(?:(?:\D*\d){3})?$)[\d-]+$")

        print_list = []
        ebook_list = []

        for i, record in enumerate(marc_records):
            if not i % 100:
                logger.info(f"{i:,} records processed")

            # MARCReader yields None for a record it could not parse.
            if record is None:
                logger.warning(f"Skipping unreadable MARC record at position {i:,}")
                continue

            isbn_fields = record.get_fields("020")
            if not isbn_fields:
                continue

            # Record-level facts: computed once instead of per ISBN/holding.
            link_notes = {
                note.lower()
                for field in record.get_fields("856") + record.get_fields("956")
                for note in field.get_subfields("x")
            }
            has_vendor_note = bool({'subscribed', 'gobi'} & link_notes)
            # Only the first subfield a of each 035 is examined; an 035
            # without subfield a is ignored rather than raising IndexError.
            has_gls_number = any(
                re.search("^gls[0-9]+", first_a)
                for field in record.get_fields("035")
                for first_a in field.get_subfields("a")[:1]
            )
            ebook_excluded = has_vendor_note or has_gls_number

            for stdnum in isbn_fields:
                isbn_values = stdnum.get_subfields("a")
                if not isbn_values:
                    # 020 without subfield a (e.g. only a cancelled ISBN).
                    continue
                isbn = isbn_values[0]

                # Exclude non-10 or 13-digit numbers
                # NOTE(review): `break` also abandons the record's remaining
                # 020 fields; kept as-is -- confirm `continue` is not intended.
                if not isbn_pattern.search(isbn):
                    break

                holdings_result = self.folio_client.folio_get(
                    f"/holdings-storage/holdings?query=(isbn=={isbn})"
                )

                for holding in holdings_result['holdingsRecords']:
                    holdings_type_id = holding.get("holdingsTypeId", "")
                    # self.holdings_type comes from Transformer; presumably it
                    # maps holdings type ids to names -- verify against base.
                    is_electronic = (
                        len(holdings_type_id) > 0
                        and self.holdings_type.get(holdings_type_id) == 'Electronic'
                    )

                    if is_electronic and not ebook_excluded:
                        ebook_list.append(isbn)

                    items_result = self.folio_client.folio_get(
                        f"inventory/items?query=(holdingsRecordId=={holding['id']})"
                    )

                    if items_result['items']:
                        print_list.append(isbn)

        # Line format: <isbn>|<print or ebook>|325099. The meaning of the
        # trailing 325099 is not shown here (presumably a Gobi account
        # identifier -- TODO confirm).
        with gobi_path.open("w+") as fo:
            for p_isbn in print_list:
                fo.write(f"{p_isbn}|print|325099\n")

            for e_isbn in ebook_list:
                fo.write(f"{e_isbn}|ebook|325099\n")

        return gobi_path

0 comments on commit cb5d4f8

Please sign in to comment.