Skip to content

Commit

Permalink
Keep single records for every id
Browse files Browse the repository at this point in the history
Created objects under the path `records` that have different ids but the same rvk elements, encoded them as JSON, and re-decoded that JSON with `records` as the record path (i.e. the record container). This makes it possible to create a single record for each id.
  • Loading branch information
TobiasNx committed Jun 3, 2024
1 parent 239469d commit 69150b1
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//
// curl -XPOST --header 'Content-Type: application/x-ndjson' -d @bulk.ndjson 'http://localhost:9200/_bulk'

// Default file locations, each built by prefixing FLUX_DIR.
// NOTE(review): `outfile` is declared twice below (bulk.json and bulk.csv).
// This page is a diff view without +/- markers, so presumably only one of
// the two lines exists in the actual file -- confirm against the repository.
default outfile = FLUX_DIR + "bulk.json";
default outfile = FLUX_DIR + "bulk.csv";
default infile = FLUX_DIR + "aggregate_auslieferung_20191212.small.marcxml.gz";
default fixfile = FLUX_DIR + "fix-cg-to-es.fix";

Expand All @@ -21,7 +21,8 @@ infile
| decode-xml
| handle-marcxml
// Apply the transformation script referenced by `fixfile`.
| fix(fixfile)
| encode-json
// Re-read the JSON just produced, using "records" as the record path.
// Per the commit message, this is what yields one output record per id.
| decode-json(recordPath="records")
| encode-csv
//encode-json
// Write the result to the location given by `outfile`.
| write(outfile)
;
27 changes: 17 additions & 10 deletions Concordance-RVK-Verbundbibliothek/fix-cg-to-es.fix
Original file line number Diff line number Diff line change
@@ -1,34 +1,41 @@
# Declare the arrays that the rest of the script appends to.
# NOTE(review): both the plain (`id`, `rvk[]`) and the `@`-prefixed
# (`@id[]`, `@rvk[]`) names appear here; this is a diff view without +/-
# markers, so presumably some of these lines were removed -- confirm.
set_array("id")
set_array("rvk[]")
set_array("records[]")
set_array("@id[]")
set_array("@rvk[]")

# Walk every field matching "084??"; when its subfield 2 matches "rvk",
# append its subfield a to the rvk arrays.
do list(path: "084??", "var": "$i")
if any_match("$i.2", "rvk")
copy_field("$i.a","rvk[].$append")
copy_field("$i.a","@rvk[].$append")
end
end

# Presumably removes duplicate notations from `@rvk[]` -- confirm.
uniq("@rvk[]")


# Walk every field matching "035??"; when its subfield a carries the
# "(DE-605)" prefix, append that value to the id arrays.
do list(path: "035??", "var": "$i")
if any_match("$i.a", "^\\(DE-605\\)(.*)")
copy_field("$i.a","id.$append")
copy_field("$i.a","@id[].$append")
end
end
# Strip the "(DE-605)" prefix, keeping only the captured remainder.
replace_all("id.*","^\\(DE-605\\)(.*)","$1")
join_field("id",", ")
# NOTE(review): this targets `id[].*`, but the values above are appended to
# `id` and `@id[]`; it looks like this pattern may match nothing -- confirm
# whether `@id[].*` was intended.
replace_all("id[].*","^\\(DE-605\\)(.*)","$1")

# For every collected id, append a new element to `records[]` holding that
# id, and copy the `@rvk[]` array into the `rvk[]` of that same (last) element.
do list(path: "@id[]", "var": "$i")
copy_field("$i","records[].$append.id")
copy_field("@rvk[]","records[].$last.rvk[]")
end
# Strip the "(DE-605)" prefix from each record's id, keeping the remainder.
replace_all("records[].*.id","^\\(DE-605\\)(.*)","$1")

# NOTE(review): retaining only `rvk[]` and `id` here would presumably drop
# the `@`-prefixed fields and `records[]` that the checks below rely on;
# this is likely a removed line shown in the diff view -- confirm.
retain("rvk[]","id")
vacuum()

# Filter records without RVK
unless exists("rvk[]")
unless exists("@rvk[]")
reject()
end

# Filter records without hbz ids
unless exists("id")
unless exists("@id[]")
reject()
end


# Keep only the per-id `records[]` array in the output.
retain("records[]")


0 comments on commit 69150b1

Please sign in to comment.