Skip to content

Commit

Permalink
Keep single records for every id
Browse files Browse the repository at this point in the history
The fix now creates one object per id under the path `records`; these objects have different ids but share the same RVK elements. The result is encoded as JSON and then re-read with `decode-json(recordPath="records")`, so that `records` serves as the record container. This yields a single record for each id.
  • Loading branch information
TobiasNx committed Jun 3, 2024
1 parent 239469d commit 06c1955
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
4 changes: 3 additions & 1 deletion Concordance-RVK-Verbundbibliothek/bulk.csv
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
"HT013166356, HT018625006, TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5603","CI 5604","EC 2430","IH 34381"
"HT013166356","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"
"HT018625006","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"
"TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//
// curl -XPOST --header 'Content-Type: application/x-ndjson' -d @bulk.ndjson 'http://localhost:9200/_bulk'

// Default paths for the workflow, all relative to FLUX_DIR.
// NOTE(review): the two `outfile` lines are the before/after pair of this diff
// hunk (hunk header "-11,7 +11,7"); presumably only the "bulk.csv" variant is in
// the committed file — confirm against the repository.
default outfile = FLUX_DIR + "bulk.json";
default outfile = FLUX_DIR + "bulk.csv";
// Input file read by the workflow.
default infile = FLUX_DIR + "aggregate_auslieferung_20191212.small.marcxml.gz";
// Fix script handed to the fix(...) step of the pipeline.
default fixfile = FLUX_DIR + "fix-cg-to-es.fix";

Expand All @@ -21,7 +21,8 @@ infile
| decode-xml
| handle-marcxml
| fix(fixfile)
| encode-json
| decode-json(recordPath="records")
| encode-csv
//encode-json
| write(outfile)
;
27 changes: 17 additions & 10 deletions Concordance-RVK-Verbundbibliothek/fix-cg-to-es.fix
Original file line number Diff line number Diff line change
@@ -1,34 +1,41 @@
# NOTE(review): this text is a diff rendering without +/- markers (hunk header
# "-1,34 +1,41"), so pre- and post-change lines sit next to each other. The
# plain-named variants ("id", "rvk[]") look like the removed lines and the
# "@"-prefixed / "records[]" variants like the added ones — confirm against the
# repository before running this text as-is.
#
# Purpose, as far as the visible lines show: collect the RVK notations from the
# 084 fields and the "(DE-605)" identifiers from the 035 fields, then build one
# entry in "records[]" per identifier, each carrying the same RVK list.

# Target fields for the collected values.
set_array("id")
set_array("rvk[]")
set_array("records[]")
set_array("@id[]")
set_array("@rvk[]")

# Collect subfield a of every 084 field whose subfield 2 matches "rvk".
do list(path: "084??", "var": "$i")
if any_match("$i.2", "rvk")
copy_field("$i.a","rvk[].$append")
copy_field("$i.a","@rvk[].$append")
end
end

# De-duplicate the collected RVK notations.
uniq("@rvk[]")


# Collect subfield a of every 035 field whose value starts with "(DE-605)".
do list(path: "035??", "var": "$i")
if any_match("$i.a", "^\\(DE-605\\)(.*)")
copy_field("$i.a","id.$append")
copy_field("$i.a","@id[].$append")
end
end
# Strip the "(DE-605)" prefix from each id, then join all ids into a single
# comma-separated string.
replace_all("id.*","^\\(DE-605\\)(.*)","$1")
join_field("id",", ")
# NOTE(review): this targets "id[]", but the arrays populated above are "id" and
# "@id[]" — it looks like a no-op (perhaps "@id[].*" was meant). The prefix is
# stripped on "records[].*.id" further down anyway. Confirm.
replace_all("id[].*","^\\(DE-605\\)(.*)","$1")

# For every collected id, append a new entry to "records[]" that holds this id
# and a copy of the RVK list.
do list(path: "@id[]", "var": "$i")
copy_field("$i","records[].$append.id")
copy_field("@rvk[]","records[].$last.rvk[]")
end
# Strip the "(DE-605)" prefix from the id of every entry in "records[]".
replace_all("records[].*.id","^\\(DE-605\\)(.*)","$1")

# Keep only the listed fields and drop empty values.
retain("rvk[]","id")
vacuum()

# Filter records without RVK
# NOTE(review): "@rvk[]" is always created by set_array above; verify that
# exists() is false for an array that stayed empty, otherwise this filter never
# rejects anything.
unless exists("rvk[]")
unless exists("@rvk[]")
reject()
end

# Filter records without hbz ids
# NOTE(review): same caveat as above for "@id[]" — confirm.
unless exists("id")
unless exists("@id[]")
reject()
end


# Emit only the per-id entries; the "@"-prefixed helper arrays are dropped here.
retain("records[]")


0 comments on commit 06c1955

Please sign in to comment.