Skip to content

Commit

Permalink
Group results in completeness: import into DB #199
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Apr 27, 2023
1 parent a8c5a58 commit 52f33d7
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
13 changes: 12 additions & 1 deletion common-script
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ do_completeness() {
./completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/completeness.log
}

#######################################
# Import the grouped output of the completeness analysis into SQLite.
#
# Calls `run completeness_sqlite` (helper defined elsewhere in this file),
# then invokes the import script only when TYPE_PARAMS contains a
# `--groupBy <value>` option whose value does not start with a dash.
#
# Globals:
#   TYPE_PARAMS      (read)    analysis parameters
#   HAS_GROUP_PARAM  (written) "1" if a --groupBy value is present, else "0";
#                              kept global because the original assigned it
#                              globally
#######################################
do_completeness_sqlite() {
  run completeness_sqlite

  # Matched with a bash regex rather than `echo ${TYPE_PARAMS} | grep -P`:
  # the unquoted echo was subject to pathname expansion, and -P (PCRE) is a
  # GNU grep extension that is missing on some platforms. The pattern treats
  # any run of whitespace after --groupBy as a single separator, which mirrors
  # the whitespace collapsing the unquoted echo used to perform.
  local group_by_re='--groupBy[[:space:]]+[^-[:space:]]'
  if [[ "${TYPE_PARAMS:-}" =~ ${group_by_re} ]]; then
    HAS_GROUP_PARAM=1
  else
    HAS_GROUP_PARAM=0
  fi

  if [[ "${HAS_GROUP_PARAM}" == "1" ]]; then
    bash scripts/sqlite/completeness-groupped.sqlite.sh
  fi
}

do_classifications() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--with-delete|\s*--ignorableIssueTypes [^ ]+//g')
run classifications
Expand Down Expand Up @@ -426,6 +434,7 @@ do_all_analyses() {
do_validate
do_sqlite
do_completeness
do_completeness_sqlite
do_classifications
do_authorities
do_tt_completeness
Expand Down Expand Up @@ -470,6 +479,7 @@ commands:
prepare-solr prepare indexing
index indexing with Solr
sqlite import tables to SQLite
completeness-sqlite import groupped output of completeness to SQLite
export-schema-files export schema files
shacl4bib run SHACL-like validation
all-analyses run all analitical tasks
Expand Down Expand Up @@ -529,7 +539,7 @@ case "${1:-help}" in
validate) do_validate ; do_sqlite ;;
prepare-solr) do_prepare_solr ;;
index) do_index ;;
completeness) do_completeness ;;
completeness) do_completeness ; do_completeness_sqlite ;;
classifications) do_classifications ;;
authorities) do_authorities ;;
tt-completeness) do_tt_completeness ;;
Expand All @@ -542,6 +552,7 @@ case "${1:-help}" in
pareto) do_pareto ;;
marc-history) do_marc_history ;;
record-patterns) do_record_patterns ;;
completeness-sqlite) do_completeness_sqlite ;;
sqlite) do_sqlite ;;
mysql) do_mysql ;;
export-schema-files) do_export_schema_files ;;
Expand Down
29 changes: 29 additions & 0 deletions scripts/sqlite/completeness-groupped.sqlite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Import the grouped output of the completeness analysis into SQLite.
#
# Input:  ${OUTPUT_DIR}/completeness-groupped-marc-elements.csv
# Output: table "groupped_marc_elements" (indexed on "groupId") in
#         ${OUTPUT_DIR}/qa_catalogue.sqlite
#
# Required environment: OUTPUT_DIR

# Without OUTPUT_DIR every path below would resolve against "/".
: "${OUTPUT_DIR:?OUTPUT_DIR must be set}"

db_file="${OUTPUT_DIR}/qa_catalogue.sqlite"
source_csv="${OUTPUT_DIR}/completeness-groupped-marc-elements.csv"
noheader_csv="${OUTPUT_DIR}/completeness-groupped-marc-elements-noheader.csv"

# Remove the temporary headerless copy on every exit path, including
# interruption between its creation and the end of the import.
cleanup() {
  rm -f -- "${noheader_csv}"
}
trap cleanup EXIT

# Quoted delimiter: the schema is literal SQL, nothing in it should be
# expanded by the shell.
sqlite3 "${db_file}" << 'EOF'
CREATE TABLE IF NOT EXISTS "groupped_marc_elements" (
"groupId" INTEGER,
"documenttype" TEXT,
"path" TEXT,
"packageid" INTEGER,
"package" TEXT,
"tag" TEXT,
"subfield" TEXT,
"number-of-record" INTEGER,
"number-of-instances" INTEGER,
"min" INTEGER,
"max" INTEGER,
"mean" REAL,
"stddev" REAL,
"histogram" TEXT
);
CREATE INDEX IF NOT EXISTS "groupId" ON "groupped_marc_elements" ("groupId");
EOF

# With no input there is nothing to import. The exit status stays 0, as it
# was before, so callers are not affected; only the diagnostic is clearer.
if [[ ! -f "${source_csv}" ]]; then
  printf 'warning: %s not found, nothing to import\n' "${source_csv}" >&2
  exit 0
fi

# .import into an existing table treats every row as data, so the CSV header
# line is dropped first.
tail -n +2 -- "${source_csv}" > "${noheader_csv}"

echo "import marc elements"
# The file name is single-quoted for the sqlite3 shell so that a path
# containing spaces is passed as one dot-command argument.
sqlite3 "${db_file}" << EOF
.mode csv
.import '${noheader_csv}' groupped_marc_elements
EOF

0 comments on commit 52f33d7

Please sign in to comment.