Skip to content

Commit

Permalink
PICA: general changes #163
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed May 23, 2023
1 parent ef1f04c commit 6cf43fb
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 7 deletions.
2 changes: 2 additions & 0 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ do_completeness_sqlite() {

if [[ "${HAS_GROUP_PARAM}" == "1" ]]; then
bash scripts/sqlite/completeness-grouped.sqlite.sh ${OUTPUT_DIR}
else
bash scripts/sqlite/completeness.sqlite.sh ${OUTPUT_DIR}
fi
}

Expand Down
14 changes: 7 additions & 7 deletions scripts/sqlite/completeness-grouped.sqlite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ log() {

OUTPUT_DIR=$1

log "create table grouped_marc_elements"
log "create table marc_elements_grouped"
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
CREATE TABLE IF NOT EXISTS "grouped_marc_elements" (
CREATE TABLE IF NOT EXISTS "marc_elements_grouped" (
"groupId" INTEGER,
"documenttype" TEXT,
"path" TEXT,
Expand All @@ -31,13 +31,13 @@ CREATE TABLE IF NOT EXISTS "grouped_marc_elements" (
"stddev" REAL,
"histogram" TEXT
);
CREATE INDEX IF NOT EXISTS "gme_groupId" ON "grouped_marc_elements" ("groupId");
CREATE INDEX IF NOT EXISTS "gme_documenttype" ON "grouped_marc_elements" ("documenttype");
CREATE INDEX IF NOT EXISTS "meg_groupId" ON "marc_elements_grouped" ("groupId");
CREATE INDEX IF NOT EXISTS "meg_documenttype" ON "marc_elements_grouped" ("documenttype");
EOF

log "clean grouped_marc_elements"
log "clean marc_elements_grouped"
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
DELETE FROM grouped_marc_elements;
DELETE FROM marc_elements_grouped;
EOF

log "create headless CSV"
Expand All @@ -46,7 +46,7 @@ tail -n +2 ${OUTPUT_DIR}/completeness-grouped-marc-elements.csv > ${OUTPUT_DIR}/
log "import marc elements"
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
.mode csv
.import ${OUTPUT_DIR}/completeness-grouped-marc-elements-noheader.csv grouped_marc_elements
.import ${OUTPUT_DIR}/completeness-grouped-marc-elements-noheader.csv marc_elements_grouped
EOF

log "drop headless CSV"
Expand Down
52 changes: 52 additions & 0 deletions scripts/sqlite/completeness.sqlite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Stores marc-elements.csv into SQLite
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

#######################################
# Print a message to stdout, prefixed with the current date and time.
# Arguments: $1 - the message text
# Outputs:   "<YYYY-MM-DD HH:MM:SS>> <message>" followed by a newline; the
#            timestamp is wrapped in ANSI escape sequences (bold white).
#######################################
log() {
  # Keep the timestamp local so calling log() does not leak a global variable.
  local timestamp
  timestamp=$(date +"%F %T")
  # \033[0D = cursor-back sequence, \033[1;37m = bold white, \033[0m = reset.
  printf '\033[0D\033[1;37m%s>\033[0m ' "$timestamp"
  # printf instead of echo: messages such as "-n" or "-e" are printed verbatim.
  printf '%s\n' "$1"
}

# Directory that holds marc-elements.csv and qa_catalogue.sqlite.
# Abort early when it is missing: an empty value would make every path below
# resolve against the filesystem root.
OUTPUT_DIR=${1:?usage: completeness.sqlite.sh <output-dir>}

db_file="${OUTPUT_DIR}/qa_catalogue.sqlite"
csv_file="${OUTPUT_DIR}/marc-elements.csv"
headless_csv="${OUTPUT_DIR}/marc-elements-noheader.csv"

log "create table marc_elements"
# marc_elements has no "groupId" column, so only documenttype is indexed.
# The index name carries its own "me_" prefix: index names are shared by all
# tables of a database, and with IF NOT EXISTS a same-named index on another
# table would silently prevent this one from being created.
sqlite3 "${db_file}" << 'EOF'
CREATE TABLE IF NOT EXISTS "marc_elements" (
"documenttype" TEXT,
"path" TEXT,
"packageid" INTEGER,
"package" TEXT,
"tag" TEXT,
"subfield" TEXT,
"number-of-record" INTEGER,
"number-of-instances" INTEGER,
"min" INTEGER,
"max" INTEGER,
"mean" REAL,
"stddev" REAL,
"histogram" TEXT
);
CREATE INDEX IF NOT EXISTS "me_documenttype" ON "marc_elements" ("documenttype");
EOF

log "clean marc_elements"
sqlite3 "${db_file}" << 'EOF'
DELETE FROM marc_elements;
EOF

log "create headless CSV"
# .import into an existing table treats every line as data, so drop the header.
tail -n +2 "${csv_file}" > "${headless_csv}"

log "import marc elements"
# The path is single-quoted for sqlite3's dot-command parser so that a
# directory name containing spaces is still passed as one argument.
sqlite3 "${db_file}" << EOF
.mode csv
.import '${headless_csv}' marc_elements
EOF

log "drop headless CSV"
rm -- "${headless_csv}"

0 comments on commit 6cf43fb

Please sign in to comment.