Skip to content

Commit

Permalink
Group results in issues #200
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Mar 14, 2023
1 parent 7942995 commit 8577101
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 9 deletions.
29 changes: 23 additions & 6 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -253,30 +253,47 @@ do_sqlite() {
printf "%s %s> php scripts/sqlite/normalize-issue-details.php ${OUTPUT_DIR} &> ${PREFIX}/sqlite.log\n" $(date +"%F %T")
php scripts/sqlite/normalize-issue-details.php ${OUTPUT_DIR} &> ${PREFIX}/sqlite.log

printf "%s %s> delete\n" $(date +"%F %T")
printf "%s %s> delete DB\n" $(date +"%F %T")
if [[ -e ${OUTPUT_DIR}/qa_catalogue.sqlite ]]; then
rm ${OUTPUT_DIR}/qa_catalogue.sqlite
fi
printf "%s %s> create\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/qa_catalogue.sqlite.sql

HAS_GROUP_PARAM=$(echo ${TYPE_PARAMS} | grep -c -P -e '--groupBy [^-]')
if [[ "${HAS_GROUP_PARAM}" == "0" ]]; then
printf "%s %s> create DB structure\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/qa_catalogue.sqlite.sql
else
printf "%s %s> create DB structure (groupped)\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/qa_catalogue.groupped.sqlite.sql
fi

printf "%s %s> create importable files\n" $(date +"%F %T")
tail -n +2 ${OUTPUT_DIR}/issue-details-normalized.csv > ${OUTPUT_DIR}/issue-details-normalized_noheader.csv
tail -n +2 ${OUTPUT_DIR}/issue-summary.csv > ${OUTPUT_DIR}/issue-summary_noheader.csv

printf "%s %s> import\n" $(date +"%F %T")
printf "%s %s> import issue details\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
.mode csv
.import ${OUTPUT_DIR}/issue-details-normalized_noheader.csv issue_details
EOF

printf "%s %s> import issue summary\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
.mode csv
.import ${OUTPUT_DIR}/issue-summary_noheader.csv issue_summary
EOF

printf "%s %s> delete importable files\n" $(date +"%F %T")
rm ${OUTPUT_DIR}/issue-details-normalized_noheader.csv
rm ${OUTPUT_DIR}/issue-summary_noheader.csv

printf "%s %s> index\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/modify-tables.sql &>> ${PREFIX}/sqlite.log
if [[ "${HAS_GROUP_PARAM}" == "0" ]]; then
printf "%s %s> index\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/modify-tables.sql &>> ${PREFIX}/sqlite.log
else
printf "%s %s> index (groupped)\n" $(date +"%F %T")
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/modify-tables.groupped.sql &>> ${PREFIX}/sqlite.log
fi
}

do_export_schema_files() {
Expand Down
27 changes: 27 additions & 0 deletions scripts/sqlite/modify-tables.groupped.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
--- Post-import step for the grouped variant of the database.
--- Run after issue_summary and issue_details have been created and filled:
--- it indexes both tables and materializes the issue_groups aggregate.
---
--- SQLite index names must be unique within the whole database (not per
--- table), so every index created here needs its own distinct name.

--- issue_details indices
CREATE INDEX "errorId" ON "issue_details" ("errorId");
--- The grouped schema names the record identifier column "id" (there is no
--- "recordId" column in issue_details); the index keeps its historical name.
CREATE INDEX "recordId" ON "issue_details" ("id");

--- issue_summary indices
CREATE INDEX "groupId" ON "issue_summary" ("groupId");
CREATE INDEX "id" ON "issue_summary" ("id");
CREATE INDEX "categoryId" ON "issue_summary" ("categoryId");
CREATE INDEX "typeId" ON "issue_summary" ("typeId");

--- create issue_groups to speed up queries:
--- one row per (groupId, categoryId, typeId, MarcPath) combination
CREATE TABLE issue_groups AS
SELECT groupId,
       categoryId,
       typeId,
       s.MarcPath AS path,
       COUNT(DISTINCT(s.id)) AS variants,   -- distinct issue_summary ids
       COUNT(DISTINCT(d.id)) AS records,    -- distinct issue_details ids
       SUM(d.instances) AS instances
FROM issue_summary AS s
LEFT JOIN issue_details AS d ON (s.id = d.errorId)
GROUP BY groupId, categoryId, typeId, s.MarcPath;

--- issue_groups indices
--- Prefixed with the table name: the bare names "groupId", "categoryId" and
--- "typeId" are already taken by the issue_summary indices above, and reusing
--- them makes CREATE INDEX fail with "index ... already exists".
CREATE INDEX "issue_groups_groupId" ON "issue_groups" ("groupId");
CREATE INDEX "issue_groups_categoryId" ON "issue_groups" ("categoryId");
CREATE INDEX "issue_groups_typeId" ON "issue_groups" ("typeId");
12 changes: 9 additions & 3 deletions scripts/sqlite/modify-tables.sql
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
--- Post-import step for the non-grouped variant of the database.
--- Run after issue_summary and issue_details have been created and filled:
--- it indexes both tables and materializes the issue_groups aggregate.
---
--- SQLite index names must be unique within the whole database (not per
--- table), so every index created here needs its own distinct name.

--- issue_details indices
CREATE INDEX "errorId" ON "issue_details" ("errorId");
--- NOTE(review): confirm that issue_details really has a "recordId" column in
--- the non-grouped schema; that schema is not visible from this file.
CREATE INDEX "recordId" ON "issue_details" ("recordId");

--- issue_summary indices
CREATE INDEX "id" ON "issue_summary" ("id");
CREATE INDEX "categoryId" ON "issue_summary" ("categoryId");
CREATE INDEX "typeId" ON "issue_summary" ("typeId");

--- create issue_groups to speed up queries:
--- one row per (categoryId, typeId, MarcPath) combination
CREATE TABLE issue_groups AS
SELECT categoryId,
       typeId,
       s.MarcPath AS path,
       COUNT(DISTINCT(s.id)) AS variants,   -- distinct issue_summary ids
       COUNT(DISTINCT(d.id)) AS records,    -- distinct issue_details ids
       SUM(d.instances) AS instances
FROM issue_summary AS s
LEFT JOIN issue_details AS d ON (s.id = d.errorId)
GROUP BY categoryId, typeId, s.MarcPath;

--- issue_groups indices
--- Prefixed with the table name: the bare names "categoryId" and "typeId" are
--- already taken by the issue_summary indices above, and reusing them makes
--- CREATE INDEX fail with "index ... already exists".
CREATE INDEX "issue_groups_categoryId" ON "issue_groups" ("categoryId");
CREATE INDEX "issue_groups_typeId" ON "issue_groups" ("typeId");
19 changes: 19 additions & 0 deletions scripts/sqlite/qa_catalogue.groupped.sqlite.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--- Schema of the grouped variant of the database: the two tables that the
--- CSV import fills. Both are created inside a single transaction and only
--- if they do not exist yet.
BEGIN TRANSACTION;

--- issue_details: rows are linked to issue_summary through "errorId"
--- (the issue_groups query joins issue_summary.id = issue_details.errorId).
CREATE TABLE IF NOT EXISTS "issue_details" (
  "id"         TEXT,
  "errorId"    INTEGER,
  "instances"  INTEGER
);

--- issue_summary: same kind of rows as in the non-grouped variant would hold,
--- with "groupId" as the leading column.
--- NOTE(review): the non-grouped schema is not visible here; confirm that
--- "groupId" is the only difference.
CREATE TABLE IF NOT EXISTS "issue_summary" (
  "groupId"     TEXT,
  "id"          INTEGER,
  "MarcPath"    TEXT,
  "categoryId"  INTEGER,
  "typeId"      INTEGER,
  "type"        TEXT,
  "message"     TEXT,
  "url"         TEXT,
  "instances"   INTEGER,
  "records"     INTEGER
);

COMMIT;

0 comments on commit 8577101

Please sign in to comment.