|
1 | | -import { PrismaClient } from '@prisma/client'; |
| 1 | +import { PrismaClient, Prisma } from '@prisma/client'; |
2 | 2 | import * as path from 'path'; |
3 | 3 | import * as fs from 'fs'; |
4 | 4 | import * as readline from 'readline'; |
@@ -33,6 +33,7 @@ const DEFAULT_DATA_DIR = '/mnt/export/review_tables'; |
33 | 33 | const DATA_DIR = process.env.DATA_DIR || DEFAULT_DATA_DIR; |
34 | 34 | const batchSize = 1000; |
35 | 35 | const logSize = 20000; |
| 36 | +const submissionIsFileBatchSize = 500; |
36 | 37 | const DEFAULT_ES_DATA_FILE = path.join( |
37 | 38 | '/home/ubuntu', |
38 | 39 | 'submissions-api.data.json', |
@@ -64,6 +65,55 @@ if (shouldRepairMaps) { |
64 | 65 | ); |
65 | 66 | } |
66 | 67 |
|
| 68 | +const MIGRATION_TARGET_SUBMISSION_IS_FILE = 'submission-is-file'; |
| 69 | +const SUPPORTED_MIGRATION_TARGETS = new Set<string>([ |
| 70 | + MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 71 | +]); |
| 72 | +const MIGRATION_TARGET_ALIASES: Record<string, string> = { |
| 73 | + [MIGRATION_TARGET_SUBMISSION_IS_FILE]: MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 74 | + 'submission-is-file-flag': MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 75 | + 'submission-isfilesubmission': MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 76 | + 'submission:isfile': MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 77 | + 'submission:isfilesubmission': MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 78 | + 'is-file-submission': MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 79 | + submission_is_file: MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 80 | +}; |
| 81 | + |
| 82 | +const rawTargets = extractTargets(process.argv.slice(2)); |
| 83 | +const normalizedTargets: string[] = []; |
| 84 | +const unknownTargets: string[] = []; |
| 85 | + |
| 86 | +for (const target of rawTargets) { |
| 87 | + const normalized = normalizeTarget(target); |
| 88 | + if (normalized) { |
| 89 | + normalizedTargets.push(normalized); |
| 90 | + } else { |
| 91 | + unknownTargets.push(target); |
| 92 | + } |
| 93 | +} |
| 94 | + |
| 95 | +if (unknownTargets.length > 0) { |
| 96 | + throw new Error( |
| 97 | + `Unknown migration target(s): ${unknownTargets.join(', ')}. Supported targets: ${Array.from(SUPPORTED_MIGRATION_TARGETS).join(', ')}`, |
| 98 | + ); |
| 99 | +} |
| 100 | + |
| 101 | +const requestedTargets = new Set<string>(normalizedTargets); |
| 102 | +const runFullMigration = requestedTargets.size === 0; |
| 103 | +if (!runFullMigration && requestedTargets.size > 1) { |
| 104 | + throw new Error( |
| 105 | + `Multiple migration targets are not currently supported. Requested targets: ${Array.from(requestedTargets).join(', ')}`, |
| 106 | + ); |
| 107 | +} |
| 108 | +const runSubmissionIsFileTarget = requestedTargets.has( |
| 109 | + MIGRATION_TARGET_SUBMISSION_IS_FILE, |
| 110 | +); |
| 111 | +if (!runFullMigration) { |
| 112 | + console.log( |
| 113 | + `Executing targeted migration: ${Array.from(requestedTargets).join(', ')}`, |
| 114 | + ); |
| 115 | +} |
| 116 | + |
67 | 117 | const errorSummary = new Map< |
68 | 118 | string, |
69 | 119 | { count: number; files: Set<string>; examples: string[] } |
@@ -108,6 +158,73 @@ const shouldProcessRecord = ( |
108 | 158 | ); |
109 | 159 | }; |
110 | 160 |
|
| 161 | +const toBoolean = (value: unknown): boolean => { |
| 162 | + if (typeof value === 'boolean') { |
| 163 | + return value; |
| 164 | + } |
| 165 | + if (typeof value === 'number') { |
| 166 | + if (value === 1) { |
| 167 | + return true; |
| 168 | + } |
| 169 | + if (value === 0) { |
| 170 | + return false; |
| 171 | + } |
| 172 | + } |
| 173 | + if (typeof value === 'string') { |
| 174 | + const normalized = value.trim().toLowerCase(); |
| 175 | + if (['true', '1', 'yes', 'y'].includes(normalized)) { |
| 176 | + return true; |
| 177 | + } |
| 178 | + if ( |
| 179 | + ['false', '0', 'no', 'n', 'null', 'undefined', ''].includes(normalized) |
| 180 | + ) { |
| 181 | + return false; |
| 182 | + } |
| 183 | + } |
| 184 | + return Boolean(value); |
| 185 | +}; |
| 186 | + |
| 187 | +function extractTargets(args: string[]): string[] { |
| 188 | + const results: string[] = []; |
| 189 | + for (let i = 0; i < args.length; i += 1) { |
| 190 | + const arg = args[i]; |
| 191 | + if (arg === '--target' || arg === '--targets') { |
| 192 | + const value = args[i + 1]; |
| 193 | + if (!value) { |
| 194 | + throw new Error( |
| 195 | + `${arg} requires a value with comma-separated migration target names.`, |
| 196 | + ); |
| 197 | + } |
| 198 | + results.push( |
| 199 | + ...value |
| 200 | + .split(',') |
| 201 | + .map((item) => item.trim()) |
| 202 | + .filter((item) => item.length > 0), |
| 203 | + ); |
| 204 | + i += 1; |
| 205 | + continue; |
| 206 | + } |
| 207 | + const match = arg.match(/^--targets?=(.*)$/); |
| 208 | + if (match) { |
| 209 | + results.push( |
| 210 | + ...match[1] |
| 211 | + .split(',') |
| 212 | + .map((item) => item.trim()) |
| 213 | + .filter((item) => item.length > 0), |
| 214 | + ); |
| 215 | + } |
| 216 | + } |
| 217 | + return results; |
| 218 | +} |
| 219 | + |
| 220 | +function normalizeTarget(target: string): string | undefined { |
| 221 | + if (!target) { |
| 222 | + return undefined; |
| 223 | + } |
| 224 | + const normalized = target.trim().toLowerCase(); |
| 225 | + return MIGRATION_TARGET_ALIASES[normalized]; |
| 226 | +} |
| 227 | + |
111 | 228 | const buildLogPrefix = (type: string, file: string, subtype?: string) => |
112 | 229 | subtype ? `[${type}][${subtype}][${file}]` : `[${type}][${file}]`; |
113 | 230 |
|
@@ -689,6 +806,7 @@ function convertSubmissionES(esData): any { |
689 | 806 | legacyChallengeId, |
690 | 807 | submissionPhaseId: String(esData.submissionPhaseId), |
691 | 808 | fileType: esData.fileType, |
| 809 | + isFileSubmission: toBoolean(esData.isFileSubmission), |
692 | 810 | esId: esData.id, |
693 | 811 | submittedDate: esData.submittedDate ? new Date(esData.submittedDate) : null, |
694 | 812 | updatedBy: esData.updatedBy ?? null, |
@@ -717,6 +835,171 @@ function convertSubmissionES(esData): any { |
717 | 835 | return submission; |
718 | 836 | } |
719 | 837 |
|
| 838 | +async function migrateSubmissionIsFileFlagOnly() { |
| 839 | + console.log( |
| 840 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Using ElasticSearch export file: ${ES_DATA_FILE}`, |
| 841 | + ); |
| 842 | + if (!fs.existsSync(ES_DATA_FILE)) { |
| 843 | + throw new Error( |
| 844 | + `ElasticSearch export file not found at ${ES_DATA_FILE}. Set ES_DATA_FILE to override the default.`, |
| 845 | + ); |
| 846 | + } |
| 847 | + console.log( |
| 848 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Starting isFileSubmission synchronization...`, |
| 849 | + ); |
| 850 | + |
| 851 | + const pendingUpdates = new Map<string, boolean>(); |
| 852 | + const updatedLegacyIds = new Set<string>(); |
| 853 | + const missingExamples: string[] = []; |
| 854 | + |
| 855 | + let totalUpdatedRows = 0; |
| 856 | + let totalMissing = 0; |
| 857 | + let processedRecords = 0; |
| 858 | + let skippedMissingLegacyId = 0; |
| 859 | + let skippedOutsideWindow = 0; |
| 860 | + let parseErrors = 0; |
| 861 | + let lineNumber = 0; |
| 862 | + |
| 863 | + const flushPending = async () => { |
| 864 | + if (pendingUpdates.size === 0) { |
| 865 | + return; |
| 866 | + } |
| 867 | + const entries = Array.from(pendingUpdates.entries()); |
| 868 | + pendingUpdates.clear(); |
| 869 | + const valueTuples = entries.map( |
| 870 | + ([legacySubmissionId, isFileSubmission]) => |
| 871 | + Prisma.sql`(${legacySubmissionId}, ${isFileSubmission})`, |
| 872 | + ); |
| 873 | + let updatedRows: Array<{ legacySubmissionId: string }> = []; |
| 874 | + try { |
| 875 | + updatedRows = await prisma.$queryRaw< |
| 876 | + Array<{ legacySubmissionId: string }> |
| 877 | + >(Prisma.sql` |
| 878 | + WITH input("legacySubmissionId", "isFileSubmission") AS ( |
| 879 | + VALUES ${Prisma.join(valueTuples)} |
| 880 | + ) |
| 881 | + UPDATE "submission" AS s |
| 882 | + SET "isFileSubmission" = input."isFileSubmission" |
| 883 | + FROM input |
| 884 | + WHERE s."legacySubmissionId" = input."legacySubmissionId" |
| 885 | + RETURNING s."legacySubmissionId" AS "legacySubmissionId" |
| 886 | + `); |
| 887 | + } catch (err) { |
| 888 | + console.error( |
| 889 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Failed to update a batch of ${entries.length} submissions.`, |
| 890 | + ); |
| 891 | + throw err; |
| 892 | + } |
| 893 | + totalUpdatedRows += updatedRows.length; |
| 894 | + const batchUpdatedSet = new Set( |
| 895 | + updatedRows.map((row) => String(row.legacySubmissionId)), |
| 896 | + ); |
| 897 | + batchUpdatedSet.forEach((id) => updatedLegacyIds.add(id)); |
| 898 | + const batchMissing = entries.length - batchUpdatedSet.size; |
| 899 | + totalMissing += batchMissing; |
| 900 | + if (batchMissing > 0 && missingExamples.length < 5) { |
| 901 | + for (const [legacyId] of entries) { |
| 902 | + if (!batchUpdatedSet.has(legacyId)) { |
| 903 | + missingExamples.push(legacyId); |
| 904 | + if (missingExamples.length >= 5) { |
| 905 | + break; |
| 906 | + } |
| 907 | + } |
| 908 | + } |
| 909 | + } |
| 910 | + }; |
| 911 | + |
| 912 | + const fileStream = fs.createReadStream(ES_DATA_FILE); |
| 913 | + const rl = readline.createInterface({ |
| 914 | + input: fileStream, |
| 915 | + crlfDelay: Infinity, |
| 916 | + }); |
| 917 | + |
| 918 | + for await (const line of rl) { |
| 919 | + lineNumber += 1; |
| 920 | + if (!line) { |
| 921 | + continue; |
| 922 | + } |
| 923 | + let parsed: any; |
| 924 | + try { |
| 925 | + parsed = JSON.parse(line); |
| 926 | + } catch (err: any) { |
| 927 | + parseErrors += 1; |
| 928 | + if (parseErrors <= 5) { |
| 929 | + console.warn( |
| 930 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Failed to parse line ${lineNumber}: ${err?.message ?? err}`, |
| 931 | + ); |
| 932 | + } |
| 933 | + continue; |
| 934 | + } |
| 935 | + const source = parsed?._source; |
| 936 | + if (!source || source.resource !== 'submission') { |
| 937 | + continue; |
| 938 | + } |
| 939 | + if (source.legacySubmissionId == null) { |
| 940 | + skippedMissingLegacyId += 1; |
| 941 | + continue; |
| 942 | + } |
| 943 | + const createdAudit = source.created ?? source.submittedDate ?? null; |
| 944 | + const updatedAudit = source.updated ?? null; |
| 945 | + if (!shouldProcessRecord(createdAudit, updatedAudit)) { |
| 946 | + skippedOutsideWindow += 1; |
| 947 | + continue; |
| 948 | + } |
| 949 | + const legacySubmissionId = String(source.legacySubmissionId); |
| 950 | + const isFileSubmission = toBoolean(source.isFileSubmission); |
| 951 | + pendingUpdates.set(legacySubmissionId, isFileSubmission); |
| 952 | + processedRecords += 1; |
| 953 | + if (pendingUpdates.size >= submissionIsFileBatchSize) { |
| 954 | + await flushPending(); |
| 955 | + } |
| 956 | + if (processedRecords > 0 && processedRecords % logSize === 0) { |
| 957 | + console.log( |
| 958 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Queued ${processedRecords} submissions so far...`, |
| 959 | + ); |
| 960 | + } |
| 961 | + } |
| 962 | + |
| 963 | + await flushPending(); |
| 964 | + |
| 965 | + const distinctUpdated = updatedLegacyIds.size; |
| 966 | + |
| 967 | + console.log( |
| 968 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Processed ${processedRecords} submission records.`, |
| 969 | + ); |
| 970 | + console.log( |
| 971 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Updated ${totalUpdatedRows} row(s) affecting ${distinctUpdated} submission(s).`, |
| 972 | + ); |
| 973 | + if (totalMissing > 0) { |
| 974 | + console.warn( |
| 975 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Unable to locate ${totalMissing} submission(s) in the database.`, |
| 976 | + ); |
| 977 | + if (missingExamples.length > 0) { |
| 978 | + console.warn( |
| 979 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Missing legacySubmissionId examples: ${missingExamples.join(', ')}`, |
| 980 | + ); |
| 981 | + } |
| 982 | + } |
| 983 | + if (skippedMissingLegacyId > 0) { |
| 984 | + console.warn( |
| 985 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Skipped ${skippedMissingLegacyId} record(s) without a legacySubmissionId.`, |
| 986 | + ); |
| 987 | + } |
| 988 | + if (skippedOutsideWindow > 0) { |
| 989 | + console.log( |
| 990 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Skipped ${skippedOutsideWindow} record(s) outside the incremental window.`, |
| 991 | + ); |
| 992 | + } |
| 993 | + if (parseErrors > 5) { |
| 994 | + console.warn( |
| 995 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Encountered ${parseErrors} parse error(s) while reading the export.`, |
| 996 | + ); |
| 997 | + } |
| 998 | + console.log( |
| 999 | + `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] isFileSubmission synchronization completed.`, |
| 1000 | + ); |
| 1001 | +} |
| 1002 | + |
720 | 1003 | async function migrateElasticSearch() { |
721 | 1004 | // migrate elastic search data |
722 | 1005 | const filepath = ES_DATA_FILE; |
@@ -3269,6 +3552,15 @@ async function migrateResourceSubmissions() { |
3269 | 3552 | } |
3270 | 3553 |
|
3271 | 3554 | async function migrate() { |
| 3555 | + if (!runFullMigration && runSubmissionIsFileTarget) { |
| 3556 | + await migrateSubmissionIsFileFlagOnly(); |
| 3557 | + return; |
| 3558 | + } |
| 3559 | + |
| 3560 | + if (!runFullMigration) { |
| 3561 | + throw new Error('No recognized migration targets were provided.'); |
| 3562 | + } |
| 3563 | + |
3272 | 3564 | if (!fs.existsSync(DATA_DIR)) { |
3273 | 3565 | throw new Error( |
3274 | 3566 | `DATA_DIR "${DATA_DIR}" does not exist. Set DATA_DIR to a valid export path.`, |
|
0 commit comments