Skip to content

Commit c2ca179

Browse files
authored
Merge pull request #131 from topcoder-platform/performance
Performance tweaks for large account in prod
2 parents cb2b48d + 477d1a8 commit c2ca179

File tree

8 files changed

+421
-4
lines changed

8 files changed

+421
-4
lines changed

prisma/migrate.ts

Lines changed: 293 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { PrismaClient } from '@prisma/client';
1+
import { PrismaClient, Prisma } from '@prisma/client';
22
import * as path from 'path';
33
import * as fs from 'fs';
44
import * as readline from 'readline';
@@ -33,6 +33,7 @@ const DEFAULT_DATA_DIR = '/mnt/export/review_tables';
3333
const DATA_DIR = process.env.DATA_DIR || DEFAULT_DATA_DIR;
3434
const batchSize = 1000;
3535
const logSize = 20000;
36+
const submissionIsFileBatchSize = 500;
3637
const DEFAULT_ES_DATA_FILE = path.join(
3738
'/home/ubuntu',
3839
'submissions-api.data.json',
@@ -64,6 +65,55 @@ if (shouldRepairMaps) {
6465
);
6566
}
6667

68+
// Canonical name for the targeted "sync submission.isFileSubmission only" migration.
const MIGRATION_TARGET_SUBMISSION_IS_FILE = 'submission-is-file';
// All migration targets this script knows how to run in targeted mode.
const SUPPORTED_MIGRATION_TARGETS = new Set<string>([
  MIGRATION_TARGET_SUBMISSION_IS_FILE,
]);
// Accepted spellings on the command line, all mapping to the canonical name.
// Keys are matched after trim()+toLowerCase() (see normalizeTarget).
const MIGRATION_TARGET_ALIASES: Record<string, string> = {
  [MIGRATION_TARGET_SUBMISSION_IS_FILE]: MIGRATION_TARGET_SUBMISSION_IS_FILE,
  'submission-is-file-flag': MIGRATION_TARGET_SUBMISSION_IS_FILE,
  'submission-isfilesubmission': MIGRATION_TARGET_SUBMISSION_IS_FILE,
  'submission:isfile': MIGRATION_TARGET_SUBMISSION_IS_FILE,
  'submission:isfilesubmission': MIGRATION_TARGET_SUBMISSION_IS_FILE,
  'is-file-submission': MIGRATION_TARGET_SUBMISSION_IS_FILE,
  submission_is_file: MIGRATION_TARGET_SUBMISSION_IS_FILE,
};

// Parse --target/--targets from the CLI and split raw names into
// recognized (canonical) and unknown buckets.
const rawTargets = extractTargets(process.argv.slice(2));
const normalizedTargets: string[] = [];
const unknownTargets: string[] = [];

for (const target of rawTargets) {
  const normalized = normalizeTarget(target);
  if (normalized) {
    normalizedTargets.push(normalized);
  } else {
    unknownTargets.push(target);
  }
}

// Fail fast at module load: any unrecognized target aborts the script
// before migrate() runs.
if (unknownTargets.length > 0) {
  throw new Error(
    `Unknown migration target(s): ${unknownTargets.join(', ')}. Supported targets: ${Array.from(SUPPORTED_MIGRATION_TARGETS).join(', ')}`,
  );
}

// No targets requested => run the full migration (legacy default behavior).
const requestedTargets = new Set<string>(normalizedTargets);
const runFullMigration = requestedTargets.size === 0;
// Only a single targeted migration per invocation is supported for now.
if (!runFullMigration && requestedTargets.size > 1) {
  throw new Error(
    `Multiple migration targets are not currently supported. Requested targets: ${Array.from(requestedTargets).join(', ')}`,
  );
}
// Flag consumed by migrate() to dispatch the isFileSubmission-only path.
const runSubmissionIsFileTarget = requestedTargets.has(
  MIGRATION_TARGET_SUBMISSION_IS_FILE,
);
if (!runFullMigration) {
  console.log(
    `Executing targeted migration: ${Array.from(requestedTargets).join(', ')}`,
  );
}
116+
67117
const errorSummary = new Map<
68118
string,
69119
{ count: number; files: Set<string>; examples: string[] }
@@ -108,6 +158,73 @@ const shouldProcessRecord = (
108158
);
109159
};
110160

161+
const toBoolean = (value: unknown): boolean => {
162+
if (typeof value === 'boolean') {
163+
return value;
164+
}
165+
if (typeof value === 'number') {
166+
if (value === 1) {
167+
return true;
168+
}
169+
if (value === 0) {
170+
return false;
171+
}
172+
}
173+
if (typeof value === 'string') {
174+
const normalized = value.trim().toLowerCase();
175+
if (['true', '1', 'yes', 'y'].includes(normalized)) {
176+
return true;
177+
}
178+
if (
179+
['false', '0', 'no', 'n', 'null', 'undefined', ''].includes(normalized)
180+
) {
181+
return false;
182+
}
183+
}
184+
return Boolean(value);
185+
};
186+
187+
function extractTargets(args: string[]): string[] {
188+
const results: string[] = [];
189+
for (let i = 0; i < args.length; i += 1) {
190+
const arg = args[i];
191+
if (arg === '--target' || arg === '--targets') {
192+
const value = args[i + 1];
193+
if (!value) {
194+
throw new Error(
195+
`${arg} requires a value with comma-separated migration target names.`,
196+
);
197+
}
198+
results.push(
199+
...value
200+
.split(',')
201+
.map((item) => item.trim())
202+
.filter((item) => item.length > 0),
203+
);
204+
i += 1;
205+
continue;
206+
}
207+
const match = arg.match(/^--targets?=(.*)$/);
208+
if (match) {
209+
results.push(
210+
...match[1]
211+
.split(',')
212+
.map((item) => item.trim())
213+
.filter((item) => item.length > 0),
214+
);
215+
}
216+
}
217+
return results;
218+
}
219+
220+
function normalizeTarget(target: string): string | undefined {
221+
if (!target) {
222+
return undefined;
223+
}
224+
const normalized = target.trim().toLowerCase();
225+
return MIGRATION_TARGET_ALIASES[normalized];
226+
}
227+
111228
const buildLogPrefix = (type: string, file: string, subtype?: string) =>
112229
subtype ? `[${type}][${subtype}][${file}]` : `[${type}][${file}]`;
113230

@@ -689,6 +806,7 @@ function convertSubmissionES(esData): any {
689806
legacyChallengeId,
690807
submissionPhaseId: String(esData.submissionPhaseId),
691808
fileType: esData.fileType,
809+
isFileSubmission: toBoolean(esData.isFileSubmission),
692810
esId: esData.id,
693811
submittedDate: esData.submittedDate ? new Date(esData.submittedDate) : null,
694812
updatedBy: esData.updatedBy ?? null,
@@ -717,6 +835,171 @@ function convertSubmissionES(esData): any {
717835
return submission;
718836
}
719837

838+
/**
 * Targeted migration: re-sync only the submission.isFileSubmission column
 * from the ElasticSearch NDJSON export, without re-running the full table
 * migration.
 *
 * Reads ES_DATA_FILE line by line (one JSON document per line), filters to
 * `_source.resource === 'submission'` records inside the incremental window
 * (shouldProcessRecord), and batches updates of up to
 * submissionIsFileBatchSize rows. Each batch is applied with a single
 * UPDATE ... FROM (VALUES ...) statement keyed on legacySubmissionId.
 *
 * Throws when the export file is missing or when a batch UPDATE fails;
 * parse errors on individual lines are counted and skipped.
 */
async function migrateSubmissionIsFileFlagOnly() {
  console.log(
    `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Using ElasticSearch export file: ${ES_DATA_FILE}`,
  );
  if (!fs.existsSync(ES_DATA_FILE)) {
    throw new Error(
      `ElasticSearch export file not found at ${ES_DATA_FILE}. Set ES_DATA_FILE to override the default.`,
    );
  }
  console.log(
    `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Starting isFileSubmission synchronization...`,
  );

  // legacySubmissionId -> desired flag. A Map so a later record for the same
  // submission overwrites an earlier one within the pending batch.
  const pendingUpdates = new Map<string, boolean>();
  // Distinct legacy IDs confirmed updated across all batches (for the summary).
  const updatedLegacyIds = new Set<string>();
  // Up to 5 sample IDs that were queued but matched no DB row.
  const missingExamples: string[] = [];

  // Counters reported in the summary at the end of the run.
  let totalUpdatedRows = 0;
  let totalMissing = 0;
  let processedRecords = 0;
  let skippedMissingLegacyId = 0;
  let skippedOutsideWindow = 0;
  let parseErrors = 0;
  let lineNumber = 0;

  // Flush the pending batch with one set-based UPDATE. Clears pendingUpdates
  // up front so a retry after a thrown error starts from a clean batch.
  const flushPending = async () => {
    if (pendingUpdates.size === 0) {
      return;
    }
    const entries = Array.from(pendingUpdates.entries());
    pendingUpdates.clear();
    // One parameterized (id, flag) tuple per pending submission.
    const valueTuples = entries.map(
      ([legacySubmissionId, isFileSubmission]) =>
        Prisma.sql`(${legacySubmissionId}, ${isFileSubmission})`,
    );
    let updatedRows: Array<{ legacySubmissionId: string }> = [];
    try {
      // NOTE(review): the id side of each tuple is String()-ified before
      // queueing — assumes submission."legacySubmissionId" compares cleanly
      // against a text parameter; confirm the column type (a bigint column
      // may need an explicit cast in the VALUES list).
      updatedRows = await prisma.$queryRaw<
        Array<{ legacySubmissionId: string }>
      >(Prisma.sql`
        WITH input("legacySubmissionId", "isFileSubmission") AS (
          VALUES ${Prisma.join(valueTuples)}
        )
        UPDATE "submission" AS s
        SET "isFileSubmission" = input."isFileSubmission"
        FROM input
        WHERE s."legacySubmissionId" = input."legacySubmissionId"
        RETURNING s."legacySubmissionId" AS "legacySubmissionId"
      `);
    } catch (err) {
      console.error(
        `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Failed to update a batch of ${entries.length} submissions.`,
      );
      throw err;
    }
    totalUpdatedRows += updatedRows.length;
    // RETURNING may yield more rows than input entries if several DB rows
    // share a legacySubmissionId; the Set collapses them for the
    // distinct-submissions summary.
    const batchUpdatedSet = new Set(
      updatedRows.map((row) => String(row.legacySubmissionId)),
    );
    batchUpdatedSet.forEach((id) => updatedLegacyIds.add(id));
    const batchMissing = entries.length - batchUpdatedSet.size;
    totalMissing += batchMissing;
    // Keep a handful of missing IDs to make the warning actionable.
    if (batchMissing > 0 && missingExamples.length < 5) {
      for (const [legacyId] of entries) {
        if (!batchUpdatedSet.has(legacyId)) {
          missingExamples.push(legacyId);
          if (missingExamples.length >= 5) {
            break;
          }
        }
      }
    }
  };

  // Stream the export; crlfDelay: Infinity treats \r\n as one line break.
  const fileStream = fs.createReadStream(ES_DATA_FILE);
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  });

  for await (const line of rl) {
    lineNumber += 1;
    if (!line) {
      continue; // blank line in the export
    }
    let parsed: any;
    try {
      parsed = JSON.parse(line);
    } catch (err: any) {
      // Bad JSON lines are skipped; only the first 5 are logged to avoid
      // flooding the output on a corrupt export.
      parseErrors += 1;
      if (parseErrors <= 5) {
        console.warn(
          `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Failed to parse line ${lineNumber}: ${err?.message ?? err}`,
        );
      }
      continue;
    }
    // Only submission documents are relevant to this targeted migration.
    const source = parsed?._source;
    if (!source || source.resource !== 'submission') {
      continue;
    }
    // Without a legacySubmissionId there is no DB key to update against.
    if (source.legacySubmissionId == null) {
      skippedMissingLegacyId += 1;
      continue;
    }
    // submittedDate stands in for a missing created timestamp when deciding
    // whether the record falls inside the incremental window.
    const createdAudit = source.created ?? source.submittedDate ?? null;
    const updatedAudit = source.updated ?? null;
    if (!shouldProcessRecord(createdAudit, updatedAudit)) {
      skippedOutsideWindow += 1;
      continue;
    }
    const legacySubmissionId = String(source.legacySubmissionId);
    const isFileSubmission = toBoolean(source.isFileSubmission);
    pendingUpdates.set(legacySubmissionId, isFileSubmission);
    processedRecords += 1;
    if (pendingUpdates.size >= submissionIsFileBatchSize) {
      await flushPending();
    }
    // Periodic progress log every logSize processed records.
    if (processedRecords > 0 && processedRecords % logSize === 0) {
      console.log(
        `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Queued ${processedRecords} submissions so far...`,
      );
    }
  }

  // Flush whatever is left after the stream ends.
  await flushPending();

  const distinctUpdated = updatedLegacyIds.size;

  // Final summary: processed / updated / missing / skipped / parse errors.
  console.log(
    `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Processed ${processedRecords} submission records.`,
  );
  console.log(
    `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Updated ${totalUpdatedRows} row(s) affecting ${distinctUpdated} submission(s).`,
  );
  if (totalMissing > 0) {
    console.warn(
      `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Unable to locate ${totalMissing} submission(s) in the database.`,
    );
    if (missingExamples.length > 0) {
      console.warn(
        `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Missing legacySubmissionId examples: ${missingExamples.join(', ')}`,
      );
    }
  }
  if (skippedMissingLegacyId > 0) {
    console.warn(
      `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Skipped ${skippedMissingLegacyId} record(s) without a legacySubmissionId.`,
    );
  }
  if (skippedOutsideWindow > 0) {
    console.log(
      `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Skipped ${skippedOutsideWindow} record(s) outside the incremental window.`,
    );
  }
  // Only the first 5 parse errors were logged inline; surface the full count.
  if (parseErrors > 5) {
    console.warn(
      `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] Encountered ${parseErrors} parse error(s) while reading the export.`,
    );
  }
  console.log(
    `[${MIGRATION_TARGET_SUBMISSION_IS_FILE}] isFileSubmission synchronization completed.`,
  );
}
1002+
7201003
async function migrateElasticSearch() {
7211004
// migrate elastic search data
7221005
const filepath = ES_DATA_FILE;
@@ -3269,6 +3552,15 @@ async function migrateResourceSubmissions() {
32693552
}
32703553

32713554
async function migrate() {
3555+
if (!runFullMigration && runSubmissionIsFileTarget) {
3556+
await migrateSubmissionIsFileFlagOnly();
3557+
return;
3558+
}
3559+
3560+
if (!runFullMigration) {
3561+
throw new Error('No recognized migration targets were provided.');
3562+
}
3563+
32723564
if (!fs.existsSync(DATA_DIR)) {
32733565
throw new Error(
32743566
`DATA_DIR "${DATA_DIR}" does not exist. Set DATA_DIR to a valid export path.`,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Add isFileSubmission flag to submissions to differentiate file vs URL entries
-- Idempotent (IF NOT EXISTS); NOT NULL with DEFAULT false backfills existing rows to false.
ALTER TABLE "submission"
ADD COLUMN IF NOT EXISTS "isFileSubmission" BOOLEAN NOT NULL DEFAULT false;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Improve review lookups that combine submission/phase filters with ordering
-- Composite (filterColumn, id) indexes let "WHERE <col> = ? ORDER BY id" scans avoid a sort.
-- NOTE(review): unlike the sibling migration these lack IF NOT EXISTS, so a
-- rerun fails if the indexes already exist — confirm this is intentional.
CREATE INDEX "review_submissionId_id_idx" ON "review"("submissionId", "id");
CREATE INDEX "review_phaseId_id_idx" ON "review"("phaseId", "id");
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Add a composite index to speed up paginated lookups by review item
-- Column order (reviewItemId, sortOrder, id) matches filter-by-item plus ordered pagination.
-- NOTE(review): this index is schema-qualified ("reviews".) while the other
-- migrations in this commit are not — confirm the intended schema/search_path.
CREATE INDEX IF NOT EXISTS "reviewItemComment_reviewItemId_sortOrder_id_idx"
ON "reviews"."reviewItemComment"("reviewItemId", "sortOrder", "id");

prisma/schema.prisma

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,10 @@ model review {
180180
181181
@@index([committed]) // Index for filtering by committed status
182182
@@index([submissionId]) // Index for filtering by submission
183+
@@index([submissionId, id]) // Helps ORDER BY id after filtering by submission
183184
@@index([resourceId]) // Index for filtering by resource (reviewer)
184185
@@index([phaseId]) // Index for filtering by phase
186+
@@index([phaseId, id]) // Helps ORDER BY id after filtering by phase
185187
@@index([scorecardId]) // Index for joining with scorecard table
186188
@@index([status]) // Index for filtering by review status
187189
@@index([status, phaseId])
@@ -228,6 +230,7 @@ model reviewItemComment {
228230
appeal appeal?
229231
230232
@@index([reviewItemId]) // Index for joining with reviewItem table
233+
@@index([reviewItemId, sortOrder, id]) // Helps ordered pagination on review item comments
231234
@@index([id]) // Index for direct ID lookups
232235
@@index([resourceId]) // Index for filtering by resource (commenter)
233236
@@index([type]) // Index for filtering by comment type
@@ -397,6 +400,7 @@ model submission {
397400
fileSize Int?
398401
viewCount Int?
399402
systemFileName String?
403+
isFileSubmission Boolean @default(false)
400404
thurgoodJobId String?
401405
virusScan Boolean @default(false)
402406

0 commit comments

Comments
 (0)