CUMULUS-3286: Handle get temporal info from non-existing CMR file (nasa#3490)

* CUMULUS-3286: Handle get temporal info from non-existing CMR file

* update api test timeout

* fix test

* update granule es mapping queryFields

* add unit test for s3ObjectExists
jennyhliu committed Oct 9, 2023
1 parent 3e82063 commit 9493d00
Showing 11 changed files with 123 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -64,6 +64,11 @@ Users/clients that do not make use of these endpoints will not be impacted.
- **CUMULUS-3095**
- Added back `rule` schema validation which is missing after RDS phase 3.
- Fixed a bug for creating rule with tags.
- **CUMULUS-3286**
- Fixed `@cumulus/cmrjs/cmr-utils/getGranuleTemporalInfo` and `@cumulus/message/Granules/getGranuleCmrTemporalInfo`
to handle a non-existing CMR file.
- Updated mapping for granule and deletedgranule Elasticsearch records to prevent dynamic field mapping for keys under
`queryFields`.
- **CUMULUS-3393**
- Fixed `PUT` collection endpoint to update collection configuration in S3.

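A minimal sketch of the behavior change described in this CHANGELOG entry. The fallback values and the granule shape are hypothetical, and the import path is assumed from the module name above; only the "missing CMR file now yields an empty object" contract comes from the commit itself.

const { getGranuleTemporalInfo } = require('@cumulus/cmrjs/cmr-utils'); // assumed export path

// Before this fix, a granule whose CMR file had been removed from S3 caused an S3 error;
// after it, getGranuleTemporalInfo logs a warning and resolves to {}.
async function temporalInfoOrFallback(granule) {
  const temporalInfo = await getGranuleTemporalInfo(granule);
  if (Object.keys(temporalInfo).length === 0) {
    // Hypothetical fallback: reuse whatever the granule record already carries.
    return {
      beginningDateTime: granule.beginningDateTime,
      endingDateTime: granule.endingDateTime,
      productionDateTime: granule.productionDateTime,
      lastUpdateDateTime: granule.lastUpdateDateTime,
    };
  }
  return temporalInfo;
}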
6 changes: 4 additions & 2 deletions example/spec/parallel/orca/OrcaBackupAndRecoverySpec.js
@@ -139,9 +139,11 @@ describe('The S3 Ingest Granules workflow', () => {

// copiedToOrca contains a list of the file s3uri in primary buckets
const copiedOver = await Promise.all(
filesCopiedToOrca.map((s3uri) => {
filesCopiedToOrca.map(async (s3uri) => {
expect(excludedFileExtensions.filter((type) => s3uri.endsWith(type)).length).toBe(0);
return s3ObjectExists({ Bucket: config.buckets.glacier.name, Key: parseS3Uri(s3uri).Key });
const parsedS3Uri = parseS3Uri(s3uri);
await deleteS3Object(parsedS3Uri.Bucket, parsedS3Uri.Key);
return s3ObjectExists({ Bucket: config.buckets.glacier.name, Key: parsedS3Uri.Key });
})
);
copiedOver.forEach((check) => expect(check).toEqual(true));
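The updated assertion above deletes each primary-bucket copy before checking that the ORCA (glacier) copy still exists. A condensed sketch of that pattern, with the URI and bucket name left hypothetical and the helpers assumed to come from @cumulus/aws-client/S3 as in the spec's imports:

const { deleteS3Object, parseS3Uri, s3ObjectExists } = require('@cumulus/aws-client/S3');

// Resolves true when the backed-up copy survives deletion of the primary copy.
async function orcaCopySurvivesPrimaryDelete(s3uri, orcaBucket) {
  const { Bucket, Key } = parseS3Uri(s3uri);
  await deleteS3Object(Bucket, Key); // remove the primary-bucket copy
  return s3ObjectExists({ Bucket: orcaBucket, Key }); // the ORCA copy should remain
}

// e.g. await orcaCopySurvivesPrimaryDelete('s3://primary-bucket/path/granule.hdf', 'orca-glacier-bucket');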
9 changes: 6 additions & 3 deletions packages/api/tests/endpoints/test-granules.js
@@ -2059,6 +2059,9 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) =>
testExecutionCumulusId,
} = t.context;
const timestamp = Date.now();
const oldQueryFields = {
foo: Math.random(),
};
const { newPgGranule, esRecord } = await createGranuleAndFiles({
dbClient: knex,
esClient,
@@ -2067,6 +2070,7 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) =>
status: 'running',
execution: executionUrl,
timestamp: Date.now(),
queryFields: oldQueryFields,
},
});
const newApiGranule = await translatePostgresGranuleToApiGranule({
@@ -2075,9 +2079,9 @@ test.serial('PATCH updates an existing granule in all data stores', async (t) =>
});

t.is(newPgGranule.status, 'running');
t.is(newPgGranule.query_fields, null);
t.deepEqual(newPgGranule.query_fields, oldQueryFields);
t.is(esRecord.status, 'running');
t.is(esRecord.queryFields, undefined);
t.deepEqual(esRecord.queryFields, oldQueryFields);

const newQueryFields = {
foo: randomString(),
@@ -3562,7 +3566,6 @@ test.serial('PUT replaces an existing granule in all data stores, removing exist
executionUrl,
knex,
} = t.context;
console.log('foobar');
const {
apiGranule,
newPgGranule,
48 changes: 24 additions & 24 deletions packages/api/tests/lib/writeRecords/test-write-granules.js
@@ -646,20 +646,20 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos
];

const completeGranule = fakeGranuleFactoryV2({
beginningDateTime: new Date().toString(),
beginningDateTime: new Date().toISOString(),
cmrLink: 'example.com',
collectionId: constructCollectionId(collection.name, collection.version),
createdAt: Date.now(),
duration: 1000,
endingDateTime: new Date().toString(),
endingDateTime: new Date().toISOString(),
error: { errorKey: 'errorValue' },
execution: executionUrl,
files: files,
lastUpdateDateTime: new Date().toString(),
lastUpdateDateTime: new Date().toISOString(),
pdrName: pdr.name,
processingEndDateTime: new Date().toString(),
processingStartDateTime: new Date().toString(),
productionDateTime: new Date().toString(),
processingEndDateTime: new Date().toISOString(),
processingStartDateTime: new Date().toISOString(),
productionDateTime: new Date().toISOString(),
productVolume: '1000',
provider: provider.name,
published: true,
@@ -842,20 +842,20 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos
} = t.context;

const completeGranule = fakeGranuleFactoryV2({
beginningDateTime: new Date().toString(),
beginningDateTime: new Date().toISOString(),
cmrLink: 'example.com',
collectionId: constructCollectionId(collection.name, collection.version),
createdAt: Date.now(),
duration: 1000,
endingDateTime: new Date().toString(),
endingDateTime: new Date().toISOString(),
error: { errorKey: 'errorValue' },
execution: executionUrl,
files: files,
lastUpdateDateTime: new Date().toString(),
lastUpdateDateTime: new Date().toISOString(),
pdrName: pdr.name,
processingEndDateTime: new Date().toString(),
processingStartDateTime: new Date().toString(),
productionDateTime: new Date().toString(),
processingEndDateTime: new Date().toISOString(),
processingStartDateTime: new Date().toISOString(),
productionDateTime: new Date().toISOString(),
productVolume: '1000',
provider: provider.name,
published: true,
@@ -989,18 +989,18 @@ test.serial('writeGranulesFromMessage() on re-write saves granule records to Pos
];

const completeGranule = fakeGranuleFactoryV2({
beginningDateTime: new Date().toString(),
beginningDateTime: new Date().toISOString(),
cmrLink: 'example.com',
collectionId: constructCollectionId(collection.name, collection.version),
duration: 1000,
endingDateTime: new Date().toString(),
endingDateTime: new Date().toISOString(),
error: { errorKey: 'errorValue' },
files: files,
lastUpdateDateTime: new Date().toString(),
lastUpdateDateTime: new Date().toISOString(),
pdrName: pdr.name,
processingEndDateTime: new Date().toString(),
processingStartDateTime: new Date().toString(),
productionDateTime: new Date().toString(),
processingEndDateTime: new Date().toISOString(),
processingStartDateTime: new Date().toISOString(),
productionDateTime: new Date().toISOString(),
productVolume: '1000',
provider: provider.name,
published: true,
@@ -3968,21 +3968,21 @@ test.serial('writeGranuleFromApi() given a granule with all fields populated is
];

const completeGranule = fakeGranuleFactoryV2({
beginningDateTime: new Date().toString(),
beginningDateTime: new Date().toISOString(),
cmrLink: 'example.com',
collectionId: constructCollectionId(collection.name, collection.version),
createdAt: Date.now(),
duration: 1000,
endingDateTime: new Date().toString(),
endingDateTime: new Date().toISOString(),
error: { errorKey: 'errorValue' },
execution: executionUrl,
files: files,
granuleId: granuleId,
lastUpdateDateTime: new Date().toString(),
lastUpdateDateTime: new Date().toISOString(),
pdrName: pdr.name,
processingEndDateTime: new Date().toString(),
processingStartDateTime: new Date().toString(),
productionDateTime: new Date().toString(),
processingEndDateTime: new Date().toISOString(),
processingStartDateTime: new Date().toISOString(),
productionDateTime: new Date().toISOString(),
productVolume: '1000',
provider: provider.name,
published: true,
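The fixture changes in this file swap new Date().toString() for new Date().toISOString(). The snippet below simply contrasts the two serializations (sample outputs in comments): ISO 8601 strings are unambiguous and sortable, which is what the granule temporal fields are normalized to elsewhere, while toString() output is locale- and timezone-dependent.

const now = new Date();

// Locale/timezone-dependent, e.g. "Mon Oct 09 2023 17:30:00 GMT+0000 (Coordinated Universal Time)"
console.log(now.toString());

// ISO 8601, e.g. "2023-10-09T17:30:00.000Z" — round-trips cleanly through date parsing
console.log(now.toISOString());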
14 changes: 14 additions & 0 deletions packages/aws-client/tests/test-S3.js
@@ -34,6 +34,7 @@ const {
deleteS3Objects,
promiseS3Upload,
fileExists,
s3ObjectExists,
} = require('../S3');
const awsServices = require('../services');
const { streamToString } = require('../test-utils');
@@ -501,3 +502,16 @@ test('fileExists() correctly returns false for non-existent file', async (t) =>
const { Bucket } = t.context;
t.false(await fileExists(Bucket, randomString()));
});

test('s3ObjectExists() returns true for existing file', async (t) => {
const { Bucket } = t.context;

const { Key } = await stageTestObjectToLocalStack(Bucket, 'asdf');
t.true(await s3ObjectExists({ Bucket, Key }));
});

test('s3ObjectExists() returns false for non-existent file', async (t) => {
const { Bucket } = t.context;
t.false(await s3ObjectExists({ Bucket, Key: randomString() }));
t.false(await s3ObjectExists({ Bucket: randomString(), Key: randomString() }));
});
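For context on the helper these new tests exercise, one plausible shape for such a check is sketched below. This is an illustration only, assuming an AWS SDK v2-style S3 client; it is not the actual @cumulus/aws-client implementation.

const { S3 } = require('aws-sdk');

const s3 = new S3();

// Resolve true if HeadObject succeeds, false if the object (or bucket) is missing.
async function s3ObjectExistsSketch({ Bucket, Key }) {
  try {
    await s3.headObject({ Bucket, Key }).promise();
    return true;
  } catch (error) {
    if (error.statusCode === 404 || error.code === 'NotFound' || error.code === 'NoSuchBucket') {
      return false;
    }
    throw error; // propagate permission errors and other unexpected failures
  }
}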
6 changes: 6 additions & 0 deletions packages/cmrjs/src/cmr-utils.js
@@ -15,6 +15,7 @@ const {
parseS3Uri,
promiseS3Upload,
s3GetObjectTagging,
s3ObjectExists,
s3TagSetToQueryString,
waitForObject,
getObjectStreamContents,
@@ -1098,6 +1099,11 @@ async function getGranuleTemporalInfo(granule) {

const cmrFilename = getS3UrlOfFile(cmrFile[0]);

if (!(await s3ObjectExists(parseS3Uri(cmrFilename)))) {
log.warn(`getGranuleTemporalInfo cmr file does not exist ${cmrFilename}`);
return {};
}

if (isCMRISOFilename(cmrFilename)) {
const metadata = await metadataObjectFromCMRXMLFile(cmrFilename);
const metadataMI = get(
37 changes: 29 additions & 8 deletions packages/cmrjs/tests/cmr-utils/test-cmr-utils.js
@@ -9,9 +9,11 @@ const { promisify } = require('util');
const pickAll = require('lodash/fp/pickAll');
const {
buildS3Uri,
createBucket,
getS3Object,
parseS3Uri,
promiseS3Upload,
putJsonS3Object,
recursivelyDeleteS3Bucket,
s3GetObjectTagging,
s3TagSetToQueryString,
@@ -102,6 +104,12 @@ test.before(async (t) => {
t.context.distributionBucketMap = Object.fromEntries(
Object.values(bucketsJson).map(({ name }) => [name, name])
);

t.context.cmrFileBucket = randomId('bucket');
await createBucket(t.context.cmrFileBucket);
await putJsonS3Object(t.context.cmrFileBucket, 'test.cmr.json', { foo: 'bar' });
await putJsonS3Object(t.context.cmrFileBucket, 'test.cmr.xml', { foo: 'bar' });
await putJsonS3Object(t.context.cmrFileBucket, 'test.cmr_iso.xml', { foo: 'bar' });
});

test.after.always(async (t) => {
@@ -114,6 +122,7 @@ test.after.always(async (t) => {
SecretId: cmrPasswordSecret,
ForceDeleteWithoutRecovery: true,
}).promise(),
await recursivelyDeleteS3Bucket(t.context.cmrFileBucket),
]);

t.context.launchpadStub.restore();
@@ -741,7 +750,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.json',
}],
});
@@ -768,7 +777,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.json',
}],
});
@@ -795,7 +804,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.json',
}],
});
@@ -822,7 +831,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.json',
}],
});
@@ -849,7 +858,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.xml',
}],
});
@@ -876,7 +885,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr.xml',
}],
});
@@ -903,7 +912,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr_iso.xml',
}],
});
@@ -930,7 +939,7 @@ test.serial('getGranuleTemporalInfo returns temporal information from granule CM
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
bucket: 'bucket',
bucket: t.context.cmrFileBucket,
key: 'test.cmr_iso.xml',
}],
});
@@ -953,6 +962,18 @@ test.serial('getGranuleTemporalInfo returns empty object if cmr file s3 url is n
t.deepEqual(temporalInfo, {});
});

test.serial('getGranuleTemporalInfo returns empty object if cmr file s3 does not exist', async (t) => {
const temporalInfo = await getGranuleTemporalInfo({
granuleId: 'testGranuleId',
files: [{
path: t.context.cmrFileBucket,
name: 'not-exist.cmr_iso.xml',
}],
});

t.deepEqual(temporalInfo, {});
});

test.serial('generateFileUrl generates correct url for cmrGranuleUrlType distribution', (t) => {
const filename = 's3://fake-bucket/folder/key.txt';
const distEndpoint = 'www.example.com/';
8 changes: 8 additions & 0 deletions packages/es-client/config/mappings.json
@@ -266,6 +266,10 @@
"published": {
"type": "boolean"
},
"queryFields": {
"type": "object",
"dynamic": false
},
"duration": {
"type": "float"
},
@@ -349,6 +353,10 @@
"published": {
"type": "boolean"
},
"queryFields": {
"type": "object",
"dynamic": false
},
"duration": {
"type": "float"
},
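Setting "dynamic": false on queryFields means Elasticsearch still keeps whatever keys a workflow puts under queryFields in _source, but no longer creates per-key field mappings for them (so the keys are not individually searchable and differently-typed values across granules cannot cause mapping conflicts). A hedged sketch of the effect, assuming a 7.x-style @elastic/elasticsearch client and a hypothetical index built from the mapping above:

const { Client } = require('@elastic/elasticsearch');

const client = new Client({ node: 'http://localhost:9200' }); // hypothetical endpoint

async function demo() {
  // Arbitrary keys under queryFields are accepted and kept in _source...
  await client.index({
    index: 'cumulus-granule', // hypothetical index using the granule mapping above
    body: {
      granuleId: 'demo-granule',
      published: true,
      queryFields: { foo: 0.42, anyNewKey: 'stored but not mapped' },
    },
  });

  // ...but the index mapping gains no queryFields.* sub-fields, unlike with dynamic mapping.
  const { body: mapping } = await client.indices.getMapping({ index: 'cumulus-granule' });
  console.log(JSON.stringify(mapping, null, 2));
}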
10 changes: 9 additions & 1 deletion packages/message/src/Granules.ts
@@ -8,10 +8,12 @@
* const Granules = require('@cumulus/message/Granules');
*/

import isEmpty from 'lodash/isEmpty';
import isInteger from 'lodash/isInteger';
import isUndefined from 'lodash/isUndefined';
import mapValues from 'lodash/mapValues';
import omitBy from 'lodash/omitBy';
import pick from 'lodash/pick';

import { CumulusMessageError } from '@cumulus/errors';
import { Message } from '@cumulus/types';
@@ -171,7 +173,8 @@ function isGranuleTemporalInfo(
}

/**
* Get granule temporal information from argument or directly from CMR.
* Get granule temporal information from argument, directly from CMR
* file or from granule object.
*
* Converts temporal information timestamps to a standardized ISO string
* format for compatibility across database systems.
@@ -196,6 +199,11 @@ export const getGranuleCmrTemporalInfo = async ({
const temporalInfo = isGranuleTemporalInfo(cmrTemporalInfo)
? { ...cmrTemporalInfo }
: await cmrUtils.getGranuleTemporalInfo(granule);

if (isEmpty(temporalInfo)) {
return pick(granule, ['beginningDateTime', 'endingDateTime', 'productionDateTime', 'lastUpdateDateTime']);
}

return mapValues(
temporalInfo,
convertDateToISOStringPreservingNull
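A sketch of the precedence this change establishes: an explicit cmrTemporalInfo argument first, then the CMR metadata file via cmrUtils, then the temporal fields already on the granule record. The granule values and the stubbed cmrUtils below are illustrative, and the parameter shape is inferred from the destructuring shown in this diff.

const { getGranuleCmrTemporalInfo } = require('@cumulus/message/Granules');

const granule = {
  granuleId: 'demo-granule',
  beginningDateTime: '2017-10-24T00:00:00.000Z',
  endingDateTime: '2017-11-08T23:59:59.000Z',
  productionDateTime: '2018-07-19T12:01:01.000Z',
  lastUpdateDateTime: '2018-04-25T21:45:45.524Z',
};

async function demo() {
  const temporalInfo = await getGranuleCmrTemporalInfo({
    granule,
    // No explicit cmrTemporalInfo is supplied, and the stub stands in for the case
    // where the granule's CMR file is missing, which now resolves to {}.
    cmrUtils: { getGranuleTemporalInfo: async () => ({}) },
  });
  // With the new fallback, temporalInfo is the four temporal fields picked off `granule`
  // rather than an empty object.
  return temporalInfo;
}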