Skip to content
Permalink
Browse files

Use HEAD to check if a file exists on S3.

The previous implementation searched for the file in a list which worked but was not optimal.  For arbitrary bucket structures it would also produce a false negative if a match was not found in the first 1000 entries.  This was not an issue for our repo structure since the max hits on exists calls is two but it seems worth fixing to avoid future complications.
  • Loading branch information...
dwsteele committed Jun 4, 2019
1 parent 15b8e3b commit 44eb21ea935fdaa4e96d4637c7b62cb5a73d3e77
Showing with 9 additions and 62 deletions.
  1. +1 −0 src/common/io/http/client.c
  2. +2 −0 src/common/io/http/client.h
  3. +2 −25 src/storage/s3/storage.c
  4. +4 −37 test/src/module/storage/s3Test.c
@@ -21,6 +21,7 @@ Http constants

STRING_EXTERN(HTTP_VERB_DELETE_STR, HTTP_VERB_DELETE);
STRING_EXTERN(HTTP_VERB_GET_STR, HTTP_VERB_GET);
STRING_EXTERN(HTTP_VERB_HEAD_STR, HTTP_VERB_HEAD);
STRING_EXTERN(HTTP_VERB_POST_STR, HTTP_VERB_POST);
STRING_EXTERN(HTTP_VERB_PUT_STR, HTTP_VERB_PUT);

@@ -32,6 +32,8 @@ HTTP Constants
STRING_DECLARE(HTTP_VERB_DELETE_STR);
#define HTTP_VERB_GET "GET"
STRING_DECLARE(HTTP_VERB_GET_STR);
#define HTTP_VERB_HEAD "HEAD"
STRING_DECLARE(HTTP_VERB_HEAD_STR);
#define HTTP_VERB_POST "POST"
STRING_DECLARE(HTTP_VERB_POST_STR);
#define HTTP_VERB_PUT "PUT"
@@ -358,31 +358,8 @@ storageS3Exists(THIS_VOID, const String *file)

MEM_CONTEXT_TEMP_BEGIN()
{
HttpQuery *query = httpQueryNew();

// Generate the file name as a prefix. Muliple files may be returned but this will narrow down the list.
String *prefix = strNewFmt("%s", strPtr(strSub(file, 1)));
httpQueryAdd(query, S3_QUERY_PREFIX_STR, prefix);

// Build the query using list type 2
httpQueryAdd(query, S3_QUERY_LIST_TYPE_STR, S3_QUERY_VALUE_LIST_TYPE_2_STR);

XmlNode *xmlRoot = xmlDocumentRoot(
xmlDocumentNewBuf(storageS3Request(this, HTTP_VERB_GET_STR, FSLASH_STR, query, NULL, true, false).response));

// Check if the prefix exists. If not then the file definitely does not exist, but if it does we'll need to check the
// exact name to be sure we are not looking at a different file with the same prefix
XmlNodeList *fileList = xmlNodeChildList(xmlRoot, S3_XML_TAG_CONTENTS_STR);

for (unsigned int fileIdx = 0; fileIdx < xmlNodeLstSize(fileList); fileIdx++)
{
// If the name matches exactly then the file exists
if (strEq(prefix, xmlNodeContent(xmlNodeChild(xmlNodeLstGet(fileList, fileIdx), S3_XML_TAG_KEY_STR, true))))
{
result = true;
break;
}
}
storageS3Request(this, HTTP_VERB_HEAD_STR, file, NULL, NULL, false, true);
result = httpClientResponseCodeOk(this->httpClient);
}
MEM_CONTEXT_TEMP_END();

@@ -172,43 +172,12 @@ testS3Server(void)
// storageDriverExists()
// -------------------------------------------------------------------------------------------------------------------------
// File missing
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?list-type=2&prefix=BOGUS", NULL));
harnessTlsServerReply(
testS3ServerResponse(
200, "OK", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<ListBucketResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">"
"</ListBucketResult>"));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_HEAD, "/BOGUS", NULL));
harnessTlsServerReply(testS3ServerResponse(404, "Not Found", NULL, NULL));

// File exists
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?list-type=2&prefix=subdir%2Ffile1.txt", NULL));
harnessTlsServerReply(
testS3ServerResponse(
200, "OK", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<ListBucketResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">"
" <Contents>"
" <Key>subdir/file1.txts</Key>"
" </Contents>"
" <Contents>"
" <Key>subdir/file1.txt</Key>"
" </Contents>"
"</ListBucketResult>"));

// File does not exist but files with same prefix do
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?list-type=2&prefix=subdir%2Ffile1.txt", NULL));
harnessTlsServerReply(
testS3ServerResponse(
200, "OK", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<ListBucketResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">"
" <Contents>"
" <Key>subdir/file1.txta</Key>"
" </Contents>"
" <Contents>"
" <Key>subdir/file1.txtb</Key>"
" </Contents>"
"</ListBucketResult>"));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_HEAD, "/subdir/file1.txt", NULL));
harnessTlsServerReply(testS3ServerResponse(200, "OK", "content-length:999", NULL));

// storageDriverList()
// -------------------------------------------------------------------------------------------------------------------------
@@ -738,8 +707,6 @@ testRun(void)
// -------------------------------------------------------------------------------------------------------------------------
TEST_RESULT_BOOL(storageExistsNP(s3, strNew("BOGUS")), false, "file does not exist");
TEST_RESULT_BOOL(storageExistsNP(s3, strNew("subdir/file1.txt")), true, "file exists");
TEST_RESULT_BOOL(
storageExistsNP(s3, strNew("subdir/file1.txt")), false, "file does not exist but files with same prefix do");

// storageDriverList()
// -------------------------------------------------------------------------------------------------------------------------

0 comments on commit 44eb21e

Please sign in to comment.
You can’t perform that action at this time.