Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate string with size > 4096 for CSV copier #819

Merged
merged 1 commit into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "src/common/include/csv_reader/csv_reader.h"

#include "spdlog/spdlog.h"

#include "src/common/include/configs.h"
#include "src/common/include/type_utils.h"
#include "src/common/include/utils.h"
Expand Down Expand Up @@ -35,7 +37,8 @@ CSVReader::CSVReader(
: fd{nullptr}, config{config}, nextLineIsNotProcessed{false}, isEndOfBlock{false},
nextTokenIsNotProcessed{false}, line{line}, lineCapacity{1024}, lineLen{lineLen},
linePtrStart{linePtrStart}, linePtrEnd{linePtrStart}, readingBlockStartOffset{0},
readingBlockEndOffset{UINT64_MAX}, nextTokenLen{UINT64_MAX} {}
readingBlockEndOffset{UINT64_MAX},
nextTokenLen{UINT64_MAX}, logger{LoggerUtils::getOrCreateSpdLogger("csv_reader")} {}

CSVReader::~CSVReader() {
// fd can be nullptr when the CSVReader is constructed by passing a char*, so it is reading over
Expand Down Expand Up @@ -182,7 +185,9 @@ char* CSVReader::getString() {
setNextTokenIsProcessed();
auto strVal = line + linePtrStart;
if (strlen(strVal) > DEFAULT_PAGE_SIZE) {
throw CSVReaderException(StringUtils::getLongStringErrorMessage(strVal, DEFAULT_PAGE_SIZE));
logger->warn(StringUtils::getLongStringErrorMessage(strVal, DEFAULT_PAGE_SIZE));
// If the string is too long, truncate it.
strVal[DEFAULT_PAGE_SIZE] = '\0';
}
return strVal;
}
Expand Down
5 changes: 5 additions & 0 deletions src/common/include/csv_reader/csv_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

using namespace std;

// Forward declaration of spdlog::logger so that the shared_ptr<spdlog::logger> member of
// CSVReader can be declared in this header. NOTE(review): the full definition is presumably
// supplied to the implementation file through "spdlog/spdlog.h" - confirm.
namespace spdlog {
class logger;
}

namespace graphflow {
namespace common {

Expand Down Expand Up @@ -85,6 +89,7 @@ class CSVReader {
private:
FILE* fd;
const CSVReaderConfig& config;
shared_ptr<spdlog::logger> logger;
bool nextLineIsNotProcessed, isEndOfBlock, nextTokenIsNotProcessed;
char* line;
size_t lineCapacity, lineLen;
Expand Down
11 changes: 0 additions & 11 deletions test/copy_csv/copy_csv_fault_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ class CopyCSVFaultTest : public EmptyDBTest {
}
};

// Fault-test fixture whose input CSV directory is the long-string dataset.
class CopyCSVLongStringTest : public CopyCSVFaultTest {
// Directory containing the CSV files used by this fixture.
string getInputCSVDir() override { return "dataset/copy-csv-fault-tests/long-string/"; }
};

// Fault-test fixture whose input CSV directory is the duplicate-ids dataset.
class CopyCSVDuplicateIDTest : public CopyCSVFaultTest {
// Directory containing the CSV files used by this fixture.
string getInputCSVDir() override { return "dataset/copy-csv-fault-tests/duplicate-ids/"; }
};
Expand All @@ -32,13 +28,6 @@ class CopyNodeCSVUnmatchedColumnTypeTest : public CopyCSVFaultTest {
string getInputCSVDir() override { return "dataset/copy-csv-fault-tests/long-string/"; }
};

// Expects the COPY of vPerson.csv to be reported as a CSVReader exception stating that the
// maximum string length is 4096 while the input string's length is 5625.
// NOTE(review): getCopyCSVException() presumably returns the message of the exception raised
// by the COPY statement - confirm against the fixture.
TEST_F(CopyCSVLongStringTest, LongStringError) {
ASSERT_EQ(getCopyCSVException(),
"Failed to execute statement: COPY person FROM "
"\"dataset/copy-csv-fault-tests/long-string/vPerson.csv\".\nError: CSVReader "
"exception: Maximum length of strings is 4096. Input string's length is 5625.");
}

TEST_F(CopyCSVDuplicateIDTest, DuplicateIDsError) {
ASSERT_EQ(getCopyCSVException(),
"Failed to execute statement: COPY person FROM "
Expand Down
28 changes: 28 additions & 0 deletions test/copy_csv/copy_csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class CopyCSVEmptyListsTest : public InMemoryDBTest {
}
};

// In-memory database fixture whose input CSV directory is the long-string dataset.
class CopyCSVLongStringTest : public InMemoryDBTest {
private:
    // Directory containing the CSV files used by this fixture.
    string getInputCSVDir() override {
        return "dataset/copy-csv-fault-tests/long-string/";
    }
};

struct KnowsTablePTablePKnowsLists {
table_id_t knowsRelTableID;
table_id_t pNodeTableID;
Expand Down Expand Up @@ -322,3 +326,27 @@ TEST_F(CopyCSVSpecialCharTest, CopySpecialCharsCsv) {
TEST_F(CopyCSVEmptyListsTest, CopyCSVEmptyLists) {
testCopyCSVEmptyListsTest();
}

// Checks the outcome of copying the long-string dataset into the "person" node table:
//  - row 0 of the "fName" column holds "Alice" repeated and cut off at exactly 4096 characters,
//  - row 1 of "fName" holds the short value "Bob" unchanged,
//  - the "gender" column holds 1 and 2 for rows 0 and 1.
// The test keeps its historical name even though it asserts on stored values rather than on an
// error message.
TEST_F(CopyCSVLongStringTest, LongStringError) {
    // Declared as size_t so every comparison against a string length is unsigned on both sides.
    constexpr size_t maxStringLength = 4096;

    auto storageManager = database->getStorageManager();
    auto& catalog = *database->getCatalog();
    auto tableID = catalog.getReadOnlyVersion()->getNodeTableIDFromName("person");

    // --- fName: string column containing the over-long value ---
    auto fNameProperty = catalog.getReadOnlyVersion()->getNodeProperty(tableID, "fName");
    auto fNameCol =
        storageManager->getNodesStore().getNodePropertyColumn(tableID, fNameProperty.propertyID);

    // Build the expected value: "AliceAlice..." cut to the maximum length.
    const string repeatedName = "Alice";
    string expectedLongName;
    expectedLongName.reserve(maxStringLength + repeatedName.length());
    while (expectedLongName.length() < maxStringLength) {
        expectedLongName += repeatedName;
    }
    expectedLongName.resize(maxStringLength);

    EXPECT_EQ(maxStringLength, fNameCol->readValue(0).strVal.length());
    EXPECT_EQ(expectedLongName, fNameCol->readValue(0).strVal);
    EXPECT_EQ("Bob", fNameCol->readValue(1).strVal);

    // --- gender: int64 column of the same rows ---
    auto genderProperty = catalog.getReadOnlyVersion()->getNodeProperty(tableID, "gender");
    auto genderCol =
        storageManager->getNodesStore().getNodePropertyColumn(tableID, genderProperty.propertyID);
    EXPECT_EQ(1, genderCol->readValue(0).val.int64Val);
    EXPECT_EQ(2, genderCol->readValue(1).val.int64Val);
}