Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve split speed #84

Merged
merged 7 commits into from
Jan 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions mdal/frmts/mdal_2dm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ std::unique_ptr<MDAL::Mesh> MDAL::Driver2dm::load( const std::string &meshFile,
{
if ( startsWith( line, "E4Q" ) )
{
chunks = split( line, " ", SplitBehaviour::SkipEmptyParts );
chunks = split( line, ' ' );
assert( faceIndex < faceCount );

Face &face = faces[faceIndex];
Expand All @@ -161,7 +161,7 @@ std::unique_ptr<MDAL::Mesh> MDAL::Driver2dm::load( const std::string &meshFile,
}
else if ( startsWith( line, "E3T" ) )
{
chunks = split( line, " ", SplitBehaviour::SkipEmptyParts );
chunks = split( line, ' ' );
assert( faceIndex < faceCount );

Face &face = faces[faceIndex];
Expand All @@ -181,7 +181,7 @@ std::unique_ptr<MDAL::Mesh> MDAL::Driver2dm::load( const std::string &meshFile,
startsWith( line, "E9Q" ) )
{
// We do not yet support these elements
chunks = split( line, " ", SplitBehaviour::SkipEmptyParts );
chunks = split( line, ' ' );
assert( faceIndex < faceCount );

//size_t elemID = toSizeT( chunks[1] );
Expand All @@ -191,7 +191,7 @@ std::unique_ptr<MDAL::Mesh> MDAL::Driver2dm::load( const std::string &meshFile,
}
else if ( startsWith( line, "ND" ) )
{
chunks = split( line, " ", SplitBehaviour::SkipEmptyParts );
chunks = split( line, ' ' );
size_t nodeID = toSizeT( chunks[1] ) - 1; // 2dm is numbered from 1
_parse_vertex_id_gaps( vertexIDtoIndex, vertexIndex, nodeID, status );
assert( vertexIndex < vertexCount );
Expand Down
8 changes: 4 additions & 4 deletions mdal/frmts/mdal_ascii_dat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ void MDAL::DriverAsciiDat::loadOldFormat( std::ifstream &in,
line = MDAL::trim( line );

// Split to tokens
std::vector<std::string> items = split( line, " ", SplitBehaviour::SkipEmptyParts );
std::vector<std::string> items = split( line, ' ' );
if ( items.size() < 1 )
continue; // empty line?? let's skip it

Expand Down Expand Up @@ -159,7 +159,7 @@ void MDAL::DriverAsciiDat::loadNewFormat( std::ifstream &in,
line = MDAL::trim( line );

// Split to tokens
std::vector<std::string> items = split( line, " ", SplitBehaviour::SkipEmptyParts );
std::vector<std::string> items = split( line, ' ' );
if ( items.size() < 1 )
continue; // empty line?? let's skip it

Expand Down Expand Up @@ -320,7 +320,7 @@ void MDAL::DriverAsciiDat::readVertexTimestep(
{
std::string line;
std::getline( stream, line );
std::vector<std::string> tsItems = split( line, " ", SplitBehaviour::SkipEmptyParts );
std::vector<std::string> tsItems = split( line, ' ' );

size_t index;
if ( m2dm )
Expand Down Expand Up @@ -374,7 +374,7 @@ void MDAL::DriverAsciiDat::readFaceTimestep(
{
std::string line;
std::getline( stream, line );
std::vector<std::string> tsItems = split( line, " ", SplitBehaviour::SkipEmptyParts );
std::vector<std::string> tsItems = split( line, ' ' );

if ( isVector )
{
Expand Down
12 changes: 6 additions & 6 deletions mdal/frmts/mdal_flo2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void MDAL::DriverFlo2D::parseCADPTSFile( const std::string &datFileName, std::ve
// CADPTS.DAT - COORDINATES OF CELL CENTERS (ELEM NUM, X, Y)
while ( std::getline( cadptsStream, line ) )
{
std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() != 3 )
{
throw MDAL_Status::Err_UnknownFormat;
Expand Down Expand Up @@ -140,7 +140,7 @@ void MDAL::DriverFlo2D::parseFPLAINFile( std::vector<double> &elevations,

while ( std::getline( fplainStream, line ) )
{
std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() != 7 )
{
throw MDAL_Status::Err_UnknownFormat;
Expand Down Expand Up @@ -220,7 +220,7 @@ void MDAL::DriverFlo2D::parseTIMDEPFile( const std::string &datFileName, const s

while ( std::getline( inStream, line ) )
{
std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() == 1 )
{
time = MDAL::toDouble( line );
Expand Down Expand Up @@ -303,7 +303,7 @@ void MDAL::DriverFlo2D::parseDEPTHFile( const std::string &datFileName, const st
{
if ( vertex_idx == nVertices ) throw MDAL_Status::Err_IncompatibleMesh;

std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() != 4 )
{
throw MDAL_Status::Err_UnknownFormat;
Expand Down Expand Up @@ -348,7 +348,7 @@ void MDAL::DriverFlo2D::parseVELFPVELOCFile( const std::string &datFileName )
{
if ( vertex_idx == nVertices ) throw MDAL_Status::Err_IncompatibleMesh;

std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() != 4 )
{
throw MDAL_Status::Err_UnknownFormat;
Expand Down Expand Up @@ -378,7 +378,7 @@ void MDAL::DriverFlo2D::parseVELFPVELOCFile( const std::string &datFileName )
{
if ( vertex_idx == nVertices ) throw MDAL_Status::Err_IncompatibleMesh;

std::vector<std::string> lineParts = MDAL::split( line, " ", MDAL::SplitBehaviour::SkipEmptyParts );
std::vector<std::string> lineParts = MDAL::split( line, ' ' );
if ( lineParts.size() != 4 )
{
throw MDAL_Status::Err_UnknownFormat;
Expand Down
4 changes: 2 additions & 2 deletions mdal/frmts/mdal_gdal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ std::string MDAL::DriverGdal::GDALFileName( const std::string &fileName )
double MDAL::DriverGdal::parseMetadataTime( const std::string &time_s )
{
std::string time_trimmed = MDAL::trim( time_s );
std::vector<std::string> times = MDAL::split( time_trimmed, " ", MDAL::SkipEmptyParts );
std::vector<std::string> times = MDAL::split( time_trimmed, ' ' );
return MDAL::toDouble( times[0] );
}

Expand All @@ -181,7 +181,7 @@ MDAL::DriverGdal::metadata_hash MDAL::DriverGdal::parseMetadata( GDALMajorObject
for ( int j = 0; GDALmetadata[j]; ++j )
{
std::string metadata_pair = GDALmetadata[j]; //KEY = VALUE
std::vector<std::string> metadata = MDAL::split( metadata_pair, "=", MDAL::SkipEmptyParts );
std::vector<std::string> metadata = MDAL::split( metadata_pair, '=' );
if ( metadata.size() > 1 )
{
std::string key = MDAL::toLower( metadata[0] );
Expand Down
2 changes: 1 addition & 1 deletion mdal/mdal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static MDAL_Status sLastStatus;

const char *MDAL_Version()
{
return "0.1.4";
return "0.1.5";
}

MDAL_Status MDAL_LastStatus()
Expand Down
52 changes: 38 additions & 14 deletions mdal/mdal_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,27 +44,51 @@ bool MDAL::endsWith( const std::string &str, const std::string &substr, Contains
return endsWith( toLower( str ), toLower( substr ), ContainsBehaviour::CaseSensitive );
}

std::vector<std::string> MDAL::split( const std::string &str, const std::string &delimiter, SplitBehaviour behaviour )
std::vector<std::string> MDAL::split( const std::string &str,
const char delimiter
)
{
std::string remaining( str );
std::vector<std::string> list;
size_t pos = 0;
std::string::const_iterator start = str.begin();
std::string::const_iterator end = str.end();
std::string::const_iterator next;
std::string token;
while ( ( pos = remaining.find( delimiter ) ) != std::string::npos )
do
{
token = remaining.substr( 0, pos );
next = std::find( start, end, delimiter );
token = std::string( start, next );
if ( !token.empty() )
list.push_back( token );

if ( behaviour == SplitBehaviour::SkipEmptyParts )
{
if ( !token.empty() )
list.push_back( token );
}
if ( next == end )
break;
else
list.push_back( token );
start = next + 1;
}
while ( true );
return list;
}

remaining.erase( 0, pos + delimiter.length() );

std::vector<std::string> MDAL::split( const std::string &str,
const std::string &delimiter )
{
std::vector<std::string> list;
std::string::size_type start = 0;
std::string::size_type next;
std::string token;
do
{
next = str.find( delimiter, start );
if ( next == std::string::npos )
token = str.substr( start ); // rest of the string
else
token = str.substr( start, next - start ); // part of the string
if ( !token.empty() )
list.push_back( token );
start = next + delimiter.size();
}
list.push_back( remaining );
while ( next != std::string::npos );
return list;
}

Expand Down Expand Up @@ -308,7 +332,7 @@ double MDAL::parseTimeUnits( const std::string &units )
// "seconds since 2001-05-05 00:00:00"
// "hours since 1900-01-01 00:00:0.0"
// "days since 1961-01-01 00:00:00"
const std::vector<std::string> units_list = MDAL::split( units, " since ", SkipEmptyParts );
const std::vector<std::string> units_list = MDAL::split( units, " since " );
if ( units_list.size() == 2 )
{
// Give me hours
Expand Down
21 changes: 14 additions & 7 deletions mdal/mdal_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#ifndef MDAL_UTILS_HPP
#define MDAL_UTILS_HPP

// Macro for exporting symbols
// for unit tests (on windows)
#define MDAL_TEST_EXPORT MDAL_EXPORT

#include <string>
#include <vector>
#include <stddef.h>
Expand Down Expand Up @@ -59,12 +63,15 @@ namespace MDAL
bool toBool( const std::string &str );
bool isNumber( const std::string &str );

enum SplitBehaviour
{
SkipEmptyParts,
KeepEmptyParts
};
std::vector<std::string> split( const std::string &str, const std::string &delimiter, SplitBehaviour behaviour );
/**
* Splits by delimiter and skips empty parts.
* Faster than version with std::string
*/
MDAL_TEST_EXPORT std::vector<std::string> split( const std::string &str, const char delimiter );

//! Splits by delimiter and skips empty parts
MDAL_TEST_EXPORT std::vector<std::string> split( const std::string &str, const std::string &delimiter );

std::string join( const std::vector<std::string> parts, const std::string &delimiter );

//! Right trim
Expand All @@ -87,7 +94,7 @@ namespace MDAL

// time
//! Returns the divisor that converts a value in the given units to hours
double parseTimeUnits( const std::string &units );
MDAL_TEST_EXPORT double parseTimeUnits( const std::string &units );

// statistics
void combineStatistics( Statistics &main, const Statistics &other );
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ SET(TESTS
test_api.cpp
test_ascii_dat.cpp
test_binary_dat.cpp
test_mdal_utils.cpp
)

IF(HDF5_FOUND)
Expand Down
99 changes: 99 additions & 0 deletions tests/test_mdal_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
MDAL - Mesh Data Abstraction Library (MIT License)
Copyright (C) 2019 Peter Petrik (zilolv at gmail dot com)
*/
#include "gtest/gtest.h"
#include <limits>
#include <cmath>
#include <string>
#include <vector>

//mdal
#include "mdal.h"
#include "mdal_utils.hpp"
#include "mdal_testutils.hpp"

//! One table-driven test case: the text to split and the tokens expected back
struct SplitTestData
{
  std::string mInput;                       //!< string handed to the function under test
  std::vector<std::string> mExpectedResult; //!< tokens the call is expected to return, in order

  //! Stores copies of both arguments
  SplitTestData( const std::string &input,
                 const std::vector<std::string> &results )
    : mInput( input )
    , mExpectedResult( results )
  {
  }
};


//! Exercises the std::string-delimiter overload of MDAL::split, first with a
//! one-character delimiter and then with a three-character one. Every table
//! row expects empty tokens to be absent from the result.
TEST( MdalUtilsTest, SplitString )
{
  // Delimiter ";" passed as a string literal, so the std::string overload is used
  const std::vector<SplitTestData> singleCharCases =
  {
    SplitTestData( "a;b;c", {"a", "b", "c"} ),
    SplitTestData( "a;;b;c", {"a", "b", "c"} ),
    SplitTestData( "a;b;", {"a", "b"} ),
    SplitTestData( ";b;", {"b"} ),
    SplitTestData( "a", {"a"} ),
    SplitTestData( "", {} )
  };
  for ( const SplitTestData &testCase : singleCharCases )
  {
    const std::vector<std::string> tokens = MDAL::split( testCase.mInput, ";" );
    EXPECT_EQ( testCase.mExpectedResult, tokens );
  }

  // Delimiter ";;;": shorter runs of ';' must stay inside the tokens
  const std::vector<SplitTestData> multiCharCases =
  {
    SplitTestData( "a;;;b;c", {"a", "b;c"} ),
    SplitTestData( "a;;;b;;;c", {"a", "b", "c"} ),
    SplitTestData( "a;;b;c", {"a;;b;c"} ),
    SplitTestData( "b;;;", {"b"} )
  };
  for ( const SplitTestData &testCase : multiCharCases )
  {
    const std::vector<std::string> tokens = MDAL::split( testCase.mInput, ";;;" );
    EXPECT_EQ( testCase.mExpectedResult, tokens );
  }
}

//! Exercises the single-char-delimiter overload of MDAL::split with the same
//! inputs as the one-character string case; empty tokens are expected to be
//! absent from every result.
TEST( MdalUtilsTest, SplitChar )
{
  const std::vector<SplitTestData> cases =
  {
    SplitTestData( "a;b;c", {"a", "b", "c"} ),
    SplitTestData( "a;;b;c", {"a", "b", "c"} ),
    SplitTestData( "a;b;", {"a", "b"} ),
    SplitTestData( ";b;", {"b"} ),
    SplitTestData( "a", {"a"} ),
    SplitTestData( "", {} )
  };
  for ( const SplitTestData &testCase : cases )
  {
    // char literal selects the ( std::string, char ) overload
    const std::vector<std::string> tokens = MDAL::split( testCase.mInput, ';' );
    EXPECT_EQ( testCase.mExpectedResult, tokens );
  }
}

//! Checks the value MDAL::parseTimeUnits returns for each "<unit> since <date>"
//! string in the table, and for a string that does not follow that form.
TEST( MdalUtilsTest, TimeParsing )
{
  // first: units string passed to parseTimeUnits, second: expected return value
  const std::vector<std::pair<std::string, double>> tests =
  {
    { "seconds since 2001-05-05 00:00:00", 3600 },
    { "minutes since 2001-05-05 00:00:00", 60 },
    { "hours since 1900-01-01 00:00:0.0", 1 },
    { "days since 1961-01-01 00:00:00", 1.0 / 24.0 },
    { "invalid format of time", 1 }
  };
  for ( const auto &test : tests )
  {
    // Name the failing input in the gtest output instead of only the loop line
    SCOPED_TRACE( test.first );
    // Doubles (notably the computed 1.0 / 24.0) are compared with gtest's
    // ULP-based tolerance rather than exact equality
    EXPECT_DOUBLE_EQ( test.second, MDAL::parseTimeUnits( test.first ) );
  }
}


//! Test-runner entry point: lets GoogleTest consume its command-line flags,
//! then runs all registered tests between init_test() and finalize_test().
int main( int argc, char **argv )
{
  testing::InitGoogleTest( &argc, argv );

  init_test();
  const int exitCode = RUN_ALL_TESTS();
  finalize_test();

  // Propagate the gtest result so the process exit status reflects test failures
  return exitCode;
}