// text.h
// Copyright 2010 10gen Inc.
#pragma once
#include <vector>
#include <string>
#include "mongo/base/disallow_copying.h"
#include "mongo/config.h"
namespace mongo {
/**
 * Splits a C string into fragments separated by a delimiter string.
 *
 * The object only stores the two pointers it is given (nothing is copied), so
 * both strings must stay alive for as long as the splitter is in use.
 */
class StringSplitter {
public:
    /**
     * @param big the std::string to be split
     * @param splitter the delimiter
     */
    StringSplitter(const char* big, const char* splitter) : _big(big), _splitter(splitter) {}

    /** @return true if more to be taken via next() */
    bool more() const {
        // The remaining input is exhausted once it starts with the NUL terminator.
        return _big[0] != 0;
    }

    /** get next split std::string fragment */
    std::string next();

    // NOTE(review): the definitions of the split()/join() members are not visible
    // in this header; the descriptions below are inferred from the signatures only.

    /** Presumably appends the remaining fragments to `l` -- confirm against the definition. */
    void split(std::vector<std::string>& l);

    /** Presumably returns the remaining fragments as a vector -- confirm against the definition. */
    std::vector<std::string> split();

    /** Convenience form taking the input and the delimiter as std::string. */
    static std::vector<std::string> split(const std::string& big, const std::string& splitter);

    /** Presumably concatenates the elements of `l` with `split` between them -- confirm. */
    static std::string join(const std::vector<std::string>& l, const std::string& split);

private:
    const char* _big;       // unconsumed part of the input (not owned)
    const char* _splitter;  // delimiter (not owned)
};
/* This doesn't defend against ALL bad UTF8, but it will guarantee that the
 * std::string can be converted to sequence of codepoints. However, it doesn't
 * guarantee that the codepoints are valid.
 *
 * Two overloads are provided: one for a C string and one for a std::string.
 */
bool isValidUTF8(const char* s);
bool isValidUTF8(const std::string& s);
// Parses the C string n as a long long.
// Expects that n contains a base ten number and nothing else after it.
// NOTE(review): how input that violates this expectation is reported is not
// visible from this declaration -- check the definition before relying on it.
// NOTE: the Windows version hasn't been tested directly.
long long parseLL(const char* n);
#if defined(_WIN32)
// Windows-only string helpers. Only the declarations are visible here, so the
// notes below are inferred from names and signatures -- confirm against the
// definitions.

// Takes a wide string and returns a narrow std::string (presumably UTF-8 encoded).
std::string toUtf8String(const std::wstring& wide);
// Takes a narrow C string (presumably UTF-8) and returns a std::wstring.
std::wstring toWideString(const char* s);
// Takes a buffer and its size in the units used by utf8StringSize (presumably
// bytes); the bool result presumably signals success.
bool writeUtf8ToWindowsConsole(const char* utf8String, unsigned int utf8StringSize);
/* like toWideString but UNICODE macro sensitive:
 *   - without _UNICODE the result is a std::string built directly from s
 *     (this configuration is currently rejected by the #error below);
 *   - with _UNICODE the result is the std::wstring produced by toWideString(s).
 */
#if !defined(_UNICODE)
#error temp error
inline std::string toNativeString(const char* s) {
    return s;
}
#else
inline std::wstring toNativeString(const char* s) {
    return toWideString(s);
}
#endif
/**
 * Exposes a process command line and environment as arrays of narrow strings.
 *
 * The constructor receives the wide-character argument and environment arrays;
 * argv() and envp() hand back the char** members.
 *
 * NOTE(review): the constructor is only declared here, so how _argv/_envp are
 * filled in (presumably with UTF-8 conversions of the wide inputs) and who
 * releases them cannot be confirmed from this header. The class stores raw
 * pointers, so copying an instance copies the pointers only.
 */
class WindowsCommandLine {
    char** _argv;
    char** _envp;

public:
    WindowsCommandLine(int argc, wchar_t* argvW[], wchar_t* envpW[]);

    /** @return the stored argument array */
    char** argv() const {
        return _argv;
    }

    /** @return the stored environment array */
    char** envp() const {
        return _envp;
    }
};
#endif // #if defined(_WIN32)
/**
 * Construct a Windows command line string, UTF-8 encoded, from a vector of
 * UTF-8 arguments, "argv".
 *
 * See "Parsing C++ Command-Line Arguments (C++)"
 *
 * @param argv the individual arguments
 * @return the single command line string
 */
std::string constructUtf8WindowsCommandLine(const std::vector<std::string>& argv);
} // namespace mongo
