diff --git a/README.md b/README.md index e5584b3..186b5ec 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,23 @@ # Open Chess Game Database Standard (OCGDB) -Version Alpha +Version Beta -## Features/Highlights of the Standard -- Developed based on SQL in general, SQLite in particular -- It has an open data structure: very easy to understand, change, convert to/from other formats -- Terms, names, types… are followed PGN standard as much as possible -- Basic tables: Games, Events, Sites, Players, Comments. Other tables: Info -- Main columns of the table Games: EvenID, SiteID, WhiteID, WhiteElo, BlackID, Result, Date, ECO, FEN, Moves/Moves1/Moves2 -- FEN (in the table Games) is for the starting position. If the game started from origin position, it could be empty or NULL -- Moves are stored in table Games with a few choices: as text in column Moves, or binary forms in column Moves1 (for 1-byte encoding) or column Moves2 (for 2-bytes encoding) -- It could query game header information via SQL/SQL search engines -- It could do approximate-position-searching with the support from the belonging code and Position Query Language (PQL) -- There are no or very high limit about the number of games. Some tests show it worked well with databases of over 90 million games and could work with much larger numbers -- It could serve for any purposes of chess game databases from web to desktop, mobiphone apps +## Brief main ideas/techniques +- Use SQL/SQLite as the backbone/framework for storing data and querying general information +- Approximate position searching: a) Parse games on the fly b) Use Position Query Language (PQL) for querying widely and dynamically +- Names and tables follow PGN tags + + +## Why OCGDB? 
Features/Highlights +- Open databases: users can easily understand the data structures, modify them, and convert to or from other database formats +- It supports the highest numbers of games (tested with 94 million games; it is estimated to work with billions of games) +- It is based on SQL, which is the strongest query language for querying general information. Users can query without using chess-specific programs +- It has its own query language (PQL) for approximate-position-searching, so its searches can cover a very wide range +- It can be used widely, from mobile, desktop, and console to web applications  +- It is one of the programs that can create the smallest chess game databases +- It is one of the fastest chess game database programs when generating databases and searching +- MIT license: you may use it for any application/purpose without restriction and without worrying about license conditions We believe it is one of the fastest (in terms of speeds of creating and querying/searching), smallest (in terms of database sizes), strongest (in terms of game numbers), and smartest (in terms of querying/position-searching) chess game database programs. It could compete for all parameters, results with the best chess game database formats and their programs/tools. 
@@ -137,6 +140,27 @@ Convert a PGN file of 94 million games from Lichess: #games: 93679650, elapsed: 5209214ms 1:26:49, speed: 17983 games/s, #blocks: 25777, processed size: 206208 MB ``` +## Retrieve data +Query database and extract some important data fields: + +``` +for (auto cnt = 0; statement.executeStep(); cnt++) { + auto gameID = statement.getColumn("ID").getInt64(); assert(gameID > 0); + auto fenText = statement.getColumn("FEN").getText(); + auto moveText = statement.getColumn("Moves").getText(); +} +``` + +Query database, extract some data fields and parse them into a chessboard, using multiple threads: +``` +for (auto cnt = 0; statement.executeStep(); cnt++) { + auto gameID = statement.getColumn("ID").getInt64(); + auto fenText = statement.getColumn("FEN").getText(); + auto moveText = statement.getColumn("Moves").getText(); + threadParsePGNGame(gameID, fenText, moveText); +} +``` + ## Position query language (PQL) The EBNF (Extended Backus Naur Form) of the language is as the below: @@ -264,6 +288,7 @@ ocgdb -db c:\db\big.ocgdb.db3 -cpu 4 -q "Q=3" -q"P[d4, e5, f4, g4] = 4 and kb7" ``` ## History +* 25/01/2022: Version Beta * 23/01/2022: Version Alpha * 20/11/2021: Improve/clean code, improve speed for benchmark * 16/11/2021: Improve speed for converter, convert 3.45 million games under a minute diff --git a/projects/ocgdb.xcodeproj/project.xcworkspace/xcuserdata/nguyenpham.xcuserdatad/UserInterfaceState.xcuserstate b/projects/ocgdb.xcodeproj/project.xcworkspace/xcuserdata/nguyenpham.xcuserdatad/UserInterfaceState.xcuserstate index 84c28b4..c5d37f2 100644 Binary files a/projects/ocgdb.xcodeproj/project.xcworkspace/xcuserdata/nguyenpham.xcuserdatad/UserInterfaceState.xcuserstate and b/projects/ocgdb.xcodeproj/project.xcworkspace/xcuserdata/nguyenpham.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/projects/ocgdb.xcodeproj/xcshareddata/xcschemes/ocgdb.xcscheme b/projects/ocgdb.xcodeproj/xcshareddata/xcschemes/ocgdb.xcscheme index 9354539..deca944 100644 
--- a/projects/ocgdb.xcodeproj/xcshareddata/xcschemes/ocgdb.xcscheme +++ b/projects/ocgdb.xcodeproj/xcshareddata/xcschemes/ocgdb.xcscheme @@ -65,7 +65,7 @@ + isEnabled = "YES"> + isEnabled = "NO"> + isEnabled = "YES"> + + @@ -115,6 +119,10 @@ argument = "-cpu 1" isEnabled = "NO"> + + @@ -414,8 +414,8 @@ filePath = "../src/board/base.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "950" - endingLineNumber = "950" + startingLineNumber = "957" + endingLineNumber = "957" landmarkName = "BoardCore::fromMoveList(gameId, moveVec, flag, shouldStop)" landmarkType = "7"> @@ -510,8 +510,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "727" - endingLineNumber = "727" + startingLineNumber = "721" + endingLineNumber = "721" landmarkName = "Builder::createDb(path)" landmarkType = "7"> @@ -526,8 +526,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "725" - endingLineNumber = "725" + startingLineNumber = "719" + endingLineNumber = "719" landmarkName = "Builder::createDb(path)" landmarkType = "7"> @@ -542,8 +542,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "440" - endingLineNumber = "440" + startingLineNumber = "430" + endingLineNumber = "430" landmarkName = "Builder::addNewField(fieldName)" landmarkType = "7"> @@ -558,8 +558,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1120" - endingLineNumber = "1120" + startingLineNumber = "1139" + endingLineNumber = "1139" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -574,8 +574,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = 
"9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1125" - endingLineNumber = "1125" + startingLineNumber = "1144" + endingLineNumber = "1144" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -590,28 +590,12 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1146" - endingLineNumber = "1146" + startingLineNumber = "1165" + endingLineNumber = "1165" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> - - - - @@ -638,8 +622,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1027" - endingLineNumber = "1027" + startingLineNumber = "1041" + endingLineNumber = "1041" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -654,8 +638,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1023" - endingLineNumber = "1023" + startingLineNumber = "1037" + endingLineNumber = "1037" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -670,8 +654,8 @@ filePath = "../src/main.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "77" - endingLineNumber = "77" + startingLineNumber = "75" + endingLineNumber = "75" landmarkName = "main(argc, argv)" landmarkType = "9"> @@ -686,8 +670,8 @@ filePath = "../src/main.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "76" - endingLineNumber = "76" + startingLineNumber = "74" + endingLineNumber = "74" landmarkName = "main(argc, argv)" landmarkType = "9"> @@ -702,9 +686,9 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - 
startingLineNumber = "1342" - endingLineNumber = "1342" - landmarkName = "Builder::searchPosition(db, query)" + startingLineNumber = "1402" + endingLineNumber = "1402" + landmarkName = "Builder::searchPosition(db, pgnPaths, query)" landmarkType = "7"> @@ -718,8 +702,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "931" - endingLineNumber = "931" + startingLineNumber = "942" + endingLineNumber = "942" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -734,8 +718,8 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "932" - endingLineNumber = "932" + startingLineNumber = "943" + endingLineNumber = "943" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> @@ -750,11 +734,299 @@ filePath = "../src/builder.cpp" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "941" - endingLineNumber = "941" + startingLineNumber = "952" + endingLineNumber = "952" landmarkName = "Builder::addGame(itemMap, moveText)" landmarkType = "7"> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/board/base.cpp b/src/board/base.cpp index e5db156..876fb90 100644 --- a/src/board/base.cpp +++ b/src/board/base.cpp @@ -887,6 +887,13 @@ bool BoardCore::fromMoveList(int64_t gameId, const std::string& str, Notation no histList.back().bitboardVec = bitboardVec; } + if (flag & ParseMoveListFlag_create_san) { + if (notation == Notation::san) { + histList.back().sanString = ss; + } else { + // missing function + } + } } // last position @@ -1083,6 +1090,32 @@ std::string BoardCore::toSimplePgn() const return stringStream.str(); } +std::string BoardCore::toPgn(const std::unordered_map tags) const +{ + 
std::ostringstream stringStream; + + auto it = tags.find("Event"); + stringStream << "[Event \"" + << (it != tags.end() ? it->second : "event") + << "\"]\n"; + + for(auto && it : tags) { + if (it.first != "Event") { + stringStream << "[" << it.first << "\"" + << it.second + << "\"]\n"; + } + } + if (!startFen.empty()) { + stringStream << "[FEN \"" << startFen << "\"]\n"; + } + + stringStream << "\n" << toMoveListString(Notation::san, + 8, true, + CommentComputerInfoType::standard); + return stringStream.str(); +} + void BoardCore::flip(FlipMode flipMode) { diff --git a/src/board/base.h b/src/board/base.h index 59aecfb..5cef2a3 100644 --- a/src/board/base.h +++ b/src/board/base.h @@ -13,6 +13,7 @@ #include #include +#include #include // for setfill, setw @@ -224,6 +225,10 @@ namespace bslib { result.result = ResultType::noresult; } + virtual std::string toString() const { + return ""; + } + bool isValid(const Move& move) const { return move.isValid() && isPositionValid(move.from) && isPositionValid(move.dest); } @@ -363,12 +368,13 @@ namespace bslib { enum ParseMoveListFlag { ParseMoveListFlag_quick_check = 1 << 0, - ParseMoveListFlag_create_fen = 1 << 1, - ParseMoveListFlag_create_bitboard = 1 << 2, - ParseMoveListFlag_discardComment = 1 << 3, - ParseMoveListFlag_parseComment = 1 << 4, + ParseMoveListFlag_create_san = 1 << 1, + ParseMoveListFlag_create_fen = 1 << 2, + ParseMoveListFlag_create_bitboard = 1 << 3, + ParseMoveListFlag_discardComment = 1 << 4, + ParseMoveListFlag_parseComment = 1 << 5, - ParseMoveListFlag_move_size_1_byte = 1 << 5, // for the 2nd function one only + ParseMoveListFlag_move_size_1_byte = 1 << 6, // for the 2nd function one only }; virtual bool fromMoveList(int64_t gameId, const std::string&, Notation, int flag, std::function& bitboardVec, const BoardCore*)> = nullptr); @@ -386,6 +392,7 @@ namespace bslib { static std::string toMoveListString(const std::vector& histList, ChessVariant variant, Notation notation, int itemPerLine, bool 
moveCounter, CommentComputerInfoType computingInfo, bool pawnUnit, int precision); virtual std::string toSimplePgn() const; + virtual std::string toPgn(const std::unordered_map tags) const; virtual int16_t move2i16(int from, int dest, int promotion, bool haveComment) const = 0; virtual void i16ToMove(int data, int& from, int& dest, int& promotion, bool& haveComment) const = 0; diff --git a/src/board/chess.h b/src/board/chess.h index 4b20480..fbad4c4 100644 --- a/src/board/chess.h +++ b/src/board/chess.h @@ -32,7 +32,7 @@ namespace bslib { ChessBoard(const ChessBoard&); virtual ~ChessBoard() override; - virtual std::string toString() const; + virtual std::string toString() const override; virtual bool isValid() const override; virtual int columnCount() const override; diff --git a/src/builder.cpp b/src/builder.cpp index 1b59223..b98e87f 100644 --- a/src/builder.cpp +++ b/src/builder.cpp @@ -29,29 +29,29 @@ Builder* builder = nullptr; bool ParaRecord::isValid() const { - if (dbPath.empty()) { + if (dbPath.empty() && task != Task::query) { errorString = "Must have a database (.db3) path. Mising or wrong parameter -db"; return false; } + auto hasPgn = false; + for(auto && s : pgnPaths) { + if (!s.empty()) { + hasPgn = true; + break; + } + } + errorString.clear(); auto ok = false; switch (task) { - case Task::createSQLdatabase: + case Task::create: { if (cpuNumber <= 0) { errorString = "CPU number must be greater than zero"; break; } - auto hasPgn = false; - for(auto && s : pgnPaths) { - if (!s.empty()) { - hasPgn = true; - break; - } - } - if (!hasPgn) { errorString = "Must have at least one PGN path. Mising or wrong parameter -pgn"; break; @@ -62,6 +62,10 @@ bool ParaRecord::isValid() const } case Task::query: + if (dbPath.empty() && !hasPgn) { + errorString = "Must have a database (.db3) path or a PGN path. Mising or wrong parameter -db and -pgn"; + return false; + } if (queries.empty()) { errorString = "Must have at least one query. 
Mising or wrong parameter -q"; break; @@ -83,6 +87,21 @@ bool ParaRecord::isValid() const return ok; } +static const std::map optionNameMap = { + // creating + {"moves", 0}, + {"moves1", 1}, + {"moves2", 2}, + {"acceptnewtags", 3}, + {"discardcomments", 4}, + {"discardsites", 5}, + {"discardnoelo", 6}, + // query + {"printall", 7}, + {"printfen", 8}, + {"printpgn", 9}, +}; + std::string ParaRecord::toString() const { std::string s; @@ -93,19 +112,7 @@ std::string ParaRecord::toString() const "bench" }; - - const std::string optionNames[] = { - "comment_discard", - "site_discard", - "accept_newTags", - "player_limit_elo", - "player_discard_no_elo", - "query_stop_at_first_result", - "query_print_all", - "query_print_fen", - "query_print_pgn", - }; - + s = "\tTask: " + taskNames[static_cast(task)] + "\n"; s += "\tPGN paths:\n"; @@ -125,20 +132,42 @@ std::string ParaRecord::toString() const "Moves", "Moves1", "Moves2", "Moves + Moves1", "Moves + Moves2" }; - s += "\tMove columns (for creating): " + moveModeNames[static_cast(columnMovesMode)] + "\n"; s += "\tOptions: "; - for(auto && o : optionSet) { - s += optionNames[static_cast(o)] + "; "; + for(auto && it : optionNameMap) { + if (optionFlag & (1 << it.second)) { + s += it.first + ";"; + } } + s += "\n"; - s += "\tgameNumberLimit: " + std::to_string(gameNumberLimit) + "\n"; - s += "\tcpu: " + std::to_string(cpuNumber) + ", low Elo: " + std::to_string(lowElo) + "\n"; - + s += "\tgameNumberLimit: " + std::to_string(gameNumberLimit) + "\n" + + "\tresultNumberLimit: " + std::to_string(resultNumberLimit) + "\n" + + "\tcpu: " + std::to_string(cpuNumber) + + ", min Elo: " + std::to_string(limitElo) + + ", min game length: " + std::to_string(limitLen) + + "\n"; + return s; } + +void ParaRecord::setupOptions(const std::string& optionString) +{ + optionFlag = 0; + auto vec = bslib::Funcs::splitString(optionString, ';'); + + for(auto && s : vec) { + auto it = optionNameMap.find(s); + if (it == optionNameMap.end()) { + std::cerr 
<< "Error: Don't know option string: " << it->first << std::endl; + } else { + optionFlag |= 1 << it->second; + } + } +} + static const char* tagNames[] = { "GameID", // Not real PGN tag, added for convernience @@ -166,12 +195,12 @@ void ThreadRecord::init(SQLite::Database* mDb) errCnt = 0; - assert(mDb); board = Builder::createBoard(bslib::ChessVariant::standard); - - insertCommentStatement = new SQLite::Statement(*mDb, "INSERT INTO Comments (GameID, Ply, Comment) VALUES (?, ?, ?)"); - buf = new int8_t[1024 * 2]; + + if (mDb) { + insertCommentStatement = new SQLite::Statement(*mDb, "INSERT INTO Comments (GameID, Ply, Comment) VALUES (?, ?, ?)"); + } } ThreadRecord::~ThreadRecord() @@ -255,14 +284,14 @@ bslib::BoardCore* Builder::createBoard(bslib::ChessVariant variant) void Builder::runTask(const ParaRecord& param) { switch (param.task) { - case Task::createSQLdatabase: + case Task::create: convertPgn2Sql(param); break; case Task::bench: - bench(param.dbPath, param.cpuNumber, param.optionSet); + bench(param); break; case Task::query: - query(param.dbPath, param.cpuNumber, param.queries, param.optionSet); + query(param, param.queries); break; default: @@ -270,7 +299,7 @@ void Builder::runTask(const ParaRecord& param) } } -void Builder::convertPgn2Sql(const ParaRecord& paraRecord) +void Builder::convertPgn2Sql(const ParaRecord& _paraRecord) { // Prepare setDatabasePath(paraRecord.dbPath); @@ -279,47 +308,21 @@ void Builder::convertPgn2Sql(const ParaRecord& paraRecord) std::remove(paraRecord.dbPath.c_str()); startTime = getNow(); - + // options { - createoption_AcceptNewField = paraRecord.optionSet.find(Option::accept_newTags) != paraRecord.optionSet.end(); - createoption_site_discard = paraRecord.optionSet.find(Option::site_discard) != paraRecord.optionSet.end(); - createoption_comment_discard = paraRecord.optionSet.find(Option::comment_discard) != paraRecord.optionSet.end(); + paraRecord = _paraRecord; - createoption_elo_limit = 
paraRecord.optionSet.find(Option::player_limit_elo) != paraRecord.optionSet.end(); - createoption_elo_discard_no_elo = paraRecord.optionSet.find(Option::player_discard_no_elo) != paraRecord.optionSet.end(); - - createoption_KeepMovesField = false; - createoption_EncodeMoveSize = 0; + int movebit = paraRecord.optionFlag & (create_flag_moves|create_flag_moves1|create_flag_moves2); - switch (paraRecord.columnMovesMode) { - case ColumnMovesMode::moves: - createoption_KeepMovesField = true; - break; - case ColumnMovesMode::moves1: - createoption_EncodeMoveSize = 1; - break; - case ColumnMovesMode::moves2: - createoption_EncodeMoveSize = 2; - break; - case ColumnMovesMode::moves_moves1: - createoption_KeepMovesField = true; - createoption_EncodeMoveSize = 1; - break; - case ColumnMovesMode::moves_moves2: - createoption_KeepMovesField = true; - createoption_EncodeMoveSize = 2; - break; - - default: - break; - } - - createoption_gameNumberLimit = paraRecord.gameNumberLimit; - createoption_lowElo = paraRecord.lowElo; - - if (!createoption_KeepMovesField && createoption_EncodeMoveSize != 1 && createoption_EncodeMoveSize != 2) { + if (!movebit) { std::cout << "WARNING: there is not any column for storing moves" << std::endl; + } else if (movebit != create_flag_moves && movebit != create_flag_moves1 && movebit != create_flag_moves2) { + std::cout << "WARNING: redundant! There are more than one column for storing moves" << std::endl; + + if ((paraRecord.optionFlag & (create_flag_moves1 | create_flag_moves2)) == (create_flag_moves1 | create_flag_moves2)) { + std::cout << "WARNING: redundant! There are two binary columns for storing moves. 
Use Moves2, discard Move1" << std::endl; + } } } @@ -328,12 +331,11 @@ void Builder::convertPgn2Sql(const ParaRecord& paraRecord) { gameCnt = 0; eventCnt = playerCnt = siteCnt = 1; - errCnt = posCnt = 0; - - auto cpu = paraRecord.cpuNumber; - if (cpu < 0) cpu = std::thread::hardware_concurrency(); - pool = new thread_pool(cpu); + errCnt = 0; + playerIdMap.reserve(8 * 1024 * 1024); + eventIdMap.reserve(128 * 1024); + siteIdMap.reserve(128 * 1024); extraFieldSet.clear(); fieldOrderMap.clear(); @@ -346,23 +348,19 @@ void Builder::convertPgn2Sql(const ParaRecord& paraRecord) fieldOrderMap["UTCDate"] = TagIdx_Date; tagIdx_Moves = -1; tagIdx_MovesBlob = -1; - if (createoption_KeepMovesField) { + if (paraRecord.optionFlag & create_flag_moves) { tagIdx_Moves = idx++; fieldOrderMap["Moves"] = tagIdx_Moves; extraFieldSet.insert("Moves"); } - if (createoption_EncodeMoveSize == 1 || createoption_EncodeMoveSize == 2) { + if (paraRecord.optionFlag & (create_flag_moves1 | create_flag_moves2)) { tagIdx_MovesBlob = idx++; - std::string s = "Moves" + std::to_string(createoption_EncodeMoveSize); + std::string s = (paraRecord.optionFlag & create_flag_moves2) ? 
"Moves2" : "Moves1"; fieldOrderMap[s] = tagIdx_MovesBlob; extraFieldSet.insert(s); } insertGameStatementIdxSz = idx; - playerIdMap.reserve(8 * 1024 * 1024); - eventIdMap.reserve(128 * 1024); - siteIdMap.reserve(128 * 1024); - // Create database mDb = createDb(paraRecord.dbPath); @@ -370,19 +368,11 @@ void Builder::convertPgn2Sql(const ParaRecord& paraRecord) return; } - // prepared statements - playerInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Players (ID, Name, Elo) VALUES (?, ?, ?)"); - eventInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Events (ID, Name) VALUES (?, ?)"); - siteInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Sites (ID, Name) VALUES (?, ?)"); - - std::cout << "Thread count: " << pool->get_thread_count() - << std::endl; + auto cpu = paraRecord.cpuNumber; + if (cpu < 0) cpu = std::thread::hardware_concurrency(); + pool = new thread_pool(cpu); - std::cout << "Move encoding size: " - << createoption_EncodeMoveSize - << (createoption_KeepMovesField ? 
", keep field Moves" : " without field Moves") - << std::endl; - + std::cout << "Thread count: " << pool->get_thread_count() << std::endl; } uint64_t cnt = 0; @@ -522,7 +512,12 @@ void Builder::processDataBlock(char* buffer, long sz, bool connectBlock) if (hasEvent && p - buffer > 2) { *(p - 2) = 0; - threadAddGame(tagMap, moves); +// threadAddGame(tagMap, moves); + if (paraRecord.task == Task::create) { + threadAddGame(tagMap, moves); + } else { + threadQueryGame(tagMap, moves); + } } tagMap.clear(); @@ -597,16 +592,18 @@ void Builder::processDataBlock(char* buffer, long sz, bool connectBlock) if (connectBlock) { processHalfBegin(event, (long)sz - (event - buffer)); } else if (moves) { - threadAddGame(tagMap, moves); + if (paraRecord.task == Task::create) { + threadAddGame(tagMap, moves); + } else { + threadQueryGame(tagMap, moves); + } } } uint64_t Builder::processPgnFile(const std::string& path) { std::cout << "Processing PGN file: '" << path << "'" << std::endl; - - // Begin transaction -// SQLite::Transaction transaction(*mDb); + auto transactionCnt = 0; { @@ -617,7 +614,7 @@ uint64_t Builder::processPgnFile(const std::string& path) auto size = bslib::Funcs::getFileSize(stream); blockCnt = processedPgnSz = 0; - for (size_t sz = 0, idx = 0; sz < size && gameCnt < createoption_gameNumberLimit; idx++) { + for (size_t sz = 0, idx = 0; sz < size && gameCnt < paraRecord.gameNumberLimit; idx++) { auto k = std::min(blockSz, size - sz); if (k == 0) { break; @@ -625,7 +622,7 @@ uint64_t Builder::processPgnFile(const std::string& path) buffer[k] = 0; if (fread(buffer, k, 1, stream)) { - if (transactionCnt <= 0) { + if (mDb && transactionCnt <= 0) { transactionCnt = 400; mDb->exec("BEGIN"); std::cout << "BEGIN TRANSACTION" << std::endl; @@ -637,7 +634,7 @@ uint64_t Builder::processPgnFile(const std::string& path) pool->wait_for_tasks(); transactionCnt--; - if (transactionCnt <= 0) { + if (mDb && transactionCnt <= 0) { mDb->exec("COMMIT"); std::cout << "COMMIT TRANSACTION" 
<< std::endl; } @@ -663,13 +660,10 @@ uint64_t Builder::processPgnFile(const std::string& path) } } - if (transactionCnt > 0) { + if (mDb && transactionCnt > 0) { mDb->exec("COMMIT"); } - // Commit transaction -// transaction.commit(); - printStats(); return gameCnt; @@ -733,11 +727,12 @@ SQLite::Database* Builder::createDb(const std::string& path) "Date TEXT, Round INTEGER, WhiteID INTEGER, WhiteElo INTEGER, BlackID INTEGER, BlackElo INTEGER, "\ "Result INTEGER, TimeControl TEXT, ECO TEXT, PlyCount INTEGER, FEN TEXT"; - if (createoption_KeepMovesField) { + if (paraRecord.optionFlag & create_flag_moves) { sqlstring0 += ", Moves TEXT"; } - if (createoption_EncodeMoveSize == 1 || createoption_EncodeMoveSize == 2) { - sqlstring0 += ", Moves" + std::to_string(createoption_EncodeMoveSize) + " BLOB DEFAULT NULL"; + if (paraRecord.optionFlag & (create_flag_moves1 | create_flag_moves2)) { + auto sz = (paraRecord.optionFlag & create_flag_moves2) ? 2 : 1; + sqlstring0 += ", Moves" + std::to_string(sz) + " BLOB DEFAULT NULL"; } std::string sqlstring1 = @@ -754,6 +749,11 @@ SQLite::Database* Builder::createDb(const std::string& path) // mDb->exec("PRAGMA synchronous=OFF"); // mDb->exec("PRAGMA cache_size=64000"); + // prepared statements + playerInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Players (ID, Name, Elo) VALUES (?, ?, ?)"); + eventInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Events (ID, Name) VALUES (?, ?)"); + siteInsertStatement = new SQLite::Statement(*mDb, "INSERT INTO Sites (ID, Name) VALUES (?, ?)"); + return mDb; } catch (std::exception& e) @@ -851,11 +851,22 @@ void doAddGame(const std::unordered_map& itemMap, const char* move builder->addGame(itemMap, moveText); } +void doQueryGame(const std::unordered_map& itemMap, const char* moveText) +{ + assert(builder); + builder->queryGame(itemMap, moveText); +} + void Builder::threadAddGame(const std::unordered_map& itemMap, const char* moveText) { pool->submit(doAddGame, itemMap, 
moveText); } +void Builder::threadQueryGame(const std::unordered_map& itemMap, const char* moveText) +{ + pool->submit(doQueryGame, itemMap, moveText); +} + static void standardizeDate(char* date) { assert(date); @@ -914,7 +925,7 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha } case TagIdx_Site: { - if (createoption_site_discard) { + if (paraRecord.optionFlag & create_flag_discard_sites) { intMap[TagIdx_Site] = 1; // empty break; } @@ -981,6 +992,9 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha case TagIdx_PlyCount: { plyCount = std::atoi(s); + if (paraRecord.limitLen > plyCount) { + return false; + } break; } @@ -1019,15 +1033,15 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha } } - if (createoption_elo_discard_no_elo && (whiteElo <= 0 || blackElo <= 0)) { + if ((paraRecord.optionFlag & create_flag_discard_no_elo) && (whiteElo <= 0 || blackElo <= 0)) { return false; } - if (createoption_elo_limit && (whiteElo < createoption_lowElo || blackElo < createoption_lowElo)) { + if (paraRecord.limitElo > 0 && (whiteElo < paraRecord.limitElo || blackElo < paraRecord.limitElo)) { return false; } - if (createoption_AcceptNewField && strcmp(it.first, "SetUp") != 0) { + if ((paraRecord.optionFlag & create_flag_accept_new_tags) && strcmp(it.first, "SetUp") != 0) { if (t->insertGameStatement) { delete t->insertGameStatement; t->insertGameStatement = nullptr; @@ -1061,7 +1075,7 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha intMap[TagIdx_GameID] = gameID; if (tagIdx_Moves >= 0) { - assert(createoption_KeepMovesField); + assert(paraRecord.optionFlag & create_flag_moves); // trim left while(*moveText <= ' ') moveText++; @@ -1070,26 +1084,34 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha // Parse moves if (tagIdx_MovesBlob >= 0) { - assert(createoption_EncodeMoveSize == 1 || createoption_EncodeMoveSize == 2); + assert(paraRecord.optionFlag & (create_flag_moves1 | 
create_flag_moves2)); //assert(t->board); t->board->newGame(fenString); int flag = bslib::BoardCore::ParseMoveListFlag_quick_check; - if (createoption_comment_discard) { + if (paraRecord.optionFlag & create_flag_discard_comments) { flag |= bslib::BoardCore::ParseMoveListFlag_discardComment; } t->board->fromMoveList(gameID, moveText, bslib::Notation::san, flag); plyCount = t->board->getHistListSize(); + + if (paraRecord.limitLen > plyCount) { + return false; + } + if (plyCount > 0) { auto p = t->buf; for(auto i = 0; i < plyCount; i++) { auto h = t->board->_getHistPointerAt(i); auto move = h->move; - if (createoption_EncodeMoveSize == 1) { + if (paraRecord.optionFlag & create_flag_moves2) { // 2 bytes encoding + *(int16_t*)p = bslib::ChessBoard::encode2Bytes(move); + p += 2; + } else if (paraRecord.optionFlag & create_flag_moves1) { auto pair = bslib::ChessBoard::encode1Byte(move); assert(pair.second == 1 || pair.second == 2); if (pair.second == 1) { @@ -1101,9 +1123,6 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha assert(*(p + 1) == static_cast(pair.first >> 8)); p += 2; } - } else if (createoption_EncodeMoveSize == 2) { // 2 bytes encoding - *(int16_t*)p = bslib::ChessBoard::encode2Bytes(move); - p += 2; } if (!h->comment.empty()) { @@ -1149,6 +1168,46 @@ bool Builder::addGame(const std::unordered_map& itemMap, const cha return true; } +bool Builder::queryGame(const std::unordered_map& itemMap, const char* moveText) +{ + auto threadId = std::this_thread::get_id(); + auto t = &threadMap[threadId]; + t->init(mDb); + assert(t->board); + + std::string fenString; + for(auto && it : itemMap) { + if (strcmp(it.first, "FEN") == 0) { + fenString = it.second; + break; + } + } + + IDInteger gameID; + { + std::lock_guard dolock(gameMutex); + ++gameCnt; + gameID = gameCnt; + } + + // Parse moves + { + //assert(t->board); + t->board->newGame(fenString); + + int flag = bslib::BoardCore::ParseMoveListFlag_quick_check + | 
bslib::BoardCore::ParseMoveListFlag_discardComment + | bslib::BoardCore::ParseMoveListFlag_create_bitboard; + + if (paraRecord.optionFlag & query_flag_print_pgn) { + flag |= bslib::BoardCore::ParseMoveListFlag_create_san; + } + t->board->fromMoveList(gameID, moveText, bslib::Notation::san, flag, checkToStop); + } + + return true; +} + void Builder::queryGameData(SQLite::Database& db, int gameIdx) { auto ok = false; @@ -1291,8 +1350,9 @@ void Builder::parsePGNGame(int64_t gameID, const std::string& fenText, } -void Builder::searchPosition(SQLite::Database& db, const std::string& query) +void Builder::searchPosition(SQLite::Database* db, const std::vector& pgnPaths, const std::string& query) { + assert(paraRecord.task != Task::create); auto parser = new Parser; if (!parser->parse(query.c_str())) { std::cerr << "Error: " << parser->getErrorString() << std::endl; @@ -1303,10 +1363,10 @@ void Builder::searchPosition(SQLite::Database& db, const std::string& query) auto startTime = getNow(); // check if there at least a move fields (Moves, Moves1 or Moves2) - { + if (db) { searchField = SearchField::none; - SQLite::Statement stmt(db, "PRAGMA table_info(Games)"); + SQLite::Statement stmt(*db, "PRAGMA table_info(Games)"); while (stmt.executeStep()) { std::string fieldName = stmt.getColumn(1).getText(); @@ -1339,13 +1399,14 @@ void Builder::searchPosition(SQLite::Database& db, const std::string& query) if (parser->evaluate(bitboardVec)) { succCount++; - if ((query_flag & query_flag_print_all) || (succCount & 0xffff) == 0) { + if (paraRecord.optionFlag & query_flag_print_all) { std::cout << succCount << ". 
gameId: " << gameId; - if (query_flag & query_flag_print_fen) { + if (paraRecord.optionFlag & query_flag_print_fen) { std::cout << ", fen: " << board->getFen(); } - if (query_flag & query_flag_print_pgn) { - std::cout << ", pgn:\n " << board->toSimplePgn(); + if (paraRecord.optionFlag & query_flag_print_pgn) { + std::unordered_map tags; + std::cout << "\n\n" << board->toPgn(tags); } std::cout << std::endl; } @@ -1367,40 +1428,48 @@ void Builder::searchPosition(SQLite::Database& db, const std::string& query) << ", searchField: " << moveFieldName << std::endl; - succCount = 0; + succCount = 0; gameCnt = 0; - SQLite::Statement statement(db, "SELECT * FROM Games"); - int64_t cnt = 0; - for (; statement.executeStep(); cnt++) { - auto gameID = statement.getColumn("ID").getInt64(); - auto fenText = statement.getColumn("FEN").getText(); - - std::string moveText; - std::vector moveVec; - - if (searchField == SearchField::moves) { - moveText = statement.getColumn("Moves").getText(); - } else { - auto c = statement.getColumn(moveFieldName.c_str()); - auto moveBlob = static_cast(c.getBlob()); + if (db) { + SQLite::Statement statement(*db, "SELECT * FROM Games"); + for (; statement.executeStep(); gameCnt++) { + auto gameID = statement.getColumn("ID").getInt64(); + auto fenText = statement.getColumn("FEN").getText(); - if (!moveBlob) { - continue; + std::string moveText; + std::vector moveVec; + + if (searchField == SearchField::moves) { + moveText = statement.getColumn("Moves").getText(); + } else { + auto c = statement.getColumn(moveFieldName.c_str()); + auto moveBlob = static_cast(c.getBlob()); + + if (!moveBlob) { + continue; + } + auto sz = c.size(); + + for(auto i = 0; i < sz; ++i) { + moveVec.push_back(moveBlob[i]); + } } - auto sz = c.size(); - - for(auto i = 0; i < sz; ++i) { - moveVec.push_back(moveBlob[i]); + threadParsePGNGame(gameID, fenText, moveText, moveVec); + + if (succCount >= paraRecord.resultNumberLimit) { + break; } } - threadParsePGNGame(gameID, fenText, 
moveText, moveVec); + pool->wait_for_tasks(); - if (succCount && (query_flag & query_flag_stop_at_first_result)) { - break; + } else { + assert(!pgnPaths.empty()); + for(auto && path : pgnPaths) { + processPgnFile(path); } } - pool->wait_for_tasks(); + // Done, retrieve some last stats int64_t parsedGameCnt = 0, allHdpLen = 0; for(auto && t : threadMap) { parsedGameCnt += t.second.gameCnt; @@ -1411,6 +1480,7 @@ void Builder::searchPosition(SQLite::Database& db, const std::string& query) std::cout << std::endl << query << " DONE. Elapsed: " << elapsed << " ms, " << bslib::Funcs::secondToClockString(static_cast(elapsed / 1000), ":") + << ", total games: " << gameCnt << ", total results: " << succCount << ", time per results: " << elapsed / std::max(1, succCount) << " ms" << std::endl << std::endl << std::endl; @@ -1418,7 +1488,7 @@ void Builder::searchPosition(SQLite::Database& db, const std::string& query) delete parser; } -void Builder::bench(const std::string& dbPath, int cpu, const std::set