Skip to content

Commit

Permalink
chore(core): better diagnostics for table repair (#3767)
Browse files Browse the repository at this point in the history
  • Loading branch information
puzpuzpuz committed Sep 29, 2023
1 parent 54de3f4 commit c1a4b7f
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 22 deletions.
13 changes: 5 additions & 8 deletions core/src/main/java/io/questdb/TelemetryConfigLogger.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ private void appendConfigRow(CairoEngine engine, SqlCompiler compiler, TableWrit
LOG.info()
.$("new instance [id=").$256(a, b, 0, 0)
.$(", enabled=").$(enabled)
.$(']').$();
.I$();
} else {
row.putLong256(0, id);
}
Expand Down Expand Up @@ -106,10 +106,7 @@ private TableWriter updateTelemetryConfig(
SqlExecutionContextImpl sqlExecutionContext,
TableToken tableToken
) throws SqlException {
final TableWriter configWriter = engine.getWriter(
tableToken,
"telemetryConfig"
);
final TableWriter configWriter = engine.getWriter(tableToken, "telemetryConfig");
final CompiledQuery cc = compiler.query().$(TELEMETRY_CONFIG_TABLE_NAME).$(" LIMIT -1").compile(sqlExecutionContext);
try (
final RecordCursorFactory factory = cc.getRecordCursorFactory();
Expand All @@ -129,12 +126,12 @@ private TableWriter updateTelemetryConfig(
LOG.advisory()
.$("instance config changes [id=").$256(l256.getLong0(), l256.getLong1(), 0, 0)
.$(", enabled=").$(enabled)
.$(']').$();
.I$();
} else {
LOG.advisory()
.$("instance [id=").$256(l256.getLong0(), l256.getLong1(), 0, 0)
.$(", enabled=").$(enabled)
.$(']').$();
.I$();
}
} else {
// if there is no record for the telemetry id, we need to create one using clocks
Expand Down Expand Up @@ -165,7 +162,7 @@ void init(CairoEngine engine, SqlCompiler compiler, SqlExecutionContextImpl sqlE
.$("could not open [table=`").utf8(TELEMETRY_CONFIG_TABLE_NAME)
.$("`, ex=").$(ex.getFlyweightMessage())
.$(", errno=").$(ex.getErrno())
.$(']').$();
.I$();
}
}
}
22 changes: 15 additions & 7 deletions core/src/main/java/io/questdb/cairo/CairoEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ public void getTableTokens(ObjHashSet<TableToken> bucket, boolean includeDropped
}

@Override
public TableWriterAPI getTableWriterAPI(TableToken tableToken, @Nullable String lockReason) {
public TableWriterAPI getTableWriterAPI(TableToken tableToken, @NotNull String lockReason) {
verifyTableToken(tableToken);
if (!tableToken.isWal()) {
return writerPool.get(tableToken, lockReason);
Expand All @@ -654,7 +654,7 @@ public TableWriterAPI getTableWriterAPI(TableToken tableToken, @Nullable String
}

@Override
public TableWriterAPI getTableWriterAPI(CharSequence tableName, String lockReason) {
public TableWriterAPI getTableWriterAPI(CharSequence tableName, @NotNull String lockReason) {
return getTableWriterAPI(verifyTableNameForRead(tableName), lockReason);
}

Expand Down Expand Up @@ -702,7 +702,7 @@ public WalReader getWalReader(
return walWriterPool.get(tableToken);
}

public TableWriter getWriter(TableToken tableToken, String lockReason) {
public TableWriter getWriter(TableToken tableToken, @NotNull String lockReason) {
verifyTableToken(tableToken);
return writerPool.get(tableToken, lockReason);
}
Expand All @@ -716,7 +716,7 @@ public Map<CharSequence, WriterPool.Entry> getWriterPoolEntries() {
return writerPool.entries();
}

public TableWriter getWriterUnsafe(TableToken tableToken, String lockReason) {
public TableWriter getWriterUnsafe(TableToken tableToken, @NotNull String lockReason) {
return writerPool.get(tableToken, lockReason);
}

Expand Down Expand Up @@ -1178,16 +1178,24 @@ private TableToken rename0(Path fromPath, TableToken fromTableToken, Path toPath
}
}

private void tryRepairTable(TableToken tableToken, RuntimeException rethrow) {
private void tryRepairTable(TableToken tableToken, CairoException rethrow) {
LOG.info()
.$("starting table repair [table=").$(tableToken)
.$(", dirName=").utf8(tableToken.getDirName())
.$(", cause=").$(rethrow.getFlyweightMessage())
.I$();
try {
writerPool.get(tableToken, "repair").close();
LOG.info().$("table repair succeeded [table=").$(tableToken).I$();
} catch (EntryUnavailableException e) {
// This is fine, the writer is busy. Rethrow the original error.
LOG.info().$("writer is busy, skipping repair [table=").$(tableToken).I$();
throw rethrow;
} catch (Throwable th) {
LOG.critical()
.$("could not repair before reading [dirName=").utf8(tableToken.getDirName())
.$(" ,error=").$(th.getMessage()).I$();
.$("table repair failed [dirName=").utf8(tableToken.getDirName())
.$(", error=").$(th.getMessage())
.I$();
throw rethrow;
}
}
Expand Down
7 changes: 3 additions & 4 deletions core/src/main/java/io/questdb/cairo/pool/WriterPool.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public Map<CharSequence, Entry> entries() {
* @param lockReason description of where or why lock is held
* @return cached TableWriter instance.
*/
public TableWriter get(TableToken tableToken, String lockReason) {
public TableWriter get(TableToken tableToken, @NotNull String lockReason) {
return getWriterEntry(tableToken, lockReason, null);
}

Expand Down Expand Up @@ -160,7 +160,7 @@ public int getBusyCount() {
*/
public TableWriter getWriterOrPublishCommand(
TableToken tableToken,
String lockReason,
@NotNull String lockReason,
@NotNull AsyncWriterCommand asyncWriterCommand
) {
while (true) {
Expand Down Expand Up @@ -416,10 +416,9 @@ private TableWriter createWriter(TableToken tableToken, Entry e, long thread, St

private TableWriter getWriterEntry(
TableToken tableToken,
String lockReason,
@NotNull String lockReason,
@Nullable AsyncWriterCommand asyncWriterCommand
) {
assert null != lockReason;
checkClosed();

long thread = Thread.currentThread().getId();
Expand Down
5 changes: 3 additions & 2 deletions core/src/main/java/io/questdb/cairo/pool/WriterSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@

import io.questdb.cairo.TableToken;
import io.questdb.cairo.TableWriterAPI;
import org.jetbrains.annotations.NotNull;

public interface WriterSource {
TableWriterAPI getTableWriterAPI(TableToken tableToken, String lockReason);
TableWriterAPI getTableWriterAPI(TableToken tableToken, @NotNull String lockReason);

TableWriterAPI getTableWriterAPI(CharSequence tableName, String lockReason);
TableWriterAPI getTableWriterAPI(CharSequence tableName, @NotNull String lockReason);
}
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ long applyWal(
if (tableBusy.getReason() != NO_LOCK_REASON
&& !WAL_2_TABLE_WRITE_REASON.equals(tableBusy.getReason())
&& !WAL_2_TABLE_RESUME_REASON.equals(tableBusy.getReason())) {
LOG.critical().$("unsolicited table lock [table=").utf8(tableToken.getDirName()).$(", lock_reason=").$(tableBusy.getReason()).I$();
LOG.critical().$("unsolicited table lock [table=").utf8(tableToken.getDirName()).$(", lockReason=").$(tableBusy.getReason()).I$();
// This is abnormal termination but table is not set to suspended state.
// Reset state of SeqTxnTracker so that next CheckWalTransactionJob run will send job notification if necessary.
engine.notifyWalTxnRepublisher(tableToken);
Expand Down

0 comments on commit c1a4b7f

Please sign in to comment.