-
Notifications
You must be signed in to change notification settings - Fork 670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Prune old events from sqlite database #4737
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,12 @@ type Debug = { | |
raw: boolean; | ||
}; | ||
|
||
type SqliteConfig = | ||
| { | ||
maxDaysHistory: number | undefined; | ||
} | ||
| undefined; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that type seems odd to me? You shouldn't set that to an object or undefined, things that want it optional shall say so when they mean it. Meaning, the user of the type should do export type ConfigType = {
// ...
sqliteConfig?: SqliteConfig;
// ...
} Or whatever, but not that funny type that is intrinsically this way. |
||
export type ConfigType = { | ||
public: boolean; | ||
host: string | undefined; | ||
|
@@ -97,6 +103,7 @@ export type ConfigType = { | |
defaults: Defaults; | ||
lockNetwork: boolean; | ||
messageStorage: string[]; | ||
sqliteConfig: SqliteConfig; | ||
useHexIp: boolean; | ||
webirc?: WebIRC; | ||
identd: Identd; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,16 +44,36 @@ class Deferred { | |
} | ||
} | ||
|
||
type SqliteConfig = typeof Config.values.sqliteConfig; | ||
|
||
class SqliteMessageStorage implements SearchableMessageStorage { | ||
isEnabled: boolean; | ||
database!: Database; | ||
initDone: Deferred; | ||
userName: string; | ||
mergedConfig: typeof Config.values.sqliteConfig; | ||
|
||
scheduledIntervalId: ReturnType<typeof setInterval> | undefined; | ||
|
||
constructor(userName: string) { | ||
constructor(userName: string, clientStorageConfig: SqliteConfig) { | ||
this.userName = userName; | ||
this.isEnabled = false; | ||
this.initDone = new Deferred(); | ||
|
||
this.mergedConfig = this.mergeClientConfig(clientStorageConfig); | ||
} | ||
|
||
mergeClientConfig(clientStorageConfig: SqliteConfig) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't the right place for that kind of logic. Yes, currently sqlite is our only implementation of the interface, but that doesn't mean that we should violate the layers. |
||
const globalDays = Config.values?.sqliteConfig?.maxDaysHistory || 0; | ||
const clientDays = clientStorageConfig?.maxDaysHistory || 0; | ||
return { | ||
maxDaysHistory: | ||
globalDays <= 0 | ||
? clientDays | ||
: clientDays > 0 | ||
? Math.min(globalDays, clientDays) | ||
: globalDays, | ||
}; | ||
} | ||
|
||
async _enable() { | ||
|
@@ -76,6 +96,8 @@ class SqliteMessageStorage implements SearchableMessageStorage { | |
this.isEnabled = false; | ||
throw Helper.catch_to_error("Migration failed", e); | ||
} | ||
|
||
this.schedulePruning(); | ||
} | ||
|
||
async enable() { | ||
|
@@ -124,6 +146,11 @@ class SqliteMessageStorage implements SearchableMessageStorage { | |
} | ||
|
||
async close() { | ||
if (this.scheduledIntervalId) { | ||
clearInterval(this.scheduledIntervalId); | ||
this.scheduledIntervalId = undefined; | ||
} | ||
|
||
if (!this.isEnabled) { | ||
return; | ||
} | ||
|
@@ -172,6 +199,49 @@ class SqliteMessageStorage implements SearchableMessageStorage { | |
); | ||
} | ||
|
||
schedulePruning() { | ||
const keepNdays = this.mergedConfig?.maxDaysHistory || 0; | ||
|
||
if (!keepNdays || keepNdays <= 0) { | ||
return; | ||
} | ||
|
||
if (this.scheduledIntervalId) { | ||
clearInterval(this.scheduledIntervalId); | ||
} | ||
|
||
// Probably best to not make these things configurable | ||
// to avoid users setting high values and freezing their instance | ||
const runFrequencyMilliseconds = 1000 * 60 * 5; // Every 5 min | ||
const deleteAtMostN = 1000; | ||
|
||
this.scheduledIntervalId = setInterval(() => { | ||
this.pruneOldEvents(keepNdays, deleteAtMostN).catch((err) => | ||
log.error("Pruning failed: ", err) | ||
); | ||
}, runFrequencyMilliseconds); | ||
} | ||
|
||
async pruneOldEvents(keepNdays: number, deleteAtMostN: number) { | ||
// Delete oldest events (up to `deleteAtMostN`) older than `keepNdays` | ||
await this.initDone.promise; | ||
|
||
if (!this.isEnabled) { | ||
return; | ||
} | ||
|
||
// We roughly get a timestamp from N days before. | ||
// We don't adjust for daylight savings time or other weird time jumps | ||
const millisecondsInDay = 24 * 60 * 60 * 1000; | ||
const deleteBefore = Date.now() - keepNdays * millisecondsInDay; | ||
await this.serialize_run( | ||
`DELETE FROM messages WHERE rowid in ( | ||
SELECT rowid FROM messages WHERE time < ? ORDER BY time ASC LIMIT ? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you enforcing an ordering here? You could just let the DB do it in whatever order it sees fit, generally you'd assume that you can keep up within a single invocation right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we do delete in batches of only 1000 events, I felt wrong creating a "hole" in history. Since the index is already there, I think there is no harm in doing it this way right? |
||
)`, | ||
[deleteBefore, deleteAtMostN] | ||
); | ||
} | ||
|
||
async deleteChannel(network: Network, channel: Channel) { | ||
await this.initDone.promise; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,54 @@ | ||
/* eslint-disable @typescript-eslint/no-unsafe-return */ | ||
import fs from "fs"; | ||
import _ from "lodash"; | ||
import fs from "fs/promises"; | ||
import path from "path"; | ||
import {expect} from "chai"; | ||
import util from "../util"; | ||
import Msg, {MessageType} from "../../server/models/msg"; | ||
import Config from "../../server/config"; | ||
import MessageStorage from "../../server/plugins/messageStorage/sqlite"; | ||
|
||
describe("SQLite Message Storage", function () { | ||
async function exists(filePath: string) { | ||
try { | ||
await fs.access(filePath); | ||
return true; | ||
} catch { | ||
return false; | ||
} | ||
} | ||
|
||
async function cleanup() { | ||
const dirpath = path.join(Config.getHomePath(), "logs"); | ||
|
||
if (await exists(dirpath)) { | ||
await fs.rm(dirpath, {recursive: true}); | ||
} | ||
} | ||
|
||
describe("SQLite Message Storage (stateful tests)", function () { | ||
// Increase timeout due to unpredictable I/O on CI services | ||
this.timeout(util.isRunningOnCI() ? 25000 : 5000); | ||
this.slow(300); | ||
|
||
const expectedPath = path.join(Config.getHomePath(), "logs", "testUser.sqlite3"); | ||
let store: MessageStorage; | ||
|
||
before(function (done) { | ||
store = new MessageStorage("testUser"); | ||
|
||
before(async function () { | ||
// Delete database file from previous test run | ||
if (fs.existsSync(expectedPath)) { | ||
fs.unlink(expectedPath, done); | ||
} else { | ||
done(); | ||
} | ||
await cleanup(); | ||
|
||
store = new MessageStorage("testUser", undefined as any); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. such casts are very brittle... if you can avoid so please do and in this case you trivially could by providing a correct default value |
||
}); | ||
|
||
after(function (done) { | ||
after(async function () { | ||
// After tests run, remove the logs folder | ||
// so we return to the clean state | ||
fs.unlinkSync(expectedPath); | ||
fs.rmdir(path.join(Config.getHomePath(), "logs"), done); | ||
await cleanup(); | ||
}); | ||
|
||
it("should create database file", async function () { | ||
expect(store.isEnabled).to.be.false; | ||
expect(fs.existsSync(expectedPath)).to.be.false; | ||
expect(await exists(expectedPath)).to.be.false; | ||
|
||
await store.enable(); | ||
expect(store.isEnabled).to.be.true; | ||
|
@@ -232,6 +245,102 @@ describe("SQLite Message Storage", function () { | |
|
||
it("should close database", async function () { | ||
await store.close(); | ||
expect(fs.existsSync(expectedPath)).to.be.true; | ||
expect(await exists(expectedPath)).to.be.true; | ||
}); | ||
}); | ||
|
||
describe("SQLite Message Storage (stateless tests)", function () { | ||
// Increase timeout due to unpredictable I/O on CI services | ||
this.timeout(util.isRunningOnCI() ? 25000 : 5000); | ||
this.slow(300); | ||
|
||
let store: MessageStorage; | ||
beforeEach(async function () { | ||
await cleanup(); | ||
}); | ||
|
||
afterEach(async function () { | ||
await store.close(); | ||
await cleanup(); | ||
}); | ||
|
||
it("Should not schedule pruning because of server and client settings", async function () { | ||
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory; | ||
_.set(Config.values, "sqliteConfig.maxDaysHistory", undefined); | ||
|
||
store = new MessageStorage("testUser", {maxDaysHistory: 0}); | ||
await store.enable(); | ||
expect(store.scheduledIntervalId).to.be.undefined; | ||
|
||
_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays); | ||
}); | ||
|
||
it("Should schedule pruning because of client settings", async function () { | ||
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory; | ||
|
||
_.set(Config.values, "sqliteConfig.maxDaysHistory", undefined); | ||
store = new MessageStorage("testUser", {maxDaysHistory: 1}); | ||
await store.enable(); | ||
expect(store.scheduledIntervalId).to.not.be.undefined; | ||
|
||
_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays); | ||
}); | ||
|
||
it("Should schedule pruning because of server settings", async function () { | ||
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory; | ||
|
||
_.set(Config.values, "sqliteConfig.maxDaysHistory", 1); | ||
store = new MessageStorage("testUser", {maxDaysHistory: 0}); | ||
await store.enable(); | ||
expect(store.scheduledIntervalId).to.not.be.undefined; | ||
|
||
_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays); | ||
}); | ||
|
||
it("Should only prune old messages", async function () { | ||
store = new MessageStorage("testUser", undefined); | ||
await store.enable(); | ||
|
||
const dayInMs = 24 * 60 * 60 * 1000; | ||
const now = Date.now(); | ||
|
||
const network = {uuid: "network-guid"}; | ||
const chan = {name: "#channel"}; | ||
|
||
// First insert lots of messages. | ||
for (let i = 0; i < 100; ++i) { | ||
// Each event is 1 day older | ||
await store.index( | ||
network as any, | ||
chan as any, | ||
new Msg({ | ||
time: new Date(now - i * dayInMs), | ||
text: `${i}`, | ||
}) | ||
); | ||
} | ||
|
||
let msgid = 0; | ||
let messages = await store.getMessages(network as any, chan as any, () => msgid++); | ||
expect(messages).to.have.length(100); | ||
|
||
// Delete events older than 90 days but limit to only 1 event | ||
await store.pruneOldEvents(90, 1); | ||
|
||
messages = await store.getMessages(network as any, chan as any, () => msgid++); | ||
expect(messages).to.have.length(99); | ||
// make sure the oldest event (text = 99) was deleted | ||
const found_msgs = new Set(messages.map((msg) => msg.text)); | ||
expect(found_msgs.has("99")).to.be.false; | ||
|
||
// Delete events older than 90 days | ||
await store.pruneOldEvents(90, 1000); | ||
messages = await store.getMessages(network as any, chan as any, () => msgid++); | ||
expect(messages).to.have.length(90); | ||
|
||
// Delete events older than 1 day | ||
await store.pruneOldEvents(1, 1000); | ||
messages = await store.getMessages(network as any, chan as any, () => msgid++); | ||
expect(messages).to.have.length(1); | ||
}); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
exactly undefined
We should have one way and only one way of configuring something.