Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prune old events from sqlite database #4737

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions defaults/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,16 @@ module.exports = {
// This value is set to `["sqlite", "text"]` by default.
messageStorage: ["sqlite", "text"],

// ### `sqliteConfig`
//
// `maxDaysHistory`:
// Defines the maximum number of days of history to keep in the database.
// Undefined/-1/0 is treated an unlimited.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exactly undefined
We should have one way and only one way of configuring something.

// Users can overwrite setting to make the duration shorter
sqliteConfig: {
maxDaysHistory: undefined,
},

// ### `useHexIp`
//
// When set to `true`, users' IP addresses will be encoded as hex.
Expand Down
6 changes: 5 additions & 1 deletion server/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export type UserConfig = {
pushSubscription?: ClientPushSubscription;
};
};
storage: typeof Config.values.sqliteConfig;
clientSettings: {
[key: string]: any;
};
Expand Down Expand Up @@ -137,7 +138,10 @@ class Client {

if (!Config.values.public && client.config.log) {
if (Config.values.messageStorage.includes("sqlite")) {
client.messageProvider = new SqliteMessageStorage(client.name);
client.messageProvider = new SqliteMessageStorage(
client.name,
client.config.storage
);
client.messageStorage.push(client.messageProvider);
}

Expand Down
7 changes: 7 additions & 0 deletions server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ type Debug = {
raw: boolean;
};

type SqliteConfig =
| {
maxDaysHistory: number | undefined;
}
| undefined;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that type seems odd to me?

You shouldn't set that to an object or undefined, things that want it optional shall say so when they mean it.

Meaning, the user of the type should do

export type ConfigType = {
 // ...
sqliteConfig?: SqliteConfig;
// ...
}

Or whatever, but not that funny type that is intrinsically this way.
That's a hassle to use

export type ConfigType = {
public: boolean;
host: string | undefined;
Expand All @@ -97,6 +103,7 @@ export type ConfigType = {
defaults: Defaults;
lockNetwork: boolean;
messageStorage: string[];
sqliteConfig: SqliteConfig;
useHexIp: boolean;
webirc?: WebIRC;
identd: Identd;
Expand Down
72 changes: 71 additions & 1 deletion server/plugins/messageStorage/sqlite.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,36 @@ class Deferred {
}
}

type SqliteConfig = typeof Config.values.sqliteConfig;

class SqliteMessageStorage implements SearchableMessageStorage {
isEnabled: boolean;
database!: Database;
initDone: Deferred;
userName: string;
mergedConfig: typeof Config.values.sqliteConfig;

scheduledIntervalId: ReturnType<typeof setInterval> | undefined;

constructor(userName: string) {
constructor(userName: string, clientStorageConfig: SqliteConfig) {
this.userName = userName;
this.isEnabled = false;
this.initDone = new Deferred();

this.mergedConfig = this.mergeClientConfig(clientStorageConfig);
}

mergeClientConfig(clientStorageConfig: SqliteConfig) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't the right place for that kind of logic.
The config needs to be merged by the core code, not the sqlite specific storage backend.

Yes, currently sqlite is our only implementation of the interface, but that doesn't mean that we should violate the layers.

const globalDays = Config.values?.sqliteConfig?.maxDaysHistory || 0;
const clientDays = clientStorageConfig?.maxDaysHistory || 0;
return {
maxDaysHistory:
globalDays <= 0
? clientDays
: clientDays > 0
? Math.min(globalDays, clientDays)
: globalDays,
};
}

async _enable() {
Expand All @@ -76,6 +96,8 @@ class SqliteMessageStorage implements SearchableMessageStorage {
this.isEnabled = false;
throw Helper.catch_to_error("Migration failed", e);
}

this.schedulePruning();
}

async enable() {
Expand Down Expand Up @@ -124,6 +146,11 @@ class SqliteMessageStorage implements SearchableMessageStorage {
}

async close() {
if (this.scheduledIntervalId) {
clearInterval(this.scheduledIntervalId);
this.scheduledIntervalId = undefined;
}

if (!this.isEnabled) {
return;
}
Expand Down Expand Up @@ -172,6 +199,49 @@ class SqliteMessageStorage implements SearchableMessageStorage {
);
}

schedulePruning() {
const keepNdays = this.mergedConfig?.maxDaysHistory || 0;

if (!keepNdays || keepNdays <= 0) {
return;
}

if (this.scheduledIntervalId) {
clearInterval(this.scheduledIntervalId);
}

// Probably best to not make these things configurable
// to avoid users setting high values and freezing their instance
const runFrequencyMilliseconds = 1000 * 60 * 5; // Every 5 min
const deleteAtMostN = 1000;

this.scheduledIntervalId = setInterval(() => {
this.pruneOldEvents(keepNdays, deleteAtMostN).catch((err) =>
log.error("Pruning failed: ", err)
);
}, runFrequencyMilliseconds);
}

async pruneOldEvents(keepNdays: number, deleteAtMostN: number) {
// Delete oldest events (up to `deleteAtMostN`) older than `keepNdays`
await this.initDone.promise;

if (!this.isEnabled) {
return;
}

// We roughly get a timestamp from N days before.
// We don't adjust for daylight savings time or other weird time jumps
const millisecondsInDay = 24 * 60 * 60 * 1000;
const deleteBefore = Date.now() - keepNdays * millisecondsInDay;
await this.serialize_run(
`DELETE FROM messages WHERE rowid in (
SELECT rowid FROM messages WHERE time < ? ORDER BY time ASC LIMIT ?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you enforcing an ordering here?
While I get the intention behind it, usually the messages in question are not visible to the client.

You could just let the DB do it in whatever order it sees fit, generally you'd assume that you can keep up within a single invocation right?
We have an index on the time, so it's probably relatively easy to order, but still.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we do delete in batches of only 1000 events, I felt wrong creating a "hole" in history. Since the index is already there, I think there is no harm in doing it this way right?

)`,
[deleteBefore, deleteAtMostN]
);
}

async deleteChannel(network: Network, channel: Channel) {
await this.initDone.promise;

Expand Down
2 changes: 2 additions & 0 deletions test/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ describe("Client", function () {
sessions: {},
clientSettings: {},
networks: [networkConfig],
storage: undefined,
});

// The client would normally do it as part of client.connect();
Expand Down Expand Up @@ -92,6 +93,7 @@ describe("Client", function () {
sessions: {},
clientSettings: {},
networks: [networkConfig],
storage: undefined,
});

// The client would normally do it as part of client.connect();
Expand Down
139 changes: 124 additions & 15 deletions test/plugins/sqlite.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,54 @@
/* eslint-disable @typescript-eslint/no-unsafe-return */
import fs from "fs";
import _ from "lodash";
import fs from "fs/promises";
import path from "path";
import {expect} from "chai";
import util from "../util";
import Msg, {MessageType} from "../../server/models/msg";
import Config from "../../server/config";
import MessageStorage from "../../server/plugins/messageStorage/sqlite";

describe("SQLite Message Storage", function () {
async function exists(filePath: string) {
try {
await fs.access(filePath);
return true;
} catch {
return false;
}
}

async function cleanup() {
const dirpath = path.join(Config.getHomePath(), "logs");

if (await exists(dirpath)) {
await fs.rm(dirpath, {recursive: true});
}
}

describe("SQLite Message Storage (stateful tests)", function () {
// Increase timeout due to unpredictable I/O on CI services
this.timeout(util.isRunningOnCI() ? 25000 : 5000);
this.slow(300);

const expectedPath = path.join(Config.getHomePath(), "logs", "testUser.sqlite3");
let store: MessageStorage;

before(function (done) {
store = new MessageStorage("testUser");

before(async function () {
// Delete database file from previous test run
if (fs.existsSync(expectedPath)) {
fs.unlink(expectedPath, done);
} else {
done();
}
await cleanup();

store = new MessageStorage("testUser", undefined as any);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

such casts are very brittle... if you can avoid so please do and in this case you trivially could by providing a correct default value

});

after(function (done) {
after(async function () {
// After tests run, remove the logs folder
// so we return to the clean state
fs.unlinkSync(expectedPath);
fs.rmdir(path.join(Config.getHomePath(), "logs"), done);
await cleanup();
});

it("should create database file", async function () {
expect(store.isEnabled).to.be.false;
expect(fs.existsSync(expectedPath)).to.be.false;
expect(await exists(expectedPath)).to.be.false;

await store.enable();
expect(store.isEnabled).to.be.true;
Expand Down Expand Up @@ -232,6 +245,102 @@ describe("SQLite Message Storage", function () {

it("should close database", async function () {
await store.close();
expect(fs.existsSync(expectedPath)).to.be.true;
expect(await exists(expectedPath)).to.be.true;
});
});

describe("SQLite Message Storage (stateless tests)", function () {
// Increase timeout due to unpredictable I/O on CI services
this.timeout(util.isRunningOnCI() ? 25000 : 5000);
this.slow(300);

let store: MessageStorage;
beforeEach(async function () {
await cleanup();
});

afterEach(async function () {
await store.close();
await cleanup();
});

it("Should not schedule pruning because of server and client settings", async function () {
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory;
_.set(Config.values, "sqliteConfig.maxDaysHistory", undefined);

store = new MessageStorage("testUser", {maxDaysHistory: 0});
await store.enable();
expect(store.scheduledIntervalId).to.be.undefined;

_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays);
});

it("Should schedule pruning because of client settings", async function () {
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory;

_.set(Config.values, "sqliteConfig.maxDaysHistory", undefined);
store = new MessageStorage("testUser", {maxDaysHistory: 1});
await store.enable();
expect(store.scheduledIntervalId).to.not.be.undefined;

_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays);
});

it("Should schedule pruning because of server settings", async function () {
const originalMaxDays = Config.values?.sqliteConfig?.maxDaysHistory;

_.set(Config.values, "sqliteConfig.maxDaysHistory", 1);
store = new MessageStorage("testUser", {maxDaysHistory: 0});
await store.enable();
expect(store.scheduledIntervalId).to.not.be.undefined;

_.set(Config.values, "sqliteConfig.maxDaysHistory", originalMaxDays);
});

it("Should only prune old messages", async function () {
store = new MessageStorage("testUser", undefined);
await store.enable();

const dayInMs = 24 * 60 * 60 * 1000;
const now = Date.now();

const network = {uuid: "network-guid"};
const chan = {name: "#channel"};

// First insert lots of messages.
for (let i = 0; i < 100; ++i) {
// Each event is 1 day older
await store.index(
network as any,
chan as any,
new Msg({
time: new Date(now - i * dayInMs),
text: `${i}`,
})
);
}

let msgid = 0;
let messages = await store.getMessages(network as any, chan as any, () => msgid++);
expect(messages).to.have.length(100);

// Delete events older than 90 days but limit to only 1 event
await store.pruneOldEvents(90, 1);

messages = await store.getMessages(network as any, chan as any, () => msgid++);
expect(messages).to.have.length(99);
// make sure the oldest event (text = 99) was deleted
const found_msgs = new Set(messages.map((msg) => msg.text));
expect(found_msgs.has("99")).to.be.false;

// Delete events older than 90 days
await store.pruneOldEvents(90, 1000);
messages = await store.getMessages(network as any, chan as any, () => msgid++);
expect(messages).to.have.length(90);

// Delete events older than 1 day
await store.pruneOldEvents(1, 1000);
messages = await store.getMessages(network as any, chan as any, () => msgid++);
expect(messages).to.have.length(1);
});
});