diff --git a/changelog.d/1428.feature b/changelog.d/1428.feature new file mode 100644 index 000000000..fca637986 --- /dev/null +++ b/changelog.d/1428.feature @@ -0,0 +1 @@ +Truncate original messages more gently when replying diff --git a/spec/unit/Template.spec.js b/spec/unit/Template.spec.js index 1f9bbc48d..e4d1df7b4 100644 --- a/spec/unit/Template.spec.js +++ b/spec/unit/Template.spec.js @@ -2,14 +2,12 @@ const { renderTemplate } = require("../../lib/util/Template.js"); describe("renderTemplate", function() { - it("should replace placeholders with submitted values", (done) => { + it("should replace placeholders with submitted values", () => { const template = '$FOO bar $BAZ'; const result = renderTemplate(template, { foo: 'one', baz: 'two', }); expect(result).toEqual('one bar two'); - - done(); }); }); diff --git a/spec/unit/TrimString.spec.js b/spec/unit/TrimString.spec.js new file mode 100644 index 000000000..ca94f860f --- /dev/null +++ b/spec/unit/TrimString.spec.js @@ -0,0 +1,28 @@ +"use strict"; +const { trimString } = require("../../lib/util/TrimString.js"); + +describe("trimString", function() { + it("should not cut unicode characters in half", () => { + const input = "lol 😅"; + const result = trimString(input, 5); + expect(result).toEqual(input); + }); + + it("should trim trailing whitespace by itself", () => { + const input = "lol 😅"; + const result = trimString(input, 4); + expect(result).toEqual('lol'); + }); + + it('should stop trimming at the word boundary if reasonable', () => { + const input = "this sentence is waaaaay too long"; + const result = trimString(input, 20); + expect(result).toEqual('this sentence is'); + }); + + it('should give up looking for a word boundary if result would become too short', () => { + const input = "we're in Llanfairpwllgwyngyll"; + const result = trimString(input, 24); + expect(result).toContain("we're in Llan"); + }); +}); diff --git a/src/bridge/IrcBridge.ts b/src/bridge/IrcBridge.ts index 1bea410ab..4fc47c02d 100644 --- a/src/bridge/IrcBridge.ts +++ b/src/bridge/IrcBridge.ts @@ -198,7 +198,9 @@ export class IrcBridge { }); this.matrixHandler = new MatrixHandler(this, this.config.ircService.matrixHandler || {}, this.membershipQueue); this.privacyProtection = new PrivacyProtection(this); - this.ircHandler = new IrcHandler(this, this.config.ircService.ircHandler, this.membershipQueue, this.privacyProtection); + this.ircHandler = new IrcHandler( + this, this.config.ircService.ircHandler, this.membershipQueue, this.privacyProtection + ); // By default the bridge will escape mxids, but the irc bridge isn't ready for this yet. MatrixUser.ESCAPE_DEFAULT = false; diff --git a/src/bridge/MatrixHandler.ts b/src/bridge/MatrixHandler.ts index 3f8ee362c..0779b0f23 100644 --- a/src/bridge/MatrixHandler.ts +++ b/src/bridge/MatrixHandler.ts @@ -11,6 +11,7 @@ import { toIrcLowerCase } from "../irc/formatting"; import { AdminRoomHandler } from "./AdminRoomHandler"; import { trackChannelAndCreateRoom } from "./RoomCreation"; import { renderTemplate } from "../util/Template"; +import { trimString } from "../util/TrimString"; async function reqHandler(req: BridgeRequest, promise: PromiseLike) { try { @@ -1247,7 +1248,6 @@ export class MatrixHandler { else { rplSource = eventContent.content.body; } - rplSource = rplSource.substring(0, REPLY_SOURCE_MAX_LENGTH); cachedEvent = {sender: rplName, body: rplSource, timestamp: eventContent.origin_server_ts}; this.cacheEvent(eventId, cachedEvent); } @@ -1268,10 +1268,10 @@ export class MatrixHandler { // Get the first non-blank line from the source. const lines = rplSource.split('\n').filter((line) => !/^\s*$/.test(line)) if (lines.length > 0) { - // Cut to a maximum length. - rplSource = lines[0].substring(0, REPLY_SOURCE_MAX_LENGTH); + rplSource = trimString(lines[0], REPLY_SOURCE_MAX_LENGTH); + // Ellipsis if needed. - if (lines[0].length > REPLY_SOURCE_MAX_LENGTH) { + if (lines.length > 1 || rplSource.length < lines[0].length) { rplSource = rplSource + "..."; } } diff --git a/src/util/TrimString.ts b/src/util/TrimString.ts new file mode 100644 index 000000000..58b87de27 --- /dev/null +++ b/src/util/TrimString.ts @@ -0,0 +1,46 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// since trimRight()/trimEnd() may break unicode characters +function trimTrailingWhitespace(input: string): string { + return input.replace(/\s*$/u, ''); +} + +// a unicode-aware substring(0, $x) that tries to not break words if possible +export function trimString(input: string, maxLength: number): string { + const re = new RegExp(`^([\\s\\S]{0,${maxLength}})(\\p{L}?)`, 'u'); + const match = input.match(re); + + if (!match) { + // fallback to a dumb substring() if the regex failed for any reason + return trimTrailingWhitespace(input.substring(0, maxLength)); + } + + const trimmed = trimTrailingWhitespace(match[1]); + + if (match[2]) { + // find as much as you can that is followed by a word boundary, + // shorter than what we have now, but at least 75% of the desired length + const smallerMatch = trimmed.match(/^([\s\S]*\S)\b[\s\S]/u); + const minLength = maxLength * 0.75; + + if (smallerMatch && smallerMatch[1].length >= minLength) { + return smallerMatch[1]; + } + } + + return trimmed; +}