From 591660ddd792c83e641f374255650a7f6a765e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Wed, 21 Jul 2021 13:33:53 +0200 Subject: [PATCH 1/4] Ensure ellipsis is added when replying to cached events --- src/bridge/MatrixHandler.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bridge/MatrixHandler.ts b/src/bridge/MatrixHandler.ts index 3f8ee362c..21c25f60d 100644 --- a/src/bridge/MatrixHandler.ts +++ b/src/bridge/MatrixHandler.ts @@ -1247,7 +1247,6 @@ export class MatrixHandler { else { rplSource = eventContent.content.body; } - rplSource = rplSource.substring(0, REPLY_SOURCE_MAX_LENGTH); cachedEvent = {sender: rplName, body: rplSource, timestamp: eventContent.origin_server_ts}; this.cacheEvent(eventId, cachedEvent); } From 9cb96f3c75ac7937b85e3822545444e471fc47cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Wed, 21 Jul 2021 15:14:19 +0200 Subject: [PATCH 2/4] Make sure we're not breaking unicode characters when trimming reply strings --- spec/unit/TrimString.spec.js | 20 ++++++++++++++++++++ src/bridge/IrcBridge.ts | 4 +++- src/bridge/MatrixHandler.ts | 7 ++++--- src/util/TrimString.ts | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 spec/unit/TrimString.spec.js create mode 100644 src/util/TrimString.ts diff --git a/spec/unit/TrimString.spec.js b/spec/unit/TrimString.spec.js new file mode 100644 index 000000000..72822c20a --- /dev/null +++ b/spec/unit/TrimString.spec.js @@ -0,0 +1,20 @@ +"use strict"; +const { trimString } = require("../../lib/util/TrimString.js"); + +describe("trimString", function() { + it("should not cut unicode characters in half", (done) => { + const input = "lol 😅"; + const result = trimString(input, 5); + expect(result).toEqual(input); + + done(); + }); + + it("should trim trailing whitespace by itself", (done) => { + const input = "lol 😅"; + const result = trimString(input, 4); + expect(result).toEqual('lol'); + + done(); + }); +}); diff --git a/src/bridge/IrcBridge.ts b/src/bridge/IrcBridge.ts index 1bea410ab..4fc47c02d 100644 --- a/src/bridge/IrcBridge.ts +++ b/src/bridge/IrcBridge.ts @@ -198,7 +198,9 @@ export class IrcBridge { }); this.matrixHandler = new MatrixHandler(this, this.config.ircService.matrixHandler || {}, this.membershipQueue); this.privacyProtection = new PrivacyProtection(this); - this.ircHandler = new IrcHandler(this, this.config.ircService.ircHandler, this.membershipQueue, this.privacyProtection); + this.ircHandler = new IrcHandler( + this, this.config.ircService.ircHandler, this.membershipQueue, this.privacyProtection + ); // By default the bridge will escape mxids, but the irc bridge isn't ready for this yet. MatrixUser.ESCAPE_DEFAULT = false; diff --git a/src/bridge/MatrixHandler.ts b/src/bridge/MatrixHandler.ts index 21c25f60d..0779b0f23 100644 --- a/src/bridge/MatrixHandler.ts +++ b/src/bridge/MatrixHandler.ts @@ -11,6 +11,7 @@ import { toIrcLowerCase } from "../irc/formatting"; import { AdminRoomHandler } from "./AdminRoomHandler"; import { trackChannelAndCreateRoom } from "./RoomCreation"; import { renderTemplate } from "../util/Template"; +import { trimString } from "../util/TrimString"; async function reqHandler(req: BridgeRequest, promise: PromiseLike) { try { @@ -1267,10 +1268,10 @@ export class MatrixHandler { // Get the first non-blank line from the source. const lines = rplSource.split('\n').filter((line) => !/^\s*$/.test(line)) if (lines.length > 0) { - // Cut to a maximum length. - rplSource = lines[0].substring(0, REPLY_SOURCE_MAX_LENGTH); + rplSource = trimString(lines[0], REPLY_SOURCE_MAX_LENGTH); + // Ellipsis if needed. - if (lines[0].length > REPLY_SOURCE_MAX_LENGTH) { + if (lines.length > 1 || rplSource.length < lines[0].length) { rplSource = rplSource + "..."; } } diff --git a/src/util/TrimString.ts b/src/util/TrimString.ts new file mode 100644 index 000000000..606b22aaa --- /dev/null +++ b/src/util/TrimString.ts @@ -0,0 +1,33 @@ +/* +Copyright 2021 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// a unicode-aware substring(0, $x) +export function trimString(input: string, maxLength: number): string { + const re = new RegExp(`^([\\s\\S]{0,${maxLength}})`, 'u'); + const match = input.match(re); + + let trimmed: string; + if (match) { + trimmed = match[1]; + } + else { + // fallback to a dumb substring() if the regex failed for any reason + trimmed = input.substring(0, maxLength); + } + // + // trimRight()/trimEnd() may break unicode characters + return trimmed.replace(/\s*$/u, ''); +} From 123f5b717c9223520da75d022d30476a3348f57f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Wed, 21 Jul 2021 16:19:45 +0200 Subject: [PATCH 3/4] Try not to break words when quoting original message during replies --- changelog.d/1428.feature | 1 + spec/unit/TrimString.spec.js | 16 ++++++++++++++++ src/util/TrimString.ts | 35 ++++++++++++++++++++++++----------- 3 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 changelog.d/1428.feature diff --git a/changelog.d/1428.feature b/changelog.d/1428.feature new file mode 100644 index 000000000..fca637986 --- /dev/null +++ b/changelog.d/1428.feature @@ -0,0 +1 @@ +Truncate original messages more gently when replying diff --git a/spec/unit/TrimString.spec.js b/spec/unit/TrimString.spec.js index 72822c20a..ad35a81f0 100644 --- a/spec/unit/TrimString.spec.js +++ b/spec/unit/TrimString.spec.js @@ -17,4 +17,20 @@ describe("trimString", function() { done(); }); + + it('should stop trimming at the word boundary if reasonable', (done) => { + const input = "this sentence is waaaaay too long"; + const result = trimString(input, 20); + expect(result).toEqual('this sentence is'); + + done(); + }); + + it('should give up looking for a word boundary if result would become too short', (done) => { + const input = "we're in Llanfairpwllgwyngyll"; + const result = trimString(input, 24); + expect(result).toContain("we're in Llan"); + + done(); + }); }); diff --git a/src/util/TrimString.ts b/src/util/TrimString.ts index 606b22aaa..58b87de27 100644 --- a/src/util/TrimString.ts +++ b/src/util/TrimString.ts @@ -14,20 +14,33 @@ See the License for the specific language governing permissions and limitations under the License. */ -// a unicode-aware substring(0, $x) +// since trimRight()/trimEnd() may break unicode characters +function trimTrailingWhitespace(input: string): string { + return input.replace(/\s*$/u, ''); +} + +// a unicode-aware substring(0, $x) that tries to not break words if possible export function trimString(input: string, maxLength: number): string { - const re = new RegExp(`^([\\s\\S]{0,${maxLength}})`, 'u'); + const re = new RegExp(`^([\\s\\S]{0,${maxLength}})(\\p{L}?)`, 'u'); const match = input.match(re); - let trimmed: string; - if (match) { - trimmed = match[1]; - } - else { + if (!match) { // fallback to a dumb substring() if the regex failed for any reason - trimmed = input.substring(0, maxLength); + return trimTrailingWhitespace(input.substring(0, maxLength)); + } + + const trimmed = trimTrailingWhitespace(match[1]); + + if (match[2]) { + // find as much as you can that is followed by a word boundary, + // shorter than what we have now, but at least 75% of the desired length + const smallerMatch = trimmed.match(/^([\s\S]*\S)\b[\s\S]/u); + const minLength = maxLength * 0.75; + + if (smallerMatch && smallerMatch[1].length >= minLength) { + return smallerMatch[1]; + } } - // - // trimRight()/trimEnd() may break unicode characters - return trimmed.replace(/\s*$/u, ''); + + return trimmed; } From 9cbb200e4de0ab2c1b2d244062c48dd425595905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Wed, 21 Jul 2021 16:56:22 +0200 Subject: [PATCH 4/4] Drop unnecessary async code from synchronous tests --- spec/unit/Template.spec.js | 4 +--- spec/unit/TrimString.spec.js | 16 ++++------------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/spec/unit/Template.spec.js b/spec/unit/Template.spec.js index 1f9bbc48d..e4d1df7b4 100644 --- a/spec/unit/Template.spec.js +++ b/spec/unit/Template.spec.js @@ -2,14 +2,12 @@ const { renderTemplate } = require("../../lib/util/Template.js"); describe("renderTemplate", function() { - it("should replace placeholders with submitted values", (done) => { + it("should replace placeholders with submitted values", () => { const template = '$FOO bar $BAZ'; const result = renderTemplate(template, { foo: 'one', baz: 'two', }); expect(result).toEqual('one bar two'); - - done(); }); }); diff --git a/spec/unit/TrimString.spec.js b/spec/unit/TrimString.spec.js index ad35a81f0..ca94f860f 100644 --- a/spec/unit/TrimString.spec.js +++ b/spec/unit/TrimString.spec.js @@ -2,35 +2,27 @@ const { trimString } = require("../../lib/util/TrimString.js"); describe("trimString", function() { - it("should not cut unicode characters in half", (done) => { + it("should not cut unicode characters in half", () => { const input = "lol 😅"; const result = trimString(input, 5); expect(result).toEqual(input); - - done(); }); - it("should trim trailing whitespace by itself", (done) => { + it("should trim trailing whitespace by itself", () => { const input = "lol 😅"; const result = trimString(input, 4); expect(result).toEqual('lol'); - - done(); }); - it('should stop trimming at the word boundary if reasonable', (done) => { + it('should stop trimming at the word boundary if reasonable', () => { const input = "this sentence is waaaaay too long"; const result = trimString(input, 20); expect(result).toEqual('this sentence is'); - - done(); }); - it('should give up looking for a word boundary if result would become too short', (done) => { + it('should give up looking for a word boundary if result would become too short', () => { const input = "we're in Llanfairpwllgwyngyll"; const result = trimString(input, 24); expect(result).toContain("we're in Llan"); - - done(); }); });