Skip to content

Commit

Permalink
linkify: simplify noscheme detection logic
Browse files Browse the repository at this point in the history
Overriding the built-in normalize function is poor form, as this prevents adding
a new type handler with its own normalize handler.

We only ever want to override protocol-less URLs to http, so
we just do so explicitly in the "//" schema normalizer.

This also means that we don't need the whole type-conversion dance:
we simply set the schema to an empty string when we patch it and filter
on the schema directly.
  • Loading branch information
brunnre8 committed Jan 21, 2024
1 parent ae6bae6 commit dd24cb1
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 29 deletions.
47 changes: 18 additions & 29 deletions shared/linkify.ts
Original file line number Diff line number Diff line change
@@ -1,36 +1,12 @@
import LinkifyIt, {Match} from "linkify-it";
import tlds from "tlds";

/**
 * A linkify-it `Match` carrying an extra `noschema` flag.
 *
 * The flag is written by the `LinkifyIt.prototype.normalize` override and is
 * read by `findLinksWithSchema` to drop matches whose schema was patched in.
 */
export type NoSchemaMatch = Match & {
// true when the matched text had no schema (or only "//") and "http:" was added
noschema: boolean;
};

/** Position and target of a single link found in a piece of text. */
export type LinkPart = {
// offset of the first character of the match (taken from the match's `index`)
start: number;
// offset just past the last character of the match (taken from `lastIndex`)
end: number;
// URL string the matched text points to
link: string;
};

/**
 * Global normalizer applied to every match.
 *
 * Matches without a schema, and protocol-relative ("//") matches, are forced
 * to http and flagged via `noschema`. E-mail matches get a "mailto:" prefix
 * when the URL does not already start with one.
 */
LinkifyIt.prototype.normalize = function normalize(match: NoSchemaMatch) {
	const hasNoSchema = !match.schema;
	const isProtocolRelative = match.schema === "//";

	// Only these two cases count as "the text itself carried no schema".
	match.noschema = hasNoSchema || isProtocolRelative;

	if (hasNoSchema) {
		match.schema = "http:";
		match.url = "http://" + match.url;
	} else if (isProtocolRelative) {
		// The url already begins with "//", so only the protocol is prepended.
		match.schema = "http:";
		match.url = "http:" + match.url;
	} else if (match.schema === "mailto:" && !/^mailto:/i.test(match.url)) {
		match.url = "mailto:" + match.url;
	}
};

// Linkifier instance used by the link finders in this module. It is configured
// with the TLD list from the "tlds" package and additionally with "onion".
// NOTE(review): the `true` argument presumably keeps the previously set list
// instead of replacing it — confirm against the linkify-it documentation.
const linkify = LinkifyIt().tlds(tlds).tlds("onion", true);

// Known schemes to detect in text
Expand Down Expand Up @@ -73,12 +49,25 @@ linkify.add("web+", {
},
normalize(match) {
match.schema = match.text.slice(0, match.text.indexOf(":") + 1);
LinkifyIt.prototype.normalize(match); // hand over to the global override
},
});

// Protocol-less URLs ("//example.com") have to be rewritten to http;
// otherwise, when TL is hosted on https, the remote link would
// incorrectly use https as well.
// See https://github.com/thelounge/thelounge/issues/2525
//
// The validation logic is reused as-is from linkify's own "//" schema;
// only the normalizer is replaced. The built-in schema is looked up
// before add() is called, since add() registers the new definition for "//".
const builtinProtocolLessSchema = (linkify as any).__schemas__["//"];

linkify.add("//", {
	validate: builtinProtocolLessSchema.validate,
	normalize: (match) => {
		match.url = "http:" + match.url;
		// An empty schema counts as "no schema" for findLinksWithSchema.
		match.schema = "";
	},
});

export function findLinks(text: string) {
const matches = linkify.match(text) as NoSchemaMatch[];
const matches = linkify.match(text);

if (!matches) {
return [];
Expand All @@ -88,16 +77,16 @@ export function findLinks(text: string) {
}

export function findLinksWithSchema(text: string) {
const matches = linkify.match(text) as NoSchemaMatch[];
const matches = linkify.match(text);

if (!matches) {
return [];
}

return matches.filter((url) => !url.noschema).map(makeLinkPart);
return matches.filter((url) => !!url.schema).map(makeLinkPart);
}

function makeLinkPart(url: NoSchemaMatch): LinkPart {
function makeLinkPart(url: Match): LinkPart {
return {
start: url.index,
end: url.lastIndex,
Expand Down
35 changes: 35 additions & 0 deletions test/shared/findLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,26 @@ describe("findLinks", () => {
expect(actual).to.deep.equal(expected);
});

it("should parse mailto links", () => {
	// A bare address and an explicit mailto: link must both yield mailto: URLs.
	const links = findLinks("mail@example.com mailto:mail@example.org");

	expect(links).to.deep.equal([
		{link: "mailto:mail@example.com", start: 0, end: 16},
		{link: "mailto:mail@example.org", start: 17, end: 40},
	]);
});

it("should not return urls with no schema if flag is specified", () => {
const input = "https://example.global //example.com http://example.group example.py";
const expected = [
Expand All @@ -373,6 +393,21 @@ describe("findLinks", () => {
expect(actual).to.deep.equal(expected);
});

it("should use http for protocol-less URLs", () => {
	// "//example.com" must be linked with an explicit http: protocol.
	const links = findLinks("//example.com");

	expect(links).to.deep.equal([{link: "http://example.com", start: 0, end: 13}]);
});

it("should find web+ schema urls", () => {
const input = "web+ap://instance.example/@Example web+whatever://example.com?some=value";
const expected = [
Expand Down

0 comments on commit dd24cb1

Please sign in to comment.