From bc7d956f77e36771074257ce08d2f22689a1353f Mon Sep 17 00:00:00 2001 From: Tobias Naumann Date: Mon, 18 Mar 2024 16:30:13 +0100 Subject: [PATCH 1/2] Add Unicode normalization and IDNA encoding to qute-pass userscript for the 'pass' mode (gopass support missing) --- misc/userscripts/qute-pass | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/misc/userscripts/qute-pass b/misc/userscripts/qute-pass index 70a497b63a5..405e340b54b 100755 --- a/misc/userscripts/qute-pass +++ b/misc/userscripts/qute-pass @@ -40,11 +40,13 @@ import argparse import enum import fnmatch import functools +import idna import os import re import shlex import subprocess import sys +import unicodedata from urllib.parse import urlparse import tldextract @@ -116,6 +118,23 @@ def qute_command(command): fifo.write(command + '\n') fifo.flush() +# Encode candidate string parts as Internationalized Domain Names (IDNA), +# applying Unicode normalization first. This allows (non-ASCII) pass entries +# to be matched properly against the corresponding domain names. +def idna_encode(name): + # Do Unicode normalization first. We use form NFKC because: + # 1. Compatibility normalization unifies sequences that have "the same meaning in some contexts" + # 2. idna.encode() below requires the Unicode strings to be in normalization form C + # See https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms + unicode_normalized = unicodedata.normalize("NFKC", name) + # Empty strings cannot be encoded; they appear, for example, as empty + # parts in split_path. If something like this happens, we just fall back + # to the Unicode representation (which may already be ASCII then). 
+ try: + idna_encoded = idna.encode(unicode_normalized) + except idna.IDNAError: + idna_encoded = unicode_normalized + return idna_encoded def find_pass_candidates(domain, unfiltered=False): candidates = [] @@ -140,9 +159,13 @@ def find_pass_candidates(domain, unfiltered=False): split_path = pass_path.split(os.path.sep) for secret in secrets: secret_base = os.path.splitext(secret)[0] - if not unfiltered and domain not in (split_path + [secret_base]): + idna_domain = idna_encode(domain) + idna_split_path = [idna_encode(part) for part in split_path] + idna_secret_base = idna_encode(secret_base) + if not unfiltered and idna_domain not in (idna_split_path + [idna_secret_base]): continue + # Append the unencoded Unicode path/name since this is how pass uses them candidates.append(os.path.join(pass_path, secret_base)) return candidates From 12ebc843d1474ef0574848a8b35671e85b3d2e6e Mon Sep 17 00:00:00 2001 From: Tobias Naumann Date: Fri, 24 May 2024 00:16:04 +0200 Subject: [PATCH 2/2] Move idna_encode function calls out of loops whenever possible to avoid repeated computations --- misc/userscripts/qute-pass | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/userscripts/qute-pass b/misc/userscripts/qute-pass index 405e340b54b..0b483c0e265 100755 --- a/misc/userscripts/qute-pass +++ b/misc/userscripts/qute-pass @@ -149,6 +149,7 @@ def find_pass_candidates(domain, unfiltered=False): if unfiltered or domain in password: candidates.append(password) else: + idna_domain = idna_encode(domain) for path, directories, file_names in os.walk(arguments.password_store, followlinks=True): secrets = fnmatch.filter(file_names, '*.gpg') if not secrets: @@ -157,10 +158,9 @@ def find_pass_candidates(domain, unfiltered=False): # Strip password store path prefix to get the relative pass path pass_path = path[len(arguments.password_store):] split_path = pass_path.split(os.path.sep) + idna_split_path = [idna_encode(part) for part in split_path] for secret in secrets: 
secret_base = os.path.splitext(secret)[0] - idna_domain = idna_encode(domain) - idna_split_path = [idna_encode(part) for part in split_path] idna_secret_base = idna_encode(secret_base) if not unfiltered and idna_domain not in (idna_split_path + [idna_secret_base]): continue