From bc7d956f77e36771074257ce08d2f22689a1353f Mon Sep 17 00:00:00 2001
From: Tobias Naumann <tobias.naumann@iml.fraunhofer.de>
Date: Mon, 18 Mar 2024 16:30:13 +0100
Subject: [PATCH 1/2] Add Unicode normalization and IDNA encoding to qute-pass
 userscript for the 'pass' mode (gopass support missing)

---
 misc/userscripts/qute-pass | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/misc/userscripts/qute-pass b/misc/userscripts/qute-pass
index 70a497b63a5..405e340b54b 100755
--- a/misc/userscripts/qute-pass
+++ b/misc/userscripts/qute-pass
@@ -40,11 +40,13 @@ import argparse
 import enum
 import fnmatch
 import functools
+import idna
 import os
 import re
 import shlex
 import subprocess
 import sys
+import unicodedata
 from urllib.parse import urlparse
 
 import tldextract
@@ -116,6 +118,23 @@ def qute_command(command):
         fifo.write(command + '\n')
         fifo.flush()
 
+# Encode candidate string parts as an Internationalized Domain Name, applying
+# Unicode normalization first. This allows (non-ASCII) pass entries to be
+# matched properly against the corresponding domain names.
+def idna_encode(name):
+    # Do Unicode normalization first. We use form NFKC because:
+    # 1. Compatibility normalization unifies sequences that have "the same meaning in some contexts"
+    # 2. idna.encode() below requires the Unicode strings to be in normalization form C
+    # See https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms
+    unicode_normalized = unicodedata.normalize("NFKC", name)
+    # Empty strings cannot be encoded; they appear, for example, as empty
+    # parts in split_path. If encoding fails for any such input, we just fall
+    # back to the normalized Unicode string (which may already be ASCII).
+    try:
+        idna_encoded = idna.encode(unicode_normalized)
+    except idna.IDNAError:
+        idna_encoded = unicode_normalized
+    return idna_encoded
 
 def find_pass_candidates(domain, unfiltered=False):
     candidates = []
@@ -140,9 +159,13 @@ def find_pass_candidates(domain, unfiltered=False):
             split_path = pass_path.split(os.path.sep)
             for secret in secrets:
                 secret_base = os.path.splitext(secret)[0]
-                if not unfiltered and domain not in (split_path + [secret_base]):
+                idna_domain = idna_encode(domain)
+                idna_split_path = [idna_encode(part) for part in split_path]
+                idna_secret_base = idna_encode(secret_base)
+                if not unfiltered and idna_domain not in (idna_split_path + [idna_secret_base]):
                     continue
 
+                # Append the unencoded Unicode path/name since this is how pass uses them
                 candidates.append(os.path.join(pass_path, secret_base))
     return candidates
 

From 12ebc843d1474ef0574848a8b35671e85b3d2e6e Mon Sep 17 00:00:00 2001
From: Tobias Naumann <tobias.naumann@iml.fraunhofer.de>
Date: Fri, 24 May 2024 00:16:04 +0200
Subject: [PATCH 2/2] Move idna_encode function calls out of loops whenever
 possible to avoid repeated computations

---
 misc/userscripts/qute-pass | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/userscripts/qute-pass b/misc/userscripts/qute-pass
index 405e340b54b..0b483c0e265 100755
--- a/misc/userscripts/qute-pass
+++ b/misc/userscripts/qute-pass
@@ -149,6 +149,7 @@ def find_pass_candidates(domain, unfiltered=False):
             if unfiltered or domain in password:
                 candidates.append(password)
     else:
+        idna_domain = idna_encode(domain)
         for path, directories, file_names in os.walk(arguments.password_store, followlinks=True):
             secrets = fnmatch.filter(file_names, '*.gpg')
             if not secrets:
@@ -157,10 +158,9 @@ def find_pass_candidates(domain, unfiltered=False):
             # Strip password store path prefix to get the relative pass path
             pass_path = path[len(arguments.password_store):]
             split_path = pass_path.split(os.path.sep)
+            idna_split_path = [idna_encode(part) for part in split_path]
             for secret in secrets:
                 secret_base = os.path.splitext(secret)[0]
-                idna_domain = idna_encode(domain)
-                idna_split_path = [idna_encode(part) for part in split_path]
                 idna_secret_base = idna_encode(secret_base)
                 if not unfiltered and idna_domain not in (idna_split_path + [idna_secret_base]):
                     continue