Skip to content

Commit

Permalink
more robust URI generation
Browse files (browse the repository at this point in the history)
  • Loading branch information
proycon committed May 12, 2023
1 parent 4740b0d commit c859dc8
Showing 1 changed file with 7 additions and 13 deletions.
20 changes: 7 additions & 13 deletions codemeta/common.py
Expand Up @@ -4,6 +4,7 @@
import requests
import random
import re
import unicodedata

from collections import Counter, defaultdict
from tempfile import gettempdir
Expand Down Expand Up @@ -185,17 +186,7 @@
('^', '-ge-'), #used in npm version
('>', '-gt-'),
('<', '-lt-'),
('=', '-eq-'),
(' ', '-'),
('&', '-',),
('/', '-',),
('+', '-',),
(':', '-',),
(';', '-'),
(',',''),
('----', '-'),
('---', '-'),
('--', '-'),
('=', '-eq-')
]

#keywords that may be indicative of a certain interface type
Expand Down Expand Up @@ -1005,14 +996,17 @@ def urijoin(*args) -> str:
return s

def generate_uri(identifier: Union[str,None] = None, baseuri: Union[str,None] = None, prefix: str= ""):
"""Generate an URI (aka IRI)"""
"""Generate an URI"""
if not identifier:
identifier = "N" + "%032x" % random.getrandbits(128)
else:
identifier = identifier.lower()
#some symbols we handle specially:
for pattern, replacement in IDENTIFIER_MAP:
identifier = identifier.replace(pattern,replacement) #not the most efficient but it'll do
identifier = identifier.strip("-")
identifier = unicodedata.normalize("NFKD", identifier)
identifier = re.sub(r"[^a-z0-9]+", "_", identifier).strip("-")
identifier = re.sub(r"[_]+", "_", identifier)
if prefix and prefix[-1] not in ('/','#'):
prefix += '/'
if not baseuri:
Expand Down

0 comments on commit c859dc8

Please sign in to comment.