diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..7ae415809 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +linter/log diff --git a/.travis.yml b/.travis.yml index ad4188d18..c9a4a7dd9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,21 +1,38 @@ language: c -compiler: - - gcc -# Change this to your needs +compiler: gcc +addons: + apt: + packages: + - python3 + - autoconf + - automake + - autopoint + - libtool + - gettext + - libidn11 + - libidn11-dev + - libidn2-0 + - libidn2-0-dev + - libicu48 + - libicu-dev + - libunistring0 + - libunistring-dev + script: - - DIR=`pwd` - - git clone https://github.com/rockdaboot/libpsl - - cd libpsl - - echo "EXTRA_DIST =" >gtk-doc.make - - echo "CLEANFILES =" >>gtk-doc.make - - autoreconf --install --force --symlink - - OPTIONS="--with-psl-file=$DIR/public_suffix_list.dat --with-psl-testfile=$DIR/tests/test_psl.txt" + - DIR=`pwd` + - cd linter + - ./pslint_selftest.sh + - ./pslint.py ../public_suffix_list.dat + - cd $DIR + - git clone --depth=1 --branch newfmt https://github.com/rockdaboot/libpsl + - cd libpsl + - echo "EXTRA_DIST =" >gtk-doc.make + - echo "CLEANFILES =" >>gtk-doc.make + - autoreconf --install --force --symlink + - OPTIONS="--with-psl-file=$DIR/public_suffix_list.dat --with-psl-testfile=$DIR/tests/tests.txt" # Test PSL data with libicu (IDNA2008 UTS#46) - - ./configure -C --enable-runtime=libicu --enable-builtin=libicu $OPTIONS && make clean && make check -j4 + - ./configure -C --enable-runtime=libicu --enable-builtin=libicu $OPTIONS && make clean && make check -j4 # TEST PSL data with libidn2 (IDNA2008) - - ./configure -C --enable-runtime=libidn2 --enable-builtin=libidn2 $OPTIONS && make clean && make check -j4 +# - ./configure -C --enable-runtime=libidn2 --enable-builtin=libidn2 $OPTIONS && make clean && make check -j4 # TEST PSL data with libidn (IDNA2003) - - ./configure -C --enable-runtime=libidn --enable-builtin=libidn $OPTIONS && make clean && make check -j4 -before_install: - - 
sudo apt-get -qq update - - sudo apt-get -q install autoconf automake autopoint libtool gettext libidn11 libidn11-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev libunistring0 libunistring-dev +# - ./configure -C --enable-runtime=libidn --enable-builtin=libidn $OPTIONS && make clean && make check -j4 diff --git a/linter/README.md b/linter/README.md new file mode 100644 index 000000000..9001071fa --- /dev/null +++ b/linter/README.md @@ -0,0 +1,35 @@ +This directory contains a linter for the Public Suffix List. + +Before you commit any changes to the PSL, please use the +linter to check the syntax. + +Usage +===== + +(from the repo's main directory) + +$ linter/pslint.py public_suffix_list.dat + +$? is set to 0 on success, else it is set to 1. + + +Selftest +======== + +Every change on pslint.py should be followed by a self-test. + +``` +$ cd linter +$ ./pslint_selftest.sh +test_allowedchars: OK +test_dots: OK +test_duplicate: OK +test_exception: OK +test_punycode: OK +test_section1: OK +test_section2: OK +test_section3: OK +test_section4: OK +test_spaces: OK +test_wildcard: OK +``` diff --git a/linter/pslint.py b/linter/pslint.py new file mode 100755 index 000000000..17c00bf5d --- /dev/null +++ b/linter/pslint.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*-# +# +# PSL linter written in python +# +# Copyright 2016 Tim Rühsen (tim dot ruehsen at gmx dot de). All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
import sys
import codecs

# Module-wide linter state. warning()/error() read nline and orig_line to
# prefix and suffix every diagnostic; lint_psl() updates them per input line.
nline = 0
line = ""
orig_line = ""
warnings = 0
errors = 0
skip_order_check = False


def _report(severity, msg):
    """Print one diagnostic as '<nline>: <severity>: <msg>[: '<orig_line>']'.

    The quoted source line is appended only when orig_line is non-empty.
    """
    suffix = ": '" + orig_line + "'" if orig_line else ""
    print('%d: %s: %s%s' % (nline, severity, msg, suffix))


def warning(msg):
    """Print a warning for the current line and bump the warning counter."""
    global warnings
    _report('warning', msg)
    warnings += 1


def error(msg):
    """Print an error for the current line and bump the error counter."""
    global errors
    _report('error', msg)
    errors += 1


def print_psl(list):
    """Print each domain of *list*, one per line, in normal label order.

    Every domain is a sequence of labels stored in reversed order (TLD
    first), as built by lint_psl().
    """
    # NOTE: the parameter name shadows the builtin; it is kept unchanged so
    # that existing callers (positional or keyword) keep working.
    for domain in list:
        print(".".join(str(label) for label in reversed(domain)))


def psl_key(s):
    """Return the sort rank of a label: 0 for '*', 1 for '!', 2 otherwise.

    Only the first character of *s* is inspected. An empty string ranks as
    a plain label (2) instead of raising IndexError.
    """
    first = s[:1]
    if first == '*':
        return 0
    if first == '!':
        return 1
    return 2


def check_order(group):
    """Check the correct order of a domain group.

    *group* is a list of rules, each a list of labels in reversed order.
    A warning is printed if the TLD differs within the group or if the
    group is not sorted. The group is always emptied in place on return.
    """
    global skip_order_check

    try:
        if skip_order_check or len(group) < 2:
            skip_order_check = False
            return

        # check if the TLD is identical within the group
        if any(group[0][0] != labels[0] for labels in group):
            warning('Domain group TLD is not consistent')

        # sort by # of labels, then wildcard/exception/plain, then
        # label-by-label (labels are in reversed order).
        # psl_key() only looks at the first character, so passing the whole
        # label is equivalent and does not crash on an empty label.
        sorted_group = sorted(group, key=lambda labels: (len(labels), psl_key(labels[-1]), labels))

        if group != sorted_group:
            warning('Incorrectly sorted group of domains')
            print(" " + str(group))
            print(" " + str(sorted_group))
            print("Correct sorting would be:")
            print_psl(sorted_group)

    finally:
        del group[:]


def lint_psl(infile):
    """Parse a PSL file and perform syntax checking.

    *infile* is any iterable of text lines (an open file, or a list of
    strings). Problems are reported through warning() and error(), which
    print to stdout and update the module-level counters. Nothing is
    returned.

    Line numbering restarts at 1 on every call.
    """
    global orig_line, nline

    PSL_FLAG_EXCEPTION = (1 << 0)
    PSL_FLAG_WILDCARD = (1 << 1)
    PSL_FLAG_ICANN = (1 << 2)    # entry of ICANN section
    PSL_FLAG_PRIVATE = (1 << 3)  # entry of PRIVATE section
    PSL_FLAG_PLAIN = (1 << 4)    # just used for PSL syntax checking

    # Marker comment that closes each kind of section.
    end_marker = {
        PSL_FLAG_ICANN: "// ===END ICANN DOMAINS===",
        PSL_FLAG_PRIVATE: "// ===END PRIVATE DOMAINS===",
    }

    line2number = {}  # rule (without '*.' / '!') -> line number of first use
    line2flag = {}    # rule (without '*.' / '!') -> flags of last use
    group = []
    section = 0
    icann_sections = 0
    private_sections = 0

    # Restart numbering so that a second call in the same process does not
    # continue counting from the previous input.
    nline = 0

    lines = [raw.strip('\n') for raw in infile]

    for line in lines:
        nline += 1

        # check for leading/trailing whitespace; tabs and carriage returns
        # are made visible in the quoted line
        stripped = line.strip()
        if stripped != line:
            orig_line = line.replace('\t', '\\t').replace('\r', '^M')
            warning('Leading/Trailing whitespace')
        else:
            orig_line = line
        line = stripped

        # empty line (end of sorted domain group)
        # NOTE: the order check (check_order) is currently not invoked here.
        if not line:
            continue

        # check for section begin/end
        if line.startswith("//"):
            is_begin = line[3:11] == "===BEGIN"
            is_end = line[3:9] == "===END"

            if section == 0:
                if line == "// ===BEGIN ICANN DOMAINS===":
                    section = PSL_FLAG_ICANN
                    icann_sections += 1
                elif line == "// ===BEGIN PRIVATE DOMAINS===":
                    section = PSL_FLAG_PRIVATE
                    private_sections += 1
                elif is_begin:
                    error('Unexpected begin of unknown section')
                elif is_end:
                    error('End of section without previous begin')
            elif line == end_marker[section]:
                section = 0
            elif is_begin:
                error('Unexpected begin of section')
            elif is_end:
                error('Unexpected end of section')

            continue  # processing of comments ends here

        # No rule must be outside of a section
        if section == 0:
            error('Rule outside of section')

        # Collected for check_order(), which is not called at the moment.
        group.append(list(reversed(line.split('.'))))

        # decode UTF-8 input into unicode, needed only for python 2.x;
        # on python 3 the encode() call only validates the text
        try:
            if sys.version_info[0] < 3:
                line = line.decode('utf-8')
            else:
                line.encode('utf-8')
        except (UnicodeDecodeError, UnicodeEncodeError):
            orig_line = None
            error('Invalid UTF-8 character')
            continue

        # each rule must be lowercase (or more exactly: not uppercase and not titlecase)
        if line != line.lower():
            error('Rule must be lowercase')

        # strip one leading wildcard
        flags = section
        if line.startswith('*.'):
            flags |= PSL_FLAG_WILDCARD
            line = line[2:]

        # startswith() instead of line[0]: the rule '*.' leaves an empty
        # string here, which must be reported below, not crash the linter.
        if line.startswith('!'):
            flags |= PSL_FLAG_EXCEPTION
            line = line[1:]
        else:
            flags |= PSL_FLAG_PLAIN

        # wildcard and exception must not combine
        if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
            error('Combination of wildcard and exception')
            continue

        labels = line.split('.')

        # a multi-label exception needs a wildcard rule on its parent domain
        # that appeared earlier in the file
        if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
            domain = ".".join(labels[1:])
            if not line2flag.get(domain, 0) & PSL_FLAG_WILDCARD:
                error('Exception without previous wildcard')

        # at most one error is reported per label
        for label in labels:
            if not label:
                error('Leading/trailing or multiple dot')
            elif label.startswith('xn--'):
                error('Punycode found')
            elif '--' in label:
                error('Double minus found')
            # allowed are a-z,0-9,- and unicode >= 128 (maybe that can be finetuned a bit !?)
            elif any(not c.isalnum() and c != '-' and ord(c) < 128 for c in label):
                error('Illegal character')

        if line in line2flag:
            # Found existing entry:
            #   Combination of exception and plain rule is contradictory
            #     !foo.bar
            #     foo.bar
            #   Doublette, since *.foo.bar implies foo.bar:
            #     foo.bar
            #     *.foo.bar
            #   Allowed:
            #     !foo.bar
            #     *.foo.bar
            error('Found doublette/ambiguity (previous line was %d)' % line2number[line])

        line2number[line] = nline
        line2flag[line] = flags

    # the remaining diagnostics are about the whole file, not a single line
    orig_line = None

    if section == PSL_FLAG_ICANN:
        error('ICANN section not closed')
    elif section == PSL_FLAG_PRIVATE:
        error('PRIVATE section not closed')

    if icann_sections < 1:
        warning('No ICANN section found')
    elif icann_sections > 1:
        warning('%d ICANN sections found' % icann_sections)

    if private_sections < 1:
        warning('No PRIVATE section found')
    elif private_sections > 1:
        warning('%d PRIVATE sections found' % private_sections)


def usage():
    """Print the usage and terminate the process with exit status 1."""
    print('usage: %s PSLfile' % sys.argv[0])
    print('or %s - # To read PSL from STDIN' % sys.argv[0])
    # sys.exit() rather than the site-provided exit() builtin, which is
    # meant for interactive use and may be missing.
    sys.exit(1)

+def main(): + """Check syntax of a PSL file""" + if len(sys.argv) < 2: + usage() + + with sys.stdin if sys.argv[-1] == '-' else open(sys.argv[-1], 'r', encoding='utf-8', errors="surrogateescape") as infile: + lint_psl(infile) + + return errors != 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/linter/pslint_selftest.sh b/linter/pslint_selftest.sh new file mode 100755 index 000000000..8ce57a63c --- /dev/null +++ b/linter/pslint_selftest.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +rc=0 +rm -rf log +mkdir -p log + +# add CR if missing, it won't possibly survive git +sed -i -e 's/^e.example.com$/e.example.com\r/g' test_spaces.input + +for file in `ls *.input|cut -d'.' -f1`; do + echo -n "${file}: " + ./pslint.py ${file}.input >log/${file}.log 2>&1 + diff -u ${file}.expected log/${file}.log >log/${file}.diff + if [ $? -eq 0 ]; then + echo OK + rm log/${file}.diff log/${file}.log + else + echo FAILED + cat log/${file}.diff + rc=1 + fi +done + +# remove CR, to not appear as changed to git +sed -i -e 's/^e.example.com\r$/e.example.com/g' test_spaces.input + +if [ $rc -eq 0 ]; then + rmdir log +fi + +exit $rc diff --git a/linter/test_allowedchars.expected b/linter/test_allowedchars.expected new file mode 100644 index 000000000..d39fba742 --- /dev/null +++ b/linter/test_allowedchars.expected @@ -0,0 +1,4 @@ +10: error: Illegal character: 'a.exam#ple.com' +11: error: Illegal character: 'b.exam ple.com' +13: error: Invalid UTF-8 character +15: warning: No PRIVATE section found diff --git a/linter/test_allowedchars.input b/linter/test_allowedchars.input new file mode 100644 index 000000000..f6d87783c --- /dev/null +++ b/linter/test_allowedchars.input @@ -0,0 +1,15 @@ +// test: +// - label contains illegal character +// - c. is valid UTF-8 +// - d. 
has invalid UTF-8 code for the TLD +// +// best viewed with 'LC_ALL=C vi ' + +// ===BEGIN ICANN DOMAINS=== + +a.exam#ple.com +b.exam ple.com +c.测试 +d.测 + +// ===END ICANN DOMAINS=== diff --git a/linter/test_dots.expected b/linter/test_dots.expected new file mode 100644 index 000000000..19878402a --- /dev/null +++ b/linter/test_dots.expected @@ -0,0 +1,4 @@ +9: error: Leading/trailing or multiple dot: '.a.example.com' +10: error: Leading/trailing or multiple dot: 'b.example.com.' +11: error: Leading/trailing or multiple dot: 'c..example.com' +13: warning: No PRIVATE section found diff --git a/linter/test_dots.input b/linter/test_dots.input new file mode 100644 index 000000000..3290441d7 --- /dev/null +++ b/linter/test_dots.input @@ -0,0 +1,13 @@ +// test: +// - leading dot +// - trailing dot +// - consecutive dots + +// ===BEGIN ICANN DOMAINS=== + +// example.com: https://www.iana.org/domains/reserved +.a.example.com +b.example.com. +c..example.com + +// ===END ICANN DOMAINS=== diff --git a/linter/test_duplicate.expected b/linter/test_duplicate.expected new file mode 100644 index 000000000..40a5e347c --- /dev/null +++ b/linter/test_duplicate.expected @@ -0,0 +1,6 @@ +9: error: Found doublette/ambiguity (previous line was 8): '*.com' +13: error: Found doublette/ambiguity (previous line was 12): '!www.com' +17: error: Found doublette/ambiguity (previous line was 16): '*.example.com' +21: error: Found doublette/ambiguity (previous line was 20): 'example1.com' +24: error: Found doublette/ambiguity (previous line was 17): 'example.com' +26: warning: No PRIVATE section found diff --git a/linter/test_duplicate.input b/linter/test_duplicate.input new file mode 100644 index 000000000..48cd9ffc2 --- /dev/null +++ b/linter/test_duplicate.input @@ -0,0 +1,26 @@ +// test: +// - valid wildcard usage +// - invalid wildcard usage + +// ===BEGIN ICANN DOMAINS=== + +// *.com implicitely includes .com +com +*.com + +// double exception +!www.com +!www.com + +// double wildcard 
+*.example.com +*.example.com + +// double plain rule +example1.com +example1.com + +// redundant/overlapping rule +example.com + +// ===END ICANN DOMAINS=== diff --git a/linter/test_exception.expected b/linter/test_exception.expected new file mode 100644 index 000000000..f656d6ff4 --- /dev/null +++ b/linter/test_exception.expected @@ -0,0 +1,6 @@ +17: error: Leading/trailing or multiple dot: '!.example.com' +18: error: Illegal character: 'w!w.example.com' +19: error: Found doublette/ambiguity (previous line was 12): '!www.example.com' +20: error: Exception without previous wildcard: '!a.b.example.com' +21: error: Exception without previous wildcard: '!a.c.example.com' +23: warning: No PRIVATE section found diff --git a/linter/test_exception.input b/linter/test_exception.input new file mode 100644 index 000000000..96b1d7bcb --- /dev/null +++ b/linter/test_exception.input @@ -0,0 +1,23 @@ +// test: +// - valid exception +// - invalid exceptions +// - same exception twice +// - exception without wildcard +// - exception with prevailing '*' rule (!localhost) + +// ===BEGIN ICANN DOMAINS=== + +// valid +*.example.com +!www.example.com +!localhost +c.example.com + +// invalid +!.example.com +w!w.example.com +!www.example.com +!a.b.example.com +!a.c.example.com + +// ===END ICANN DOMAINS=== diff --git a/linter/test_punycode.expected b/linter/test_punycode.expected new file mode 100644 index 000000000..1c4ab0e89 --- /dev/null +++ b/linter/test_punycode.expected @@ -0,0 +1,3 @@ +7: error: Punycode found: 'a.xn--0zwm56d' +8: error: Double minus found: 'a.ex--ample.com' +10: warning: No PRIVATE section found diff --git a/linter/test_punycode.input b/linter/test_punycode.input new file mode 100644 index 000000000..be4921021 --- /dev/null +++ b/linter/test_punycode.input @@ -0,0 +1,10 @@ +// test: +// - label is punycode +// - label has double minus + +// ===BEGIN ICANN DOMAINS=== + +a.xn--0zwm56d +a.ex--ample.com + +// ===END ICANN DOMAINS=== diff --git 
a/linter/test_section1.expected b/linter/test_section1.expected new file mode 100644 index 000000000..d5eedfba3 --- /dev/null +++ b/linter/test_section1.expected @@ -0,0 +1,3 @@ +4: error: Rule outside of section: 'example.com' +4: warning: No ICANN section found +4: warning: No PRIVATE section found diff --git a/linter/test_section1.input b/linter/test_section1.input new file mode 100644 index 000000000..25f6b66b4 --- /dev/null +++ b/linter/test_section1.input @@ -0,0 +1,4 @@ +// test: +// - no section at all + +example.com diff --git a/linter/test_section2.expected b/linter/test_section2.expected new file mode 100644 index 000000000..a5eafb955 --- /dev/null +++ b/linter/test_section2.expected @@ -0,0 +1,2 @@ +11: warning: 2 ICANN sections found +11: warning: No PRIVATE section found diff --git a/linter/test_section2.input b/linter/test_section2.input new file mode 100644 index 000000000..90f40a248 --- /dev/null +++ b/linter/test_section2.input @@ -0,0 +1,11 @@ +// test: +// - two ICANN sections + +// ===BEGIN ICANN DOMAINS=== + +example.com + +// ===END ICANN DOMAINS=== + +// ===BEGIN ICANN DOMAINS=== +// ===END ICANN DOMAINS=== diff --git a/linter/test_section3.expected b/linter/test_section3.expected new file mode 100644 index 000000000..fd281214b --- /dev/null +++ b/linter/test_section3.expected @@ -0,0 +1,2 @@ +11: warning: No ICANN section found +11: warning: 2 PRIVATE sections found diff --git a/linter/test_section3.input b/linter/test_section3.input new file mode 100644 index 000000000..f3af1854f --- /dev/null +++ b/linter/test_section3.input @@ -0,0 +1,11 @@ +// test: +// - two PRIVATE sections + +// ===BEGIN PRIVATE DOMAINS=== + +example.com + +// ===END PRIVATE DOMAINS=== + +// ===BEGIN PRIVATE DOMAINS=== +// ===END PRIVATE DOMAINS=== diff --git a/linter/test_section4.expected b/linter/test_section4.expected new file mode 100644 index 000000000..e8a0906ac --- /dev/null +++ b/linter/test_section4.expected @@ -0,0 +1,3 @@ +8: error: Unexpected end of 
section: '// ===END PRIVATE DOMAINS===' +8: error: ICANN section not closed +8: warning: No PRIVATE section found diff --git a/linter/test_section4.input b/linter/test_section4.input new file mode 100644 index 000000000..98ca33806 --- /dev/null +++ b/linter/test_section4.input @@ -0,0 +1,8 @@ +// test: +// - ICANN section improperly closed + +// ===BEGIN ICANN DOMAINS=== + +example.com + +// ===END PRIVATE DOMAINS=== diff --git a/linter/test_spaces.expected b/linter/test_spaces.expected new file mode 100644 index 000000000..0adc538b6 --- /dev/null +++ b/linter/test_spaces.expected @@ -0,0 +1,6 @@ +12: warning: Leading/Trailing whitespace: ' a.example.com' +13: warning: Leading/Trailing whitespace: 'b.example.com ' +14: warning: Leading/Trailing whitespace: '\tc.example.com' +15: warning: Leading/Trailing whitespace: 'd.example.com\t' +17: warning: Leading/Trailing whitespace: ' ' +19: warning: No PRIVATE section found diff --git a/linter/test_spaces.input b/linter/test_spaces.input new file mode 100644 index 000000000..e35dbd3e8 --- /dev/null +++ b/linter/test_spaces.input @@ -0,0 +1,19 @@ +// test: +// - leading space +// - trailing space, empty line with spaces +// - leading tab +// - trailing tab +// - line ends with CRLF (pslint_selftest will add one to e.example.com and removed it after testing) +// - empty line with spaces + +// ===BEGIN ICANN DOMAINS=== + +// example.com: https://www.iana.org/domains/reserved + a.example.com +b.example.com + c.example.com +d.example.com +e.example.com + + +// ===END ICANN DOMAINS=== diff --git a/linter/test_wildcard.expected b/linter/test_wildcard.expected new file mode 100644 index 000000000..54570944e --- /dev/null +++ b/linter/test_wildcard.expected @@ -0,0 +1,5 @@ +11: error: Illegal character: '**.com' +12: error: Illegal character: 'a*.com' +13: error: Illegal character: 'b.*.com' +14: error: Illegal character: 'a.b.*' +16: warning: No PRIVATE section found diff --git a/linter/test_wildcard.input 
b/linter/test_wildcard.input new file mode 100644 index 000000000..1bfb05ae2 --- /dev/null +++ b/linter/test_wildcard.input @@ -0,0 +1,16 @@ +// test: +// - valid wildcard usage +// - invalid wildcard usage + +// ===BEGIN ICANN DOMAINS=== + +// valid +*.com + +// invalid +**.com +a*.com +b.*.com +a.b.* + +// ===END ICANN DOMAINS=== diff --git a/tests/tests.txt b/tests/tests.txt new file mode 100644 index 000000000..98377cfea --- /dev/null +++ b/tests/tests.txt @@ -0,0 +1,98 @@ +// Any copyright is dedicated to the Public Domain. +// https://creativecommons.org/publicdomain/zero/1.0/ + +// null input. +null null +// Mixed case. +COM null +example.COM example.com +WwW.example.COM example.com +// Leading dot. +.com null +.example null +.example.com null +.example.example null +// Unlisted TLD. +example null +example.example example.example +b.example.example example.example +a.b.example.example example.example +// Listed, but non-Internet, TLD. +//local null +//example.local null +//b.example.local null +//a.b.example.local null +// TLD with only 1 rule. +biz null +domain.biz domain.biz +b.domain.biz domain.biz +a.b.domain.biz domain.biz +// TLD with some 2-level rules. +com null +example.com example.com +b.example.com example.com +a.b.example.com example.com +uk.com null +example.uk.com example.uk.com +b.example.uk.com example.uk.com +a.b.example.uk.com example.uk.com +test.ac test.ac +// TLD with only 1 (wildcard) rule. +mm null +c.mm null +b.c.mm b.c.mm +a.b.c.mm b.c.mm +// More complex TLD. +jp null +test.jp test.jp +www.test.jp test.jp +ac.jp null +test.ac.jp test.ac.jp +www.test.ac.jp test.ac.jp +kyoto.jp null +test.kyoto.jp test.kyoto.jp +ide.kyoto.jp null +b.ide.kyoto.jp b.ide.kyoto.jp +a.b.ide.kyoto.jp b.ide.kyoto.jp +c.kobe.jp null +b.c.kobe.jp b.c.kobe.jp +a.b.c.kobe.jp b.c.kobe.jp +city.kobe.jp city.kobe.jp +www.city.kobe.jp city.kobe.jp +// TLD with a wildcard rule and exceptions. 
+ck null +test.ck null +b.test.ck b.test.ck +a.b.test.ck b.test.ck +www.ck www.ck +www.www.ck www.ck +// US K12. +us null +test.us test.us +www.test.us test.us +ak.us null +test.ak.us test.ak.us +www.test.ak.us test.ak.us +k12.ak.us null +test.k12.ak.us test.k12.ak.us +www.test.k12.ak.us test.k12.ak.us +// IDN labels. +食狮.com.cn 食狮.com.cn +食狮.公司.cn 食狮.公司.cn +www.食狮.公司.cn 食狮.公司.cn +shishi.公司.cn shishi.公司.cn +公司.cn null +食狮.中国 食狮.中国 +www.食狮.中国 食狮.中国 +shishi.中国 shishi.中国 +中国 null +// Same as above, but punycoded. +xn--85x722f.com.cn xn--85x722f.com.cn +xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn +www.xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn +shishi.xn--55qx5d.cn shishi.xn--55qx5d.cn +xn--55qx5d.cn null +xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s +www.xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s +shishi.xn--fiqs8s shishi.xn--fiqs8s +xn--fiqs8s null diff --git a/tools/convert_tests b/tools/convert_tests new file mode 100755 index 000000000..a9f5d5145 --- /dev/null +++ b/tools/convert_tests @@ -0,0 +1,7 @@ +#!/bin/sh +# +# Written 2016 by Tim Ruehsen (tim dot ruehsen at gmx dot de) +# +# Convert test_psl.txt to tests.txt (sed hack) + +sed -e "s/checkPublicSuffix('*\([^']*\)'*, '*\([^']*\)'*);/\1 \2/g" test_psl.txt >tests.txt