Skip to content

Commit

Permalink
Merge pull request #130 from rockdaboot/master
Browse files Browse the repository at this point in the history
Add pslint.py linter and use it on Travis CI
  • Loading branch information
weppos committed Mar 2, 2016
2 parents fa6e659 + e6065d2 commit e2f2f4b
Show file tree
Hide file tree
Showing 29 changed files with 676 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
linter/log
49 changes: 33 additions & 16 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
language: c
compiler:
- gcc
# Change this to your needs
compiler: gcc
addons:
apt:
packages:
- python3
- autoconf
- automake
- autopoint
- libtool
- gettext
- libidn11
- libidn11-dev
- libidn2-0
- libidn2-0-dev
- libicu48
- libicu-dev
- libunistring0
- libunistring-dev

script:
- DIR=`pwd`
- git clone https://github.com/rockdaboot/libpsl
- cd libpsl
- echo "EXTRA_DIST =" >gtk-doc.make
- echo "CLEANFILES =" >>gtk-doc.make
- autoreconf --install --force --symlink
- OPTIONS="--with-psl-file=$DIR/public_suffix_list.dat --with-psl-testfile=$DIR/tests/test_psl.txt"
- DIR=`pwd`
- cd linter
- ./pslint_selftest.sh
- ./pslint.py ../public_suffix_list.dat
- cd $DIR
- git clone --depth=1 --branch newfmt https://github.com/rockdaboot/libpsl
- cd libpsl
- echo "EXTRA_DIST =" >gtk-doc.make
- echo "CLEANFILES =" >>gtk-doc.make
- autoreconf --install --force --symlink
- OPTIONS="--with-psl-file=$DIR/public_suffix_list.dat --with-psl-testfile=$DIR/tests/tests.txt"
# Test PSL data with libicu (IDNA2008 UTS#46)
- ./configure -C --enable-runtime=libicu --enable-builtin=libicu $OPTIONS && make clean && make check -j4
- ./configure -C --enable-runtime=libicu --enable-builtin=libicu $OPTIONS && make clean && make check -j4
# TEST PSL data with libidn2 (IDNA2008)
- ./configure -C --enable-runtime=libidn2 --enable-builtin=libidn2 $OPTIONS && make clean && make check -j4
# - ./configure -C --enable-runtime=libidn2 --enable-builtin=libidn2 $OPTIONS && make clean && make check -j4
# TEST PSL data with libidn (IDNA2003)
- ./configure -C --enable-runtime=libidn --enable-builtin=libidn $OPTIONS && make clean && make check -j4
before_install:
- sudo apt-get -qq update
- sudo apt-get -q install autoconf automake autopoint libtool gettext libidn11 libidn11-dev libidn2-0 libidn2-0-dev libicu48 libicu-dev libunistring0 libunistring-dev
# - ./configure -C --enable-runtime=libidn --enable-builtin=libidn $OPTIONS && make clean && make check -j4
35 changes: 35 additions & 0 deletions linter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
This directory contains a linter for the Public Suffix List.

Before you commit any changes to the PSL, please use the
linter to check the syntax.

Usage
=====

(from the repo's main directory)

$ linter/pslint.py public_suffix_list.dat

$? is set to 0 on success, else it is set to 1.


Selftest
========

Every change on pslint.py should be followed by a self-test.

```
$ cd linter
$ ./pslint_selftest.sh
test_allowedchars: OK
test_dots: OK
test_duplicate: OK
test_exception: OK
test_punycode: OK
test_section1: OK
test_section2: OK
test_section3: OK
test_section4: OK
test_spaces: OK
test_wildcard: OK
```
271 changes: 271 additions & 0 deletions linter/pslint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-#
#
# PSL linter written in python
#
# Copyright 2016 Tim Rühsen (tim dot ruehsen at gmx dot de). All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import sys
import codecs

nline = 0
line = ""
orig_line = ""
warnings = 0
errors = 0
skip_order_check = False

def warning(msg):
global warnings, orig_line, nline
print('%d: warning: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
warnings += 1

def error(msg):
global errors, orig_line, nline
print('%d: error: %s%s' % (nline, msg, ": \'" + orig_line + "\'" if orig_line else ""))
errors += 1
# skip_order_check = True

def print_psl(list):
for domain in list:
print(".".join(str(label) for label in reversed(domain)))

def psl_key(s):
if s[0] == '*':
return 0
if s[0] == '!':
return 1
return 2

def check_order(group):
"""Check the correct order of a domain group"""
global skip_order_check

try:
if skip_order_check or len(group) < 2:
skip_order_check = False
return

# check if the TLD is the identical within the group
if any(group[0][0] != labels[0] for labels in group):
warning('Domain group TLD is not consistent')

# sort by # of labels, label-by-label (labels are in reversed order)
sorted_group = sorted(group, key = lambda labels: (len(labels), psl_key(labels[-1][0]), labels))

if group != sorted_group:
warning('Incorrectly sorted group of domains')
print(" " + str(group))
print(" " + str(sorted_group))
print("Correct sorting would be:")
print_psl(sorted_group)

finally:
del group[:]


def lint_psl(infile):
"""Parses PSL file and performs syntax checking"""
global orig_line, nline

PSL_FLAG_EXCEPTION = (1<<0)
PSL_FLAG_WILDCARD = (1<<1)
PSL_FLAG_ICANN = (1<<2) # entry of ICANN section
PSL_FLAG_PRIVATE = (1<<3) # entry of PRIVATE section
PSL_FLAG_PLAIN = (1<<4) #just used for PSL syntax checking

line2number = {}
line2flag = {}
group = []
section = 0
icann_sections = 0
private_sections = 0

lines = [line.strip('\n') for line in infile]

for line in lines:
nline += 1

# check for leadind/trailing whitespace
stripped = line.strip()
if stripped != line:
line = line.replace('\t','\\t')
line = line.replace('\r','^M')
orig_line = line
warning('Leading/Trailing whitespace')
orig_line = line
line = stripped

# empty line (end of sorted domain group)
if not line:
# check_order(group)
continue

# check for section begin/end
if line[0:2] == "//":
# check_order(group)

if section == 0:
if line == "// ===BEGIN ICANN DOMAINS===":
section = PSL_FLAG_ICANN
icann_sections += 1
elif line == "// ===BEGIN PRIVATE DOMAINS===":
section = PSL_FLAG_PRIVATE
private_sections += 1
elif line[3:11] == "===BEGIN":
error('Unexpected begin of unknown section')
elif line[3:9] == "===END":
error('End of section without previous begin')
elif section == PSL_FLAG_ICANN:
if line == "// ===END ICANN DOMAINS===":
section = 0
elif line[3:11] == "===BEGIN":
error('Unexpected begin of section: ')
elif line[3:9] == "===END":
error('Unexpected end of section')
elif section == PSL_FLAG_PRIVATE:
if line == "// ===END PRIVATE DOMAINS===":
section = 0
elif line[3:11] == "===BEGIN":
error('Unexpected begin of section')
elif line[3:9] == "===END":
error('Unexpected end of section')

continue # processing of comments ends here

# No rule must be outside of a section
if section == 0:
error('Rule outside of section')

group.append(list(reversed(line.split('.'))))

# decode UTF-8 input into unicode, needed only for python 2.x
try:
if sys.version_info[0] < 3:
line = line.decode('utf-8')
else:
line.encode('utf-8')
except (UnicodeDecodeError, UnicodeEncodeError):
orig_line = None
error('Invalid UTF-8 character')
continue

# each rule must be lowercase (or more exactly: not uppercase and not titlecase)
if line != line.lower():
error('Rule must be lowercase')

# strip leading wildcards
flags = section
# while line[0:2] == '*.':
if line[0:2] == '*.':
flags |= PSL_FLAG_WILDCARD
line = line[2:]

if line[0] == '!':
flags |= PSL_FLAG_EXCEPTION
line = line[1:]
else:
flags |= PSL_FLAG_PLAIN

# wildcard and exception must not combine
if flags & PSL_FLAG_WILDCARD and flags & PSL_FLAG_EXCEPTION:
error('Combination of wildcard and exception')
continue

labels = line.split('.')

if flags & PSL_FLAG_EXCEPTION and len(labels) > 1:
domain = ".".join(str(label) for label in labels[1:])
if not domain in line2flag:
error('Exception without previous wildcard')
elif not line2flag[domain] & PSL_FLAG_WILDCARD:
error('Exception without previous wildcard')

for label in labels:
if not label:
error('Leading/trailing or multiple dot')
continue

if label[0:4] == 'xn--':
error('Punycode found')
continue

if '--' in label:
error('Double minus found')
continue

# allowed are a-z,0-9,- and unicode >= 128 (maybe that can be finetuned a bit !?)
for c in label:
if not c.isalnum() and c != '-' and ord(c) < 128:
error('Illegal character')
break

if line in line2flag:
'''Found existing entry:
Combination of exception and plain rule is contradictionary
!foo.bar + foo.bar
Doublette, since *.foo.bar implies foo.bar:
foo.bar + *.foo.bar
Allowed:
!foo.bar + *.foo.bar
'''
error('Found doublette/ambiguity (previous line was %d)' % line2number[line])

line2number[line] = nline
line2flag[line] = flags

orig_line = None

if section == PSL_FLAG_ICANN:
error('ICANN section not closed')
elif section == PSL_FLAG_PRIVATE:
error('PRIVATE section not closed')

if icann_sections < 1:
warning('No ICANN section found')
elif icann_sections > 1:
warning('%d ICANN sections found' % icann_sections)

if private_sections < 1:
warning('No PRIVATE section found')
elif private_sections > 1:
warning('%d PRIVATE sections found' % private_sections)

def usage():
"""Prints the usage"""
print('usage: %s PSLfile' % sys.argv[0])
print('or %s - # To read PSL from STDIN' % sys.argv[0])
exit(1)


def main():
"""Check syntax of a PSL file"""
if len(sys.argv) < 2:
usage()

with sys.stdin if sys.argv[-1] == '-' else open(sys.argv[-1], 'r', encoding='utf-8', errors="surrogateescape") as infile:
lint_psl(infile)

return errors != 0


if __name__ == '__main__':
sys.exit(main())
31 changes: 31 additions & 0 deletions linter/pslint_selftest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/sh

rc=0
rm -rf log
mkdir -p log

# add CR if missing, it won't possibly survive git
sed -i -e 's/^e.example.com$/e.example.com\r/g' test_spaces.input

for file in `ls *.input|cut -d'.' -f1`; do
echo -n "${file}: "
./pslint.py ${file}.input >log/${file}.log 2>&1
diff -u ${file}.expected log/${file}.log >log/${file}.diff
if [ $? -eq 0 ]; then
echo OK
rm log/${file}.diff log/${file}.log
else
echo FAILED
cat log/${file}.diff
rc=1
fi
done

# remove CR, to not appear as changed to git
sed -i -e 's/^e.example.com\r$/e.example.com/g' test_spaces.input

if [ $rc -eq 0 ]; then
rmdir log
fi

exit $rc
Loading

0 comments on commit e2f2f4b

Please sign in to comment.