TEL parser tweaks + standardized AST nodes (to abstract ANTLR from users) #13

Status: Open. Wants to merge 32 commits into base: master.

Commits (32)
- b9bb0a7: removed unused boilerplate (dvdotsenko, Nov 8, 2020)
- 5cf265c: local test runner (dvdotsenko, Nov 8, 2020)
- 6a1c306: Adam's taxon_expr > taxon patch (dvdotsenko, Nov 8, 2020)
- 7570f39: harden WORD - not starting with number, push up literals higher in ra… (dvdotsenko, Nov 8, 2020)
- 1edb2d3: reorder dependencies with higher-order structures higher in file (dvdotsenko, Nov 9, 2020)
- e5aab07: allow mixed case in keywords: True == tRUE (dvdotsenko, Nov 9, 2020)
- edacb52: split Lexer and Parser into separate files (dvdotsenko, Nov 9, 2020)
- 47efd54: Py and JS code adapted / rerendered with new split Lexer, MODULE AND … (dvdotsenko, Nov 9, 2020)
- fe85ebf: dump out Tel parser. Embed into PqlParser and expose as own entry point (dvdotsenko, Nov 9, 2020)
- 849e936: PQLParser example + test (dvdotsenko, Nov 9, 2020)
- be271be: add column ::TypeCast() and AS Alias support (dvdotsenko, Nov 10, 2020)
- 764aefe: switch PQL parser to produce AST + ast.Node family of classes (dvdotsenko, Nov 12, 2020)
- 47d6070: round-trip PQL <> AST and AST <> JSON parsers, renderers (dvdotsenko, Nov 13, 2020)
- fdf4e1c: JSON representation now uses `__typename` as name of key for node name (dvdotsenko, Nov 14, 2020)
- ff1368a: add ANTLR support for SQL-like `set key = value` statement for commun… (dvdotsenko, Nov 14, 2020)
- dc058e3: add support for FROM statement (dvdotsenko, Nov 15, 2020)
- 995f29a: fully unpack TEL expressions into AST (was kept as string) (dvdotsenko, Nov 15, 2020)
- e3bcd07: make sure that parser speaks ARRAYS not single statements - to reflec… (dvdotsenko, Nov 15, 2020)
- bc03863: add ast.tools.find_all and tests (dvdotsenko, Nov 15, 2020)
- 26cf2f8: allow Node instances to be hashable (dvdotsenko, Nov 16, 2020)
- dba29df: add test hashable (dvdotsenko, Dec 3, 2020)
- 04df449: don't let robots format. This code is for humans (dvdotsenko, Dec 3, 2020)
- 53ead64: reduce scope to TEL only (dvdotsenko, Dec 5, 2020)
- 6f40979: rename `function` token to `fn` to avoid reserved word collision in JS (dvdotsenko, Dec 5, 2020)
- cac4984: boilerplate fixup (dvdotsenko, Dec 5, 2020)
- cadc42e: add .raw_value property to ast.Taxon to standardize taxon value expre… (dvdotsenko, Dec 5, 2020)
- 9a38f23: enable LIKE, BETWEEN and IN expression operators (dvdotsenko, Dec 5, 2020)
- 5c1afa6: FIX - Taxon.raw_value prop renamed to Taxon.value to reflect "process… (dvdotsenko, Dec 5, 2020)
- e5744d6: tone down agreegious unary parsing. Ignore unary + and merge - into n… (dvdotsenko, Dec 5, 2020)
- 6e346d4: change Visitor parser helpers from imperative class methods to chaine… (dvdotsenko, Dec 8, 2020)
- 0bcf50f: add ILIKE support (dvdotsenko, Dec 8, 2020)
- 6e73986: move ANTLR visitor responsible for TEL-to-AST extraction to seprate file (dvdotsenko, Dec 8, 2020)
85 changes: 69 additions & 16 deletions Makefile
VENDOR_NAME:=panoramic
IMAGE_NAME:=tel-grammar
JAVA_IMAGE_NAME_FULL?=$(VENDOR_NAME)/java-$(IMAGE_NAME)
PYTHON_IMAGE_NAME_FULL?=$(VENDOR_NAME)/python-$(IMAGE_NAME)
PYTHON_IMAGE_TESTS_NAME_FULL?=$(VENDOR_NAME)/python-$(IMAGE_NAME)-tests

WORKDIR=/usr/src/app

image-java:
	docker build \
		--pull \
		-t $(JAVA_IMAGE_NAME_FULL):latest \
		-f docker/Dockerfile-java .

image-python:
	docker build \
		--pull \
		-t $(PYTHON_IMAGE_NAME_FULL):latest \
		-f docker/Dockerfile-python .

.PHONY: image-java image-python

_TEST_IMAGE_MARKER:=/tmp/.$(VENDOR_NAME)-$(IMAGE_NAME)-testrunner-done
$(_TEST_IMAGE_MARKER): python/requirements.txt python/requirements-tests.txt
	docker build \
		-t $(PYTHON_IMAGE_TESTS_NAME_FULL) \
		-f docker/Dockerfile-python-tests .
	touch $(_TEST_IMAGE_MARKER)

test-dev: $(_TEST_IMAGE_MARKER)
	docker run -it --rm \
		-v $(PWD)/python:$(WORKDIR) \
		--workdir ${WORKDIR} \
		$(PYTHON_IMAGE_TESTS_NAME_FULL) \
		pytest -s tests/

# see shipping/Jenkinsfile and keep in sync
test:
	docker run -it --rm \
		-v $(PWD):$(WORKDIR) \
		--workdir ${WORKDIR} \
		python:3.7 \
		bash -c "pip install --upgrade tox && tox -e py37 -c python/tox.ini"
	docker run -it --rm \
		-v $(PWD):$(WORKDIR) \
		--workdir ${WORKDIR} \
		python:3.8 \
		bash -c "pip install --upgrade tox && tox -e py38 -c python/tox.ini"
	docker run -it --rm \
		-v $(PWD):$(WORKDIR) \
		--workdir ${WORKDIR} \
		python:3.9 \
		bash -c "pip install --upgrade tox && tox -e py39 -c python/tox.ini"

.PHONY: test test-dev

image-antlr:
	DOCKER_BUILDKIT=1 docker build \
		-t antlr \
		-f docker/Dockerfile-antlr .

# https://github.com/antlr/antlr4/issues/2335
# solves "cannot find token file" error
grammar/PqlLexer.tokens: grammar/PqlLexer.g4
	docker run --rm \
		-v $(PWD):/mnt \
		antlr \
		-o ./ \
		grammar/PqlLexer.g4

build-code-python: grammar/PqlLexer.tokens grammar/PqlParser.g4 # image-antlr
	docker run --rm \
		-v $(PWD):/mnt \
		antlr \
		-visitor \
		-Dlanguage=Python3 \
		-Xexact-output-dir \
		-o python/src/pql_grammar/antlr \
		grammar/PqlLexer.g4 \
		grammar/PqlParser.g4

build-code-js: grammar/PqlLexer.tokens grammar/PqlParser.g4 # image-antlr
	docker run --rm \
		-v $(PWD):/mnt \
		antlr \
		-visitor \
		-Dlanguage=JavaScript \
		-Xexact-output-dir \
		-o js-temp/ \
		grammar/PqlLexer.g4 \
		grammar/PqlParser.g4

build-code: build-code-python build-code-js

.PHONY: image-antlr build-code-python build-code-js build-code
22 changes: 12 additions & 10 deletions README.md
## Introduction

This repository contains the formal definition of the grammar for TEL, written in [ANTLR v4](https://github.com/antlr/antlr4).
It can generate the following components in Python and JavaScript to handle parsing string expressions:

- *lexer* - splits a string expression into tokens
- *parser* - connects tokens into a parse tree (similar to an AST)

Current documentation on the language is available [here](https://diesel-service.operamprod.com/documentation#taxon-expression-language-tel).

## Local Development

### `make image-antlr`

Builds a local docker image used to run ANTLR commands.
You need to run this command before you may run the ANTLR-related make commands.

### `make build-code-python`

Generates all components in Python.

### `make build-code-js`

Generates all components in JavaScript.

### `make test-dev`

Runs tests on the current version of the grammar in quick mode.
Reuses a pre-built python image (3.8) to mount the local python code and tests and run them.

### `make test`

Runs the same tests as above, but against multiple supported python versions, using the tox config.
(Takes much longer to run because each python image is rebuilt from scratch each time.)
12 changes: 12 additions & 0 deletions docker/Dockerfile-antlr
FROM java:8

ENV ANTLR_VERSION=4.8
ENV CLASSPATH .:/antlr-${ANTLR_VERSION}-complete.jar:$CLASSPATH

ADD http://www.antlr.org/download/antlr-${ANTLR_VERSION}-complete.jar /usr/bin/
RUN chmod +r /usr/bin/antlr-${ANTLR_VERSION}-complete.jar \
&& ln /usr/bin/antlr-${ANTLR_VERSION}-complete.jar /usr/bin/antlr.jar

WORKDIR /mnt

ENTRYPOINT ["java", "-jar", "/usr/bin/antlr.jar"]
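With this image in place, regenerating the Python parser by hand looks roughly like the following. This is a sketch mirroring the Makefile targets above, not a separate supported entry point; paths are taken from `build-code-python`:

```shell
# Build the ANTLR tool image defined above.
DOCKER_BUILDKIT=1 docker build -t antlr -f docker/Dockerfile-antlr .

# Generate the lexer .tokens file first (works around antlr/antlr4#2335),
# then render the Python lexer/parser/visitor into the package tree.
docker run --rm -v "$(pwd)":/mnt antlr -o ./ grammar/PqlLexer.g4
docker run --rm -v "$(pwd)":/mnt antlr \
    -visitor -Dlanguage=Python3 -Xexact-output-dir \
    -o python/src/pql_grammar/antlr \
    grammar/PqlLexer.g4 grammar/PqlParser.g4
```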
27 changes: 27 additions & 0 deletions docker/Dockerfile-python-tests
ARG PYTHON_VERSION=3.8
FROM python:${PYTHON_VERSION} as baseimage

ARG WORKDIR=/usr/src/app
WORKDIR $WORKDIR

ARG PYTHONUSERBASE=/usr/src/lib

# PYTHONUNBUFFERED: Force stdin, stdout and stderr to be totally unbuffered. (equivalent to `python -u`)
# PYTHONHASHSEED: Enable hash randomization (equivalent to `python -R`)
# PYTHONDONTWRITEBYTECODE: Do not write bytecode files to disk, since we treat the filesystem as readonly. (equivalent to `python -B`)
ENV PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUSERBASE=$PYTHONUSERBASE \
PATH="${PYTHONUSERBASE}/bin:${PATH}"

# Setup PYTHONUSERBASE directory
# we allow running / managing these folders by non-root users. Thus need chmod
RUN set -ex; \
mkdir -p $PYTHONUSERBASE && chmod 777 ${PYTHONUSERBASE}; \
mkdir -p $WORKDIR && chmod 777 ${WORKDIR}

COPY python/requirements.txt python/requirements-tests.txt ./
RUN pip install \
-r requirements.txt \
-r requirements-tests.txt
3 changes: 3 additions & 0 deletions grammar/.gitignore
*.*
!*.g4
!.gitignore
112 changes: 112 additions & 0 deletions grammar/PqlLexer.g4
lexer grammar PqlLexer;

// mostly SQL-compatible (except for some TEL-isms where marked):

AND : '&&'; // TEL
EQ : '==';
GT_EQ : '>=';
LT_EQ : '<=';
NOT_EQ1 : '!=';
NOT_EQ2 : '<>';
OR : '||'; // TEL. !! CONFLICT WITH SQL where it's string concatenator !!
SHIFT_LEFT : '<<';
SHIFT_RIGHT : '>>';

AMP : '&';
ASSIGN : '=';
CLOSE_PAREN : ')';
COLON: ':';
COMMA : ',';
DOT : '.';
FORWARD_SLASH : '/';
GT : '>';
LT : '<';
MINUS : '-';
MOD : '%';
OPEN_PAREN : '(';
PIPE : '|';
PLUS : '+';
QUESTION_MARK: '?';
SCOL : ';';
STAR : '*';
TILDE : '~';
UNDER: '_';

// SQL keywords we adapt:
K_AND : A N D;
K_BETWEEN : B E T W E E N;
K_FALSE : F A L S E;
K_ILIKE: I L I K E ;
K_IN : I N;
K_IS : I S;
K_ISNULL : I S N U L L;
K_LIKE : L I K E;
K_NOT : N O T;
K_NOTNULL : N O T N U L L;
K_NULL : N U L L;
K_OR : O R;
K_TRUE : T R U E;

NUMERIC_LITERAL
: DIGIT+ ( '.' DIGIT* )? ( E [-+]? DIGIT+ )?
| '.' DIGIT+ ( E [-+]? DIGIT+ )?
;

// Note, use of TEL escaping variant,
// escaping is NOT SQL style "double-char":
// TODO: allow both in TEL to avoid translation headaches
DOUBLE_QUOTED_STRING: DOUBLE_QUOTED_STRING_TEL ;
DOUBLE_QUOTED_STRING_TEL : '"' ( '\\"' | ~'"' )* '"' ;
DOUBLE_QUOTED_STRING_SQL : '"' ( '""' | ~'"' )* '"' ;

// Note, use of TEL escaping variant,
// Note, escaping is NOT SQL style "double-char":
// TODO: allow both in TEL to avoid translation headaches
SINGLE_QUOTED_STRING: SINGLE_QUOTED_STRING_TEL ;
SINGLE_QUOTED_STRING_TEL: '\'' ( '\\\'' | ~'\'' )* '\'' ;
SINGLE_QUOTED_STRING_SQL: '\'' ( '\'\'' | ~'\'' )* '\'' ;

SINGLE_LINE_COMMENT
: ('--'|'//'|'#') ~[\r\n]* -> channel(HIDDEN)
;

MULTILINE_COMMENT
: '/*' .*? ( '*/' | EOF ) -> channel(HIDDEN)
;

SPACES
: [ \u000B\t\r\n] -> channel(HIDDEN)
;

WORD
: [a-zA-Z_][a-zA-Z_0-9]*
;

fragment DIGIT : [0-9];

fragment A : [aA];
fragment B : [bB];
fragment C : [cC];
fragment D : [dD];
fragment E : [eE];
fragment F : [fF];
fragment G : [gG];
fragment H : [hH];
fragment I : [iI];
fragment J : [jJ];
fragment K : [kK];
fragment L : [lL];
fragment M : [mM];
fragment N : [nN];
fragment O : [oO];
fragment P : [pP];
fragment Q : [qQ];
fragment R : [rR];
fragment S : [sS];
fragment T : [tT];
fragment U : [uU];
fragment V : [vV];
fragment W : [wW];
fragment X : [xX];
fragment Y : [yY];
fragment Z : [zZ];
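The TEL-vs-SQL escaping distinction flagged in the string-rule comments above can be illustrated outside of ANTLR. The regexes below are my own transliteration of the two `DOUBLE_QUOTED_STRING` variants, for illustration only (the grammar currently wires `DOUBLE_QUOTED_STRING` to the TEL variant alone):

```python
import re

# TEL variant: backslash escaping, e.g.  "say \"hi\""
TEL_DQ = re.compile(r'^"(?:\\"|[^"])*"$')
# SQL variant: doubled-char escaping, e.g.  "say ""hi"""
SQL_DQ = re.compile(r'^"(?:""|[^"])*"$')

tel_literal = '"say \\"hi\\""'   # the characters: "say \"hi\""
sql_literal = '"say ""hi"""'     # the characters: "say ""hi"""

assert TEL_DQ.match(tel_literal) is not None
assert SQL_DQ.match(sql_literal) is not None
# Each convention rejects the other's escaping, hence the
# "translation headaches" TODO in the grammar comments:
assert SQL_DQ.match(tel_literal) is None
assert TEL_DQ.match(sql_literal) is None
```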
72 changes: 72 additions & 0 deletions grammar/PqlParser.g4
/*
SQL-inspired "Pano Query Language" syntax
focusing on Expressions

Weird parts:
- Taxon is a SQL-column-like object with similar heritage (namespace etc)
and extra syntax for optionality
- Some operator characters are more "programming" than SQL
Example: Eq compare '==' vs SQL-like '=' (though '=' could be converted to '==' internally)
*/

parser grammar PqlParser;

options {
tokenVocab = PqlLexer;
}

// entry point
parseTel: expr EOF ;

expr
: unary_operator=( MINUS | PLUS | K_NOT ) right=expr
| left=expr operator=( STAR | FORWARD_SLASH | MOD ) right=expr
| left=expr operator=( PLUS | MINUS ) right=expr
| left=expr operator=( LT | LT_EQ | GT | GT_EQ ) right=expr
| left=expr operator=( ASSIGN | EQ | NOT_EQ1 | NOT_EQ2 | K_IS ) right=expr
| left=expr is_negated=K_NOT? operator=(K_LIKE | K_ILIKE) right=expr
| left=expr is_negated=K_NOT? operator=K_IN OPEN_PAREN right_list=exprList CLOSE_PAREN
| left=expr operator=( K_AND | AND ) right=expr
| left=expr operator=( K_OR | OR ) right=expr
// BETWEEN must come after AND or risk being parsed before it
// resulting in `a BETWEEN b` where `AND c` fragment is outside of BETWEEN expression
| left=expr is_negated=K_NOT? operator=K_BETWEEN right=expr
| OPEN_PAREN inner=expr CLOSE_PAREN
| literalValue
| fn
| taxon
;

exprList: expr ( COMMA expr )* ;
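ANTLR resolves precedence in a left-recursive rule like `expr` by alternative order: earlier alternatives bind tighter, which is also why the comment above warns that `K_BETWEEN` must sit below `K_AND`. A minimal hand-rolled sketch of that ordering-equals-precedence idea (my own code, not generated; token set reduced to integers, `+ - * /`, and parens):

```python
import re

# Precedence levels stand in for "how early the alternative appears":
# higher number binds tighter, like STAR/FORWARD_SLASH above PLUS/MINUS.
PREC = {"+": 1, "-": 1, "*": 2, "/": 2}

def tokenize(s):
    return re.findall(r"\d+|[()+\-*/]", s)

def parse(tokens):
    node, rest = parse_binary(tokens, 0)
    assert not rest, "trailing tokens"
    return node

def parse_binary(tokens, min_prec):
    left, tokens = parse_atom(tokens)
    while tokens and tokens[0] in PREC and PREC[tokens[0]] >= min_prec:
        op = tokens[0]
        # PREC[op] + 1 makes operators left-associative.
        right, tokens = parse_binary(tokens[1:], PREC[op] + 1)
        left = (op, left, right)
    return left, tokens

def parse_atom(tokens):
    tok, rest = tokens[0], tokens[1:]
    if tok == "(":
        node, rest = parse_binary(rest, 0)
        assert rest[0] == ")", "unbalanced parens"
        return node, rest[1:]
    return int(tok), rest

# '*' sits at a tighter level than '+', so it groups first:
assert parse(tokenize("1 + 2 * 3")) == ("+", 1, ("*", 2, 3))
assert parse(tokenize("(1 + 2) * 3")) == ("*", ("+", 1, 2), 3)
```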

// Note that function supports optional list of arguments trapped as `expr`
// which allows us to have
//    named (`arg1=value1, arg2=value2`) and
// positional (`value1, value2`) args.
// Named ones will come as `expr` with left=expr,operator=ASSIGN,right=expr contents.
// You might need to express these as ordered dict / list of tuples to preserve names of args.
// Positional will be whatever literal or other single-valued expr content could be.
fn: function_name=identifierMultipart OPEN_PAREN arguments=fnArgs? CLOSE_PAREN ;
fnArgs: fnArg ( COMMA fnArg)* ;
fnArg: ( argument_name=WORD ASSIGN)? argument_value=expr ;
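As the comment above suggests, a consumer needs to keep argument order while distinguishing named from positional arguments; a list of `(name_or_None, value)` tuples does that. A sketch, with a hypothetical `('=', name, value)` tuple standing in for the left/operator/right contents of an ASSIGN `expr`:

```python
def normalize_fn_args(args):
    # args: mixed list of hypothetical ('=', name, value) triples (named args)
    # and bare values (positional args), in source order.
    out = []
    for a in args:
        if isinstance(a, tuple) and len(a) == 3 and a[0] == "=":
            out.append((a[1], a[2]))   # named: keep the argument name
        else:
            out.append((None, a))      # positional: no name
    return out

assert normalize_fn_args([("=", "arg1", 10), 20]) == [("arg1", 10), (None, 20)]
```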

// TODO: TAXON_TAG_DELIMITER is being killed off. Remove when we migrate out of taxon tags.
taxon:
is_optional=QUESTION_MARK?
( namespace=identifierMultipart PIPE )?
slug=identifierMultipart
// TODO: drop this when we drop Data Tags system.
// May conflict with TypeCast expression
( COLON tag=identifierMultipart )?
;

identifierMultipart: WORD ( DOT WORD )* ;

literalValue
: NUMERIC_LITERAL
| DOUBLE_QUOTED_STRING
| SINGLE_QUOTED_STRING
| K_NULL
| K_TRUE
| K_FALSE
;
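The shape of the `taxon` rule above (optional `?`, optional `namespace|` prefix, dotted slug, optional `:tag` slated for removal) can be sketched as a Python regex. This is my own transliteration for illustration; the real parsing is done by the generated ANTLR parser:

```python
import re

# identifierMultipart: WORD ( DOT WORD )*  -- WORD may not start with a digit.
IDENT = r"[a-zA-Z_][a-zA-Z_0-9]*(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*"

TAXON = re.compile(
    rf"^(?P<optional>\?)?"        # is_optional=QUESTION_MARK?
    rf"(?:(?P<namespace>{IDENT})\|)?"  # ( namespace PIPE )?
    rf"(?P<slug>{IDENT})"              # slug
    rf"(?::(?P<tag>{IDENT}))?$"        # ( COLON tag )? -- deprecated Data Tags
)

m = TAXON.match("?ns|spend.total:tag1")
assert m.group("optional") == "?"
assert m.group("namespace") == "ns"
assert m.group("slug") == "spend.total"
assert m.group("tag") == "tag1"

assert TAXON.match("revenue").group("slug") == "revenue"
assert TAXON.match("9bad") is None  # WORD may not start with a number
```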