feat: programmatically list supported dialects

reata · Oct 15, 2023 · fbe8f40 · fbe8f40
1 parent a3d754b
commit fbe8f40
Show file tree

Hide file tree

Showing 6 changed files with 40 additions and 23 deletions.
diff --git a/sqllineage/cli.py b/sqllineage/cli.py
@@ -110,28 +110,10 @@ def main(args=None) -> None:
     elif args.graph_visualization:
         return draw_lineage_graph(**{"host": args.host, "port": args.port})
     elif args.dialects:
-        print(
-            """non-validating
-ansi
-athena
-bigquery
-clickhouse
-databricks
-db2
-exasol
-hive
-materialize
-mysql
-oracle
-postgres
-redshift
-snowflake
-soql
-sparksql
-sqlite
-teradata
-tsql"""
-        )
+        dialects = []
+        for _, supported_dialects in LineageRunner.supported_dialects().items():
+            dialects += supported_dialects
+        print("\n".join(dialects))
     else:
         parser.print_help()
 

diff --git a/sqllineage/core/analyzer.py b/sqllineage/core/analyzer.py
@@ -1,4 +1,5 @@
 from abc import abstractmethod
+from typing import List
 
 from sqllineage.core.holders import StatementLineageHolder
 
@@ -8,6 +9,9 @@ class LineageAnalyzer:
     Parser specific implementation should inherit this class and implement analyze method
     """
 
+    PARSER_NAME: str = ""
+    SUPPORTED_DIALECTS: List[str] = []
+
     @abstractmethod
     def analyze(self, sql: str) -> StatementLineageHolder:
         """

diff --git a/sqllineage/core/parser/sqlfluff/analyzer.py b/sqllineage/core/parser/sqlfluff/analyzer.py
@@ -1,7 +1,7 @@
 import warnings
 from typing import Dict, List
 
-from sqlfluff.core import Linter, SQLLexError, SQLParseError
+from sqlfluff.core import Linter, SQLLexError, SQLParseError, dialect_readout
 from sqlfluff.core.parser import BaseSegment
 
 from sqllineage.core.analyzer import LineageAnalyzer
@@ -17,6 +17,9 @@
 class SqlFluffLineageAnalyzer(LineageAnalyzer):
     """SQL Statement Level Lineage Analyzer for `sqlfluff`"""
 
+    PARSER_NAME = "sqlfluff"
+    SUPPORTED_DIALECTS = list(dialect.label for dialect in dialect_readout())
+
     def __init__(self, dialect: str):
         self._dialect = dialect
         self.tsql_split_cache: Dict[str, BaseSegment] = {}

diff --git a/sqllineage/core/parser/sqlparse/analyzer.py b/sqllineage/core/parser/sqlparse/analyzer.py
@@ -35,6 +35,9 @@
 class SqlParseLineageAnalyzer(LineageAnalyzer):
     """SQL Statement Level Lineage Analyzer."""
 
+    PARSER_NAME = "sqlparse"
+    SUPPORTED_DIALECTS = ["non-validating"]
+
     def analyze(self, sql: str) -> StatementLineageHolder:
         # get rid of comments, which cause inconsistencies in sqlparse output
         stmt = sqlparse.parse(trim_comment(sql))[0]

diff --git a/sqllineage/runner.py b/sqllineage/runner.py
@@ -1,5 +1,6 @@
 import logging
 import warnings
+from collections import OrderedDict
 from typing import Dict, List, Optional, Tuple
 
 from sqllineage import DEFAULT_DIALECT, SQLPARSE_DIALECT
@@ -186,3 +187,23 @@ def _eval(self):
         self._stmt_holders = [analyzer.analyze(stmt) for stmt in self._stmt]
         self._sql_holder = SQLLineageHolder.of(*self._stmt_holders)
         self._evaluated = True
+
+    @staticmethod
+    def supported_dialects() -> Dict[str, List[str]]:
+        """
+        Return an ordered dict (so the default parser implementation comes first)
+        mapping each parser name to the list of dialects it supports.
+        """
+        dialects = OrderedDict(
+            [
+                (
+                    SqlParseLineageAnalyzer.PARSER_NAME,
+                    SqlParseLineageAnalyzer.SUPPORTED_DIALECTS,
+                ),
+                (
+                    SqlFluffLineageAnalyzer.PARSER_NAME,
+                    SqlFluffLineageAnalyzer.SUPPORTED_DIALECTS,
+                ),
+            ]
+        )
+        return dialects
diff --git a/sqllineagejs/src/App.js b/sqllineagejs/src/App.js
@@ -94,8 +94,11 @@ const dialects = {
     "athena",
     "bigquery",
     "clickhouse",
+    "databricks",
     "db2",
+    "duckdb",
     "exasol",
+    "greenplum",
     "hive",
     "materialize",
     "mysql",
@@ -107,6 +110,7 @@ const dialects = {
     "sparksql",
     "sqlite",
     "teradata",
+    "trino",
     "tsql"
   ]
 }