In [1110]:
#hide
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False  # workaround for buggy jedi

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1111]:
# default_exp core

In [1112]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [1113]:
#export
import re
from ptypysql.utils import *

# core

> Core functions for SQL formatting

In [1114]:
#hide
from nbdev.showdoc import *

## General formatting

Basic formatting for SQL queries. Let's use an example throughout the core module.

This is what an input could look like

In [1115]:
example_sql = """
create or replace table mytable as -- Mytable example
/* multi line
   comment */
seLecT a.asdf,
-- some line comment
b.qwer, -- some comment here
/* and here is a line comment inside select */
substr(c.asdf,1,2) as substr_asdf, 
/* some commenT 
there */
case when a.asdf= 1 then 'b' /* here a case comment */
when b.qwer =2 then 'c' else 'd' end as new_field, -- Some comment
/* and here some inline comment */
b.asdf2 frOm table1 as a leFt join 
table2 as b -- and here a comment
    on a.asdf = b.asdf  /* joiN this way */
    inner join table3 as c
on a.asdf=c.asdf and a.qwer= b.qwer
whEre a.asdf= 1 -- comment this
anD b.qwer =2 and a.asdf<=1 --comment that
or b.qwer>=5
groUp by a.asdf
"""

and this is how we would like to format it

In [1116]:
expected_sql = """CREATE OR REPLACE TABLE mytable AS -- Mytable example
/* multi line
   comment */
SELECT a.asdf,
    -- some line comment
    b.qwer, -- some comment here
    /* and here is a line comment inside select */
    substr(c.asdf, 1, 2) AS substr_asdf,
    /* some commenT
       there */
    CASE WHEN a.asdf = 1
        THEN 'b' /* here a case comment */
        WHEN b.qwer = 2
        THEN 'c'
        ELSE 'd'
    END AS new_field, -- Some comment
    /* and here some inline comment */
    b.asdf2
FROM table1 AS a
    LEFT JOIN table2 AS b -- and here a comment
        ON a.asdf = b.asdf /* joiN this way */
    INNER JOIN table3 AS c
        ON a.asdf = c.asdf
        AND a.qwer = b.qwer
WHERE a.asdf = 1 -- comment this
    AND b.qwer = 2
    AND a.asdf <= 1 --comment that
    OR b.qwer >= 5
GROUP BY a.asdf"""

Let's start by defining the main statements. The main statements all require a new line and should be in upper case

In [1117]:
#export
# Main statements: each entry is a regular-expression fragment that is matched
# case-insensitively, upper-cased and (except inside a SELECT region) moved to a
# new line. Entries are processed in list order, so "select distinct" must stay
# ahead of "select". Fragments containing regex escapes are raw strings so that
# sequences such as \s reach the regex engine without being treated as (invalid)
# Python string escapes.
MAIN_STATEMENTS = [
    r"create.*?table",  # regex for all variants, e.g. CREATE OR REPLACE TABLE
    r"create.*?view",  # regex for all variants, e.g. CREATE OR REPLACE VIEW
    "with",
    "select distinct",
    "select",
    r"from(?!\sr?\')",  # special handling: skip FROM used inside substring(... from '...')
    r"(?:natural\s|full\s)?(?:left\s|right\s|inner\s|outer\s|cross\s)?join",
    "union",
    "intersect",
    "on",
    "where",
    "group by",
    "having",
    "order by",
    "over",  # special case: no newline, only capitalized
    "partition by",  # special case: no newline, only capitalized
    "limit",
]

# Keywords that are only upper-cased (no newline is inserted for them).
CAP_STATEMENTS = [
    "between",
    "and",
    "or",
    "not",
    "case",
    "when",
    "else",
    "then",
    "end",
    "any",
    "all",
    "exists",
    "in",
    "filter",
    "offset",
    "as",
    "is",
    "like",
    "similar to",
    "null",
    "true",
    "false",
    "unknown",
    "asc",
    "desc"
]
    # "count(",
    # "sum(",
    # "avg(",
    # "min(",
    # "max(",
    # "least(",
    # "greatest(",
    # "date_trunc(",
    # "coalesce(",
    # "trunc(",
    # "sqrt(",
    # "abs(",
    # "cbrt(",
    # "ceil(",
    # "floor(",
    # "degrees(",
    # "div(",
    # "exp(",
    # "ln(",
    # "log(",
    # "mod(",
    # "pi()",
    # "power(",
    # "radians(",
    # "round(",
    # "sign(",
    # "width_bucket(",
    # "random()",
    # "setseed(",
    # "acos(",
    # "asin(",
    # "atan(",
    # "cos(",
    # "cot(",
    # "sin(",
    # "tan(",


> Remark: For OVER and PARTITION BY we only capitalize without adding a newline

We need to remove newlines and multiple spaces because they may be arbitrary. 

Before removing newlines we also need to mark the end of comments with the special token [C], because otherwise we would not know where they end

We also mark the beginning of comments (`/* */` or `--`) with the special token [CS] (comment start) if they start on a new line

In [1118]:
#export
def clean_query(s):
    """Normalize whitespace in query `s`, mark comment boundaries and drop newlines.

    The query is passed through the cleaning steps below, in order; each step
    receives the output of the previous one. Returns the cleaned string.
    """
    cleaning_steps = (
        add_whitespaces_after_comma,  # whitespace after commas, but not in comments or quotes
        remove_redundant_whitespaces,  # collapse repeated whitespaces, newlines are kept
        mark_comments,  # insert the special tokens [C], [CS] and [CI]
        replace_newline_chars,  # drop newlines, but not those inside comments
        remove_whitespaces_newline,  # whitespaces right before / after a newline
        remove_whitespaces_comments,  # whitespaces around [C], [CS] and [CI]
        remove_whitespaces_parenthesis,  # whitespaces between parenthesis
        remove_redundant_whitespaces,  # second pass after the removals above
    )
    for step in cleaning_steps:
        s = step(s)
    return s

In [1119]:
assert_and_print(
    clean_query(
"""
SELECT asdf, qwer,
 qwer1,    qwer2
FROM table1
"""
    ), "SELECT asdf, qwer, qwer1, qwer2 FROM table1"
)

SELECT asdf, qwer, qwer1, qwer2 FROM table1


With usual comments

In [1120]:
assert_and_print(
    clean_query("""
SELECT asdf, qwer, -- some comment
 qwer1,    qwer2
FROM table1
WHERE asdf=1
"""), "SELECT asdf, qwer, -- some comment[C]qwer1, qwer2 FROM table1 WHERE asdf=1"
)

SELECT asdf, qwer, -- some comment[C]qwer1, qwer2 FROM table1 WHERE asdf=1


With other comment form

In [1121]:
assert_and_print(
    clean_query("""
SELECT asdf, qwer, /*  another comment */
qwer1,    qwer2
FROM table1
WHERE asdf=1
"""), "SELECT asdf, qwer, /* another comment */[C]qwer1, qwer2 FROM table1 WHERE asdf=1"
)

SELECT asdf, qwer, /* another comment */[C]qwer1, qwer2 FROM table1 WHERE asdf=1


In [1122]:
assert_and_print(
    clean_query(
"""
SELECT asdf, qwer, /*  another comment */
qwer1,
/* inline comment */
qwer2
FROM table1
WHERE asdf=1
"""
    ), 
    "SELECT asdf, qwer, /* another comment */[C]qwer1,[CS]/* inline comment */[C]qwer2 FROM table1 WHERE asdf=1"
)

SELECT asdf, qwer, /* another comment */[C]qwer1,[CS]/* inline comment */[C]qwer2 FROM table1 WHERE asdf=1


In [1123]:
assert_and_print(
    clean_query(
"""
SELECT asdf,
qwer1, -- comment 1
-- inline comment
qwer2
FROM table1
WHERE asdf=1
"""
    ), 
    "SELECT asdf, qwer1, -- comment 1[C][CS]-- inline comment[C]qwer2 FROM table1 WHERE asdf=1"
)

SELECT asdf, qwer1, -- comment 1[C][CS]-- inline comment[C]qwer2 FROM table1 WHERE asdf=1


In [1124]:
assert_and_print(
    clean_query(
"""
create or replace my_table as
/* some comment
   some new comment line */
select asdf,
qwer,   qwer2

from table1
"""
    ),
"create or replace my_table as[CS]/* some comment[CI]some new comment line */[C]select asdf, qwer, qwer2 from table1"
)

create or replace my_table as[CS]/* some comment[CI]some new comment line */[C]select asdf, qwer, qwer2 from table1


In [1125]:
assert_and_print(
    clean_query(
"""
create or replace table my_table as
select substr( asdf, 1, 2 ) as qwer,
qwer over (
PARTITION BY asdf
)
from table1
"""
    ), "create or replace table my_table as select substr(asdf, 1, 2) as qwer, qwer over (PARTITION BY asdf) from table1"
)

create or replace table my_table as select substr(asdf, 1, 2) as qwer, qwer over (PARTITION BY asdf) from table1


In [1126]:
print(clean_query(example_sql))

create or replace table mytable as -- Mytable example[C][CS]/* multi line[CI]comment */[C]seLecT a.asdf,[CS]-- some line comment[C]b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) as substr_asdf,[CS]/* some commenT[CI]there */[C]case when a.asdf= 1 then 'b' /* here a case comment */[C]when b.qwer =2 then 'c' else 'd' end as new_field, -- Some comment[C][CS]/* and here some inline comment */[C]b.asdf2 frOm table1 as a leFt join table2 as b -- and here a comment[C]on a.asdf = b.asdf /* joiN this way */[C]inner join table3 as c on a.asdf=c.asdf and a.qwer= b.qwer whEre a.asdf= 1 -- comment this[C]anD b.qwer =2 and a.asdf<=1 --comment that[C]or b.qwer>=5 groUp by a.asdf


### Preformatting queries

We would like to have each main statement (SELECT, FROM, ...) on a separate line and in uppercase, ignoring text in comments. This way we can later format each query statement separately. For the special case of PARTITION BY inside the SELECT statement we do not add a newline, because the main statement in that case is SELECT and not PARTITION BY

In [1127]:
#export
def preformat_statements(s):
    """Put each main statement of `s` on its own line in upper case, leaving comments untouched.

    The query is cleaned, split into chunks and every entry of `MAIN_STATEMENTS`
    is applied to the non-comment chunks: outside a SELECT region the statement
    gets a leading newline and is upper-cased; inside a SELECT region only SELECT
    itself gets the newline, every other statement is just upper-cased.
    Afterwards every entry of `CAP_STATEMENTS` is upper-cased in non-comment chunks.
    """
    create_re = re.compile(r"\bcreate\b", flags=re.I)
    select_re = re.compile(r"\bselect\b", flags=re.I)

    def _newline_and_upper(match):
        return "\n" + match.group(1).upper()

    def _upper_only(match):
        return match.group(1).upper()

    def _create_with_as(match):
        # CREATE ... AS: upper-case the CREATE part and the trailing AS, keep the name as is
        return "\n" + match.group(1).upper() + match.group(2) + " AS"

    def _with_string(sdict, string):
        # fresh chunk carrying the rewritten string and the original flags
        return {"string": string, "comment": sdict["comment"], "select": sdict["select"]}

    # clean query and mark comments, split by comment / non comment, merge equal neighbours
    split_s = compress_dicts(split_query(clean_query(s)), ["comment", "select"])

    for statement in MAIN_STATEMENTS:
        rewritten = []
        if create_re.match(statement):  # special case CREATE: capitalize AS as well
            create_sub = re.compile(rf"\s*({statement} )(.*) as\b", flags=re.I)
            for sdict in split_s:
                string = sdict["string"]
                if not sdict["comment"]:
                    string = create_sub.sub(_create_with_as, string)
                rewritten.append(_with_string(sdict, string))
        else:  # normal main statements
            with_leading_space_re = re.compile(rf"\s*\b({statement})\b", flags=re.I)
            bare_statement_re = re.compile(rf"\b({statement})\b", flags=re.I)
            is_select_statement = bool(select_re.match(statement))
            for sdict in split_s:
                string = sdict["string"]
                if sdict["comment"]:
                    pass  # comments are never rewritten
                elif not sdict["select"] or is_select_statement:
                    # outside a select region, or the SELECT keyword itself: newline + upper
                    string = with_leading_space_re.sub(_newline_and_upper, string)
                else:
                    # other statement inside a select region: upper only
                    string = bare_statement_re.sub(_upper_only, string)
                rewritten.append(_with_string(sdict, string))
        split_s = rewritten

    # capitalize common and important functional words
    for statement in CAP_STATEMENTS:
        keyword_pattern = rf"\b({statement})\b"
        keyword_upper = statement.upper()
        split_s = [
            _with_string(
                sdict,
                sdict["string"]
                if sdict["comment"]
                else re.sub(keyword_pattern, keyword_upper, sdict["string"], flags=re.I),
            )
            for sdict in split_s
        ]

    joined = "".join(sdict["string"] for sdict in split_s)
    # strip, then remove whitespaces before and after newline
    return remove_whitespaces_newline(joined.strip())

In [1128]:
assert_and_print(
    preformat_statements("select asdf, qwer as new_var from table1 where asdf = 1"),
    "SELECT asdf, qwer AS new_var\nFROM table1\nWHERE asdf = 1"
)

SELECT asdf, qwer AS new_var
FROM table1
WHERE asdf = 1


In [1129]:
assert_and_print(preformat_statements("""
seLect asdf,
       /* some comment inside select */
       qwer
From   table1 where  asdf = 1
"""),
    "SELECT asdf,[CS]/* some comment inside select */[C]qwer\nFROM table1\nWHERE asdf = 1"
)

SELECT asdf,[CS]/* some comment inside select */[C]qwer
FROM table1
WHERE asdf = 1


In [1130]:
assert_and_print(
    preformat_statements("""
seLect asdf, /* some comment inside select */
       qwer
From   table1 where  asdf = 1
"""),
    "SELECT asdf, /* some comment inside select */[C]qwer\nFROM table1\nWHERE asdf = 1"
)

SELECT asdf, /* some comment inside select */[C]qwer
FROM table1
WHERE asdf = 1


In [1131]:
assert_and_print(
    preformat_statements("""
create or replace view my_view as
seLect asdf,
       /* some comment inside select */
       qwer
From   table1 where  asdf = 1
"""),
    "CREATE OR REPLACE VIEW my_view AS\nSELECT asdf,[CS]/* some comment inside select */[C]qwer\nFROM table1\nWHERE asdf = 1"
)

CREATE OR REPLACE VIEW my_view AS
SELECT asdf,[CS]/* some comment inside select */[C]qwer
FROM table1
WHERE asdf = 1


In [1132]:
assert_and_print(
    preformat_statements("""
create or replace view my_view as
seLect asdf,
       qwer_function,
       qwer
From   table1 where  asdf = 1
"""),
    "CREATE OR REPLACE VIEW my_view AS\nSELECT asdf, qwer_function, qwer\nFROM table1\nWHERE asdf = 1"
)

CREATE OR REPLACE VIEW my_view AS
SELECT asdf, qwer_function, qwer
FROM table1
WHERE asdf = 1


In [1133]:
assert_and_print(
    preformat_statements("""
create or replace view my_view as
seLect asdf, qwer_function,
       lead(asdf) over (Partition By asdf order BY qwer),
    qwer2
From   table1 where  asdf = 1 order by asdf
"""),
    """
CREATE OR REPLACE VIEW my_view AS
SELECT asdf, qwer_function, lead(asdf) OVER (PARTITION BY asdf ORDER BY qwer), qwer2
FROM table1
WHERE asdf = 1
ORDER BY asdf
""".strip()
)

CREATE OR REPLACE VIEW my_view AS
SELECT asdf, qwer_function, lead(asdf) OVER (PARTITION BY asdf ORDER BY qwer), qwer2
FROM table1
WHERE asdf = 1
ORDER BY asdf


In [1134]:
assert_and_print(
    preformat_statements(
"""
create or replace table mytable as -- Mytable example
seLecT a.asdf, b.qwer, -- some comment here
/* and here is a line comment inside select */
substr(c.asdf, 1, 2) as substr_asdf
"""
    ),
"""
CREATE OR REPLACE TABLE mytable AS -- Mytable example[C]
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) AS substr_asdf
""".strip()
)

CREATE OR REPLACE TABLE mytable AS -- Mytable example[C]
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) AS substr_asdf


In [1135]:
assert_and_print(
    preformat_statements(
"""
create table mytable as -- Mytable example
seLecT a.asdf, b.qwer, -- some comment here
/* and here is a line comment inside select */
substr(c.asdf, 1, 2) as substr_asdf
"""
    ),
"""
CREATE TABLE mytable AS -- Mytable example[C]
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) AS substr_asdf
""".strip()
)

CREATE TABLE mytable AS -- Mytable example[C]
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) AS substr_asdf


In [1136]:
assert_and_print(
    preformat_statements(
"""
create or replace table  my_table as -- mytable
select distinct asdf, qwer, -- some comment
from table1
"""
    ),
"""
CREATE OR REPLACE TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1
""".strip()
)

CREATE OR REPLACE TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1


With additional keyword `COMMENT`

In [1137]:
assert_and_print(
    preformat_statements(
"""
create or replace table  my_table comment='blabla' as -- mytable
select distinct asdf, qwer, -- some comment
from table1
"""
    ),
"""
CREATE OR REPLACE TABLE my_table comment='blabla' AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1
""".strip()
)

CREATE OR REPLACE TABLE my_table comment='blabla' AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1


In [1138]:
assert_and_print(
    preformat_statements(
"""
create or replace transient table  my_table as -- mytable
select distinct asdf, qwer, -- some comment
from table1
"""
    ),
"""
CREATE OR REPLACE TRANSIENT TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1
""".strip()
)

CREATE OR REPLACE TRANSIENT TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1


In [1139]:
assert_and_print(
    preformat_statements(
"""
Create view  my_table as -- mytable
select distinct asdf, qwer, -- some comment
from table1
"""
    ),
"""
CREATE VIEW my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1
""".strip()
)

CREATE VIEW my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1


## substring from case

In [1140]:
assert_and_print(
    preformat_statements(
"""
create or replace transient table  my_table as -- mytable
select distinct asdf, qwer, -- some comment
from table1 t1 join table2 t2 as t1.tset = substring(t2.tses from '\w+$')
"""
    ),
"""
CREATE OR REPLACE TRANSIENT TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1 t1
JOIN table2 t2 AS t1.tset = substring(t2.tses from '\w+$')
""".strip()
)

CREATE OR REPLACE TRANSIENT TABLE my_table AS -- mytable[C]
SELECT DISTINCT asdf, qwer, -- some comment[C]
FROM table1 t1
JOIN table2 t2 AS t1.tset = substring(t2.tses from '\w+$')


### Lowercasing query

In [1141]:
#export
def lowercase_query(s):
    "Lowercase query `s`, keeping comments and quoted text exactly as written"
    pieces = []
    for part in split_query(s):
        text = part["string"]
        if not (part["comment"] or part["quote"]):
            text = text.lower()  # only plain query text is lowercased
        pieces.append(text)
    return "".join(pieces)

In [1142]:
assert_and_print(
    lowercase_query("""
--- My nice view 1 --
Create or Replace VieW view_1 as
seLect asdf, -- Some Comment
qwER,
qwerTy, -- Some other comment
FROM table1
"""),
    """
--- My nice view 1 --
create or replace view view_1 as
select asdf, -- Some Comment
qwer,
qwerty, -- Some other comment
from table1
"""
)


--- My nice view 1 --
create or replace view view_1 as
select asdf, -- Some Comment
qwer,
qwerty, -- Some other comment
from table1



In [1143]:
assert_and_print(
    lowercase_query("""
-- Some comment --
Create Or rePlace tablE aS
sElEct asdf,
/* sOme CommEnt */
qwer
FroM table1
"""),
"""
-- Some comment --
create or replace table as
select asdf,
/* sOme CommEnt */
qwer
from table1
"""
)


-- Some comment --
create or replace table as
select asdf,
/* sOme CommEnt */
qwer
from table1



In [1144]:
assert_and_print(
    lowercase_query("""
-- Some comment --
Create Or rePlace tablE aS
sElEct asdf, replace('J', 'N', Asdf2) as Asdf3
/* sOme CommEnt */
qwer
FroM table1
"""),
"""
-- Some comment --
create or replace table as
select asdf, replace('J', 'N', asdf2) as asdf3
/* sOme CommEnt */
qwer
from table1
"""
)


-- Some comment --
create or replace table as
select asdf, replace('J', 'N', asdf2) as asdf3
/* sOme CommEnt */
qwer
from table1



In [1145]:
assert_and_print(
    lowercase_query("""
-- Some comment --
Create Or rePlace tablE aS
sElEct asdf, replace('J', 'N', Asdf2) as Asdf3
/* sOme CommEnt */
qwer
FroM table1 -- Some comment
"""),
"""
-- Some comment --
create or replace table as
select asdf, replace('J', 'N', asdf2) as asdf3
/* sOme CommEnt */
qwer
from table1 -- Some comment
"""
)


-- Some comment --
create or replace table as
select asdf, replace('J', 'N', asdf2) as asdf3
/* sOme CommEnt */
qwer
from table1 -- Some comment



## Add whitespaces between symbols

In [1146]:
#export
def add_whitespaces_query(s):
    "Surround symbols (=!<>) in query `s` with whitespaces, skipping comments and quoted text"
    parts = split_comment_quote(s)  # chunks flagged as comment / quote / plain query text
    plain_parts = [part for part in parts if not part["comment"] and not part["quote"]]
    for part in plain_parts:
        part["string"] = add_whitespaces_between_symbols(part["string"])
    return "".join(part["string"] for part in parts)

In [1147]:
assert_and_print(
    add_whitespaces_query(
"""
create or replace table my_table as /* some comment 1=1, 1 =1 */
select asdf, case when asdf= 1 then '=' else 0 end as qwerty
from table1 as a
left join table2 as b on a.asdf= b.asdf
where asdf=1 and qwer =2
"""
    ),
"""
create or replace table my_table as /* some comment 1=1, 1 =1 */
select asdf, case when asdf = 1 then '=' else 0 end as qwerty
from table1 as a
left join table2 as b on a.asdf = b.asdf
where asdf = 1 and qwer = 2
"""    
)


create or replace table my_table as /* some comment 1=1, 1 =1 */
select asdf, case when asdf = 1 then '=' else 0 end as qwerty
from table1 as a
left join table2 as b on a.asdf = b.asdf
where asdf = 1 and qwer = 2



## Specific formatting and validation

Now we will format each statement individually

#### PARTITION BY

Helper function to format PARTITION BY within SELECT

In [1148]:
#export
def format_partition_by(s, base_indentation):
    """Format the PARTITION BY part of a SELECT (DISTINCT) field `s`.

    PARTITION BY is placed on its own line at `base_indentation + 4`, its fields
    are laid out by `add_newline_indentation` with `base_indentation + 8`, and an
    ORDER BY (if present) goes on its own line at `base_indentation + 4`.
    """
    has_order_by = re.search("order by", s, flags=re.I) is not None
    # split around PARTITION BY (and the ORDER BY following it, when there is one)
    split_pattern = "(partition by.*)(order by.*)" if has_order_by else "(partition by.*)"
    pieces = [piece for piece in re.split(split_pattern, s, flags=re.I) if piece != ""]
    head, partition_clause = pieces[0], pieces[1]
    inner_indent = " " * (base_indentation + 4)
    # newline after each comma (no comments) and indentation
    partition_clause = add_newline_indentation(
        partition_clause, indentation=base_indentation + 8
    )
    if has_order_by:
        # re-attach the ORDER BY part that was split off above
        partition_clause = "".join([partition_clause, " "] + pieces[2:])
    # ORDER BY goes on a new, indented line
    partition_clause = re.sub(
        r"\s(order by.*)", "\n" + inner_indent + r"\1", partition_clause, flags=re.I
    )
    # begin of the string followed by the formatted partition by
    formatted = (head + "\n" + inner_indent + partition_clause).strip()
    # newline and indentation before the last bracket
    return re.sub(
        r"(\)\s*(?:as\s*)*[^\s\)]+)$",
        "\n" + " " * base_indentation + r"\1",
        formatted,
        flags=re.I,
    )

In [1149]:
sql = """qwer over (partition by asdf, qwer order by qwerty)"""
print(format_partition_by(sql, 0))

qwer over (
    partition by asdf,
        qwer
    order by qwerty)


#### Remove (mistake) comma at end of SELECT

In [1150]:
#export
def remove_wrong_end_comma(split_s):
    """Drop commas mistakenly left at the end of a SELECT statement.

    `split_s` is a list of dicts with keys "string", "comment" and "quote".
    Walking from the end, the first non-empty chunk that is neither comment nor
    quote loses its trailing comma(s). Every chunk is also passed through
    `remove_whitespaces_newline`. The dicts are updated in place and returned in
    their original order.
    """
    trailing_comma = re.compile(r"([\w\d]+)[,]+(\s*)$")
    backwards = split_s[::-1]
    comma_pending = True  # still looking for the last piece of actual query text
    for position, part in enumerate(backwards):
        text = part["string"]
        is_query_text = not part["comment"] and not part["quote"]
        if is_query_text and text != "" and comma_pending:
            # at the very end of the select the trailing whitespaces go as well;
            # with a comment afterwards they are kept
            replacement = r"\1" if position == 0 else r"\1\2"
            text = trailing_comma.sub(replacement, text)
            comma_pending = False
        # remove whitespaces between newline symbols
        part["string"] = remove_whitespaces_newline(text)
    return backwards[::-1]

In [1151]:
assert_and_print(
    split_apply_concat("select asdf, qwer, ", remove_wrong_end_comma),
    "select asdf, qwer"
)

select asdf, qwer


In [1152]:
assert_and_print(
    split_apply_concat("select asdf, qwer, -- some comment", remove_wrong_end_comma),
    "select asdf, qwer -- some comment"
)

select asdf, qwer -- some comment


In [1153]:
assert_and_print(
    split_apply_concat("select asdf, qwer, /* another comment */", remove_wrong_end_comma),
    "select asdf, qwer /* another comment */"
)

select asdf, qwer /* another comment */


In [1154]:
assert_and_print(
    split_apply_concat("select asdf, qwer,,,, /* more than 1 comma */", remove_wrong_end_comma),    
    "select asdf, qwer /* more than 1 comma */"
)

select asdf, qwer /* more than 1 comma */


#### Helper function for `case when`

In [1155]:
#export
def format_case_when(s, max_len=99):
    """Format the CASE WHEN statement in line `s`.

    Outside of quoted text, every WHEN / ELSE that does not directly follow CASE
    and every THEN is moved to its own line, indented 4 spaces deeper than the
    field, and END is moved to its own line at the field indentation.
    Lines longer than `max_len` are additionally broken before AND / OR
    (8 spaces deeper); the first AND / OR after an opening bracket is pulled back
    onto the bracket's line. Finally, on lines still longer than `max_len`, the
    AND of a BETWEEN ... AND is moved to its own line.

    Returns the reformatted string.
    """
    # compile regex
    # a space followed by WHEN / ELSE, unless the space comes right after CASE
    when_else_re = re.compile(r"(?<!case) ((?:when|else).*?)", flags=re.I)
    case_and_or = re.compile(r"\b((?:and|or))\b", flags=re.I)
    case_then = re.compile(r"\b(then)\b", flags=re.I)
    case_end = re.compile(r"\b(end)\b", flags=re.I)
    # "(" + bracket-free text + whitespace + first AND / OR; the \b in front of the
    # keyword keeps identifiers that merely end in "and" / "or" (e.g. `color`,
    # `brand`) from being split into "col or" / "br and"
    indent_in_brackets = re.compile(r"(\([^\)\(]*?)\s*\b(and|or)\b", flags=re.I)
    indent_between_and_reset = re.compile(r"(\bbetween\b)\s+(\S*?)\s+(\band\b)", flags=re.I)
    indent_between_and_indent = re.compile(r"(\bbetween\b)\s(\S*?)\s(\band\b)", flags=re.I)
    # prepare string
    s_strip = s.strip()
    # amount of whitespace removed by strip(); equals the leading indentation
    # as long as `s` carries no trailing whitespace
    field_indentation = len(s) - len(s_strip)
    split_s = split_comment_quote(s)
    for d in split_s:
        if not d["quote"]:  # assumed no comments given by select function
            # WHEN / ELSE on a new line
            d["string"] = when_else_re.sub(
                r"\n" + " " * (field_indentation + 4) + r"\1",
                d["string"]
            )
            # THEN on a new line
            d["string"] = case_then.sub(
                "\n" + " " * (field_indentation + 4) + r"\1",
                d["string"]
            )
            # END on a new line, aligned with the field
            d["string"] = case_end.sub("\n" + " " * field_indentation + r"\1", d["string"])

    s_code = "".join([d["string"] for d in split_s])
    # break too long lines before AND / OR
    s_code = "\n".join([case_and_or.sub("\n" + " " * (field_indentation + 8) + r"\1", sp) if len(sp) > max_len else sp for sp in s_code.split("\n")])
    # undo the line break for the first AND / OR within brackets
    s_code = indent_in_brackets.sub(lambda x: x.group(1) + " " + x.group(2), s_code)
    # BETWEEN x AND back on a single line, separated by single spaces
    s_code = indent_between_and_reset.sub(r"\1 \2 \3", s_code)
    # on lines that are still too long move the AND of BETWEEN ... AND to a new line
    # (fixed 12-space indentation, not derived from field_indentation)
    s_code = "\n".join([indent_between_and_indent.sub(r"\1 \2\n" + " " * 12 + r"\3", sp)
                        if len(sp) > max_len else sp for sp in s_code.split("\n")])

    return s_code

In [1156]:
"""def reformat_too_long_line(li, max_len=99):
    "Reformat too long line `li` if it is longer than `max_len` characters after stripping"
    if len(li) > max_len and "(" in li[:max_len]:
        #position of the "(" near to the max_len
        last_open_parenthesis = max_len - 1 - toolongsql[:max_len][::-1].find("(")
        k = -1
        last_close_paraenthesis = None
        for i, c in enumerate(li[last_open_parenthesis:]):
            if c == "(": # if there is a parenthesis not involving a subquery
                k += 1
            elif c == ")" and k == 0: # end position for subquery
                last_close_paraenthesis = i
                print(i)
                break
            elif c == ")":
                k -= 1
        
        # add newline
        li =  li[:last_open_parenthesis + 1] + "\n" + \
        li[last_open_parenthesis + 1: last_open_parenthesis + last_close_paraenthesis] + "\n" + \
        li[last_open_parenthesis + last_close_paraenthesis:]
        
        # add indent after each newline
        li = li.replace("\n", "\n" + " " * 4)
        return li
    else:
        return li"""

'def reformat_too_long_line(li, max_len=99):\n    "Reformat too long line `li` if it is longer than `max_len` characters after stripping"\n    if len(li) > max_len and "(" in li[:max_len]:\n        #position of the "(" near to the max_len\n        last_open_parenthesis = max_len - 1 - toolongsql[:max_len][::-1].find("(")\n        k = -1\n        last_close_paraenthesis = None\n        for i, c in enumerate(li[last_open_parenthesis:]):\n            if c == "(": # if there is a parenthesis not involving a subquery\n                k += 1\n            elif c == ")" and k == 0: # end position for subquery\n                last_close_paraenthesis = i\n                print(i)\n                break\n            elif c == ")":\n                k -= 1\n        \n        # add newline\n        li =  li[:last_open_parenthesis + 1] + "\n" +         li[last_open_parenthesis + 1: last_open_parenthesis + last_close_paraenthesis] + "\n" +         li[last_open_parenthesis + last_close_paraenthesis:]\

#### Helper function to reformat too long lines in SELECT statement

In [1157]:
# deprecated
def reformat_too_long_line(li, max_len=99):
    "Reformat too long line `li` if it is longer than `max_len` characters after stripping"
    indent_length = len(li) - len(li.strip()) + 4
    if len(li) > max_len:
        function_re = re.compile("[\w\d]+\(")
        if function_re.search(li):
            out_list = []
            in_function = False  # indicator for reformatting line with function
            k = 0  # counter for parenthesis
            j = 0  # indicator for string position
            quote_open1 = False  # quote '
            quote_open2 = False # quote "
            first_append = True
            for i, s in enumerate(li):
                if function_re.match(li[i-1:i+1]) and not quote_open1 and not quote_open2 and not in_function:
                    in_function = True
                    indentation = i+1
                elif s == "(" and not quote_open1 and not quote_open2 and in_function:
                    k += 1
                elif s == ")" and not quote_open1 and not quote_open2 and in_function:
                    k -= 1
                elif k == -1:
                    in_function = False
                elif s == "," and in_function and not quote_open1 and not quote_open2 and k == 0:
                    if first_append:
                        out_list.append(li[j:i+1].rstrip())
                        first_append = False
                    else:
                        out_list.append(li[j:i+1].strip())                        
                    j = i + 1
                elif s == "'" and not quote_open1 and not quote_open2:
                    quote_open1 = True
                elif s == "'" and quote_open1 and not quote_open2:
                    quote_open1 = False
                elif s == '"' and not quote_open1 and not quote_open2:
                    quote_open2 = True
                elif s == '"' and not quote_open1 and quote_open2:
                    quote_open2 = False
            out_list.append(li[j:].strip())
            if len(out_list) > 1:
                join_str = "\n" + " " * indent_length
                li = join_str.join(out_list)
        elif "in (" in li:
            out_list = []
            in_in = False
            j = 0  # indicator for string position
            quote_open1 = False  # quote '
            quote_open2 = False # quote "
            first_append = True
            lcol = 0  # line code column
            for i, s in enumerate(li):
                if "in (" in li[i-3:i+1] and not quote_open1 and not quote_open2 and not in_in:
                    in_in = True
                    indentation = i + 1
                elif s == ")" and not quote_open1 and not quote_open2 and in_in:
                    in_in = False
                elif s == "," and in_in and not quote_open1 and not quote_open2:
                    line_chunk = li[j:i+1]
                    lcol = len(line_chunk.strip()) + indentation
                    if first_append:
                        lcol = len(line_chunk.strip())                            
                        if lcol >= max_len:
                            out_list.append(line_chunk.rstrip())
                            first_append = False
                            j = i + 1                            
                    else:
                        lcol = len(line_chunk.strip()) + indentation
                        if lcol >= max_len:
                            out_list.append(line_chunk.strip())
                            j = i + 1
                elif s == "'" and not quote_open1 and not quote_open2:
                    quote_open1 = True
                elif s == "'" and quote_open1 and not quote_open2:
                    quote_open1 = False
                elif s == '"' and not quote_open1 and not quote_open2:
                    quote_open2 = True
                elif s == '"' and not quote_open1 and quote_open2:
                    quote_open2 = False
            out_list.append(li[j:].strip())
            if len(out_list) > 1:
                join_str = "\n" + " " * indent_length
                li = join_str.join(out_list)
    return li

In [1158]:
toolongsql = """SELECT round(coalesce((coalesce(coalesce(sum(extract(epoch FROM (job.finished_at - job.created_at))) FILTER (WHERE (job.state = 'completed')))) / 60), 0)::numeric, 8) AS time_int"""
print(reformat_too_long_line(toolongsql))

SELECT round(coalesce((coalesce(coalesce(sum(extract(epoch FROM (job.finished_at - job.created_at))) FILTER (WHERE (job.state = 'completed')))) / 60), 0)::numeric,
    8) AS time_int


In [1159]:
assert_and_print(
    reformat_too_long_line(
        "select concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer"
    ),
"""
select concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2),
    substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer
""".strip()
)

select concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2),
    substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer


In [1160]:
assert_and_print(
    reformat_too_long_line(
        "       concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer"
    ),
"""       concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2),
           substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer"""
)

       concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2),
           substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer


In [1161]:
assert_and_print(
    reformat_too_long_line(
        "            when concat('asdf', 'qwer', 'qwerqwerqwerqwerqwerqwerqwr', substr(concat('asdf', 'qwer'), 1, 2)) then 2"
    ),
"""            when concat('asdf',
                'qwer',
                'qwerqwerqwerqwerqwerqwerqwr',
                substr(concat('asdf', 'qwer'), 1, 2)) then 2"""
)

            when concat('asdf',
                'qwer',
                'qwerqwerqwerqwerqwerqwerqwr',
                substr(concat('asdf', 'qwer'), 1, 2)) then 2


In [1162]:
assert_and_print(
    reformat_too_long_line(
        "select case when asdf in (1234, 2345, 1234, 2354, 2345, 2352345, 245623462, 124123412, 124312341234) then 1"
    ),
"""
select case when asdf in (1234, 2345, 1234, 2354, 2345, 2352345, 245623462, 124123412, 124312341234) then 1
""".strip()
)

select case when asdf in (1234, 2345, 1234, 2354, 2345, 2352345, 245623462, 124123412, 124312341234) then 1


In [1163]:
assert_and_print(
    reformat_too_long_line(
        "select case when asdf in (1234, 2345, 1234, 2354, 2345, 2352345, 245623462, 124123412, 124312341234) then 1",
        max_len=40
    ),
"""
select case when asdf in (1234, 2345, 1234,
    2354, 2345, 2352345,
    245623462, 124123412,
    124312341234) then 1
""".strip()
)

select case when asdf in (1234, 2345, 1234,
    2354, 2345, 2352345,
    245623462, 124123412,
    124312341234) then 1


In [1164]:
assert_and_print(
    reformat_too_long_line(
        "select case when asdf in ('1234', '2345', '1234', '2354', '2345', '2352345', '245623462', '124123412', '124312341234') then 1",
        max_len=40
    ),
"""
select case when asdf in ('1234', '2345',
    '1234', '2354',
    '2345', '2352345',
    '245623462', '124123412',
    '124312341234') then 1
""".strip()
)

select case when asdf in ('1234', '2345',
    '1234', '2354',
    '2345', '2352345',
    '245623462', '124123412',
    '124312341234') then 1


In [1165]:
assert_and_print(
    reformat_too_long_line("select asdf"),
"select asdf"
)

select asdf


In [1166]:
assert_and_print(
    reformat_too_long_line("select asdf + asdf1 + asdf2 + asdf3 + asdf4 + asdf5 + asdf6 + asdf7 + asdf8 + asdf9"),
"select asdf + asdf1 + asdf2 + asdf3 + asdf4 + asdf5 + asdf6 + asdf7 + asdf8 + asdf9"
)

select asdf + asdf1 + asdf2 + asdf3 + asdf4 + asdf5 + asdf6 + asdf7 + asdf8 + asdf9


### SELECT

In [1167]:
#export
def format_select(s, max_len=99):
    """Format SELECT statement line `s`.

    A trailing ``[C]`` marker and a stray trailing comma are removed, every
    selected field is put on its own line (continuation lines indented by 4
    spaces), `case when` and `partition by` expressions are formatted, a
    missing ``AS`` is inserted in front of aliases and the comments are
    re-attached at the end.

    `max_len` is only forwarded to `format_case_when`; over-long lines are not
    wrapped by this function itself.
    """
    # indentation of every line after the first one
    indentation = 4
    # remove [C] at end of SELECT
    s = re.sub(r"\[C\]$", "", s)
    split_s = split_comment_quote(s)  # split by comment / non-comment, quote / non-quote
    # if comma is found at the end of select statement then remove comma
    split_s = remove_wrong_end_comma(split_s)
    # get only comment / non-comment
    split_comment = compress_dicts(split_s, ["comment"])
    # add newline after each comma and indentation (this is robust against quotes by construction)
    s = add_newline_indentation(
        "".join([d["string"] for d in split_s if not d["comment"]]),
        indentation=indentation
    )
    # format case when, line by line
    split_s = [
        format_case_when(sp, max_len)
        if identify_in_sql("case when", sp) != []
        else sp
        for sp in s.split("\n")
    ]
    # add AS if missing: an alias at the end of a line that follows ")" or a
    # word other than SELECT / SELECT DISTINCT / AS
    as_regex = re.compile(r"(\)(?<!\bAS\b)\s?|\w(?<!\bSELECT\b)(?<!\bSELECT DISTINCT\b)(?<!\bAS\b)\s)(\w+|\'.+\')(,?)$", flags=re.I)
    split_s = [as_regex.sub(lambda x: x.group(1).rstrip() + " AS " + x.group(2) + x.group(3), sp)
               for sp in split_s]
    # join by newline
    s = "\n".join(split_s)
    # format PARTITION BY: split into fields again (newline + base indentation)
    # and format only the fields that contain the keyword
    field_separator = "\n" + (" " * indentation)
    begin_s = s[0:indentation]
    partition_by_re = re.compile("partition by", flags=re.I)
    split_s = [
        format_partition_by(line, base_indentation=indentation)
        if partition_by_re.search(line) else line
        for line in s[indentation:].split(field_separator)
    ]
    s = begin_s + field_separator.join(split_s)
    # NOTE: `reformat_too_long_line` is deprecated and intentionally not applied
    # here; it only handled long lines in SELECT and could not handle long subqueries.
    # get comments and preceding string (non-comment)
    # NOTE(review): for i == 0 the index i-1 wraps around to the last element;
    # confirm that the split never starts with a comment.
    comment_dicts = [
        {"comment": d["string"], "preceding": split_comment[i-1]["string"]}
        for i, d in enumerate(split_comment)
        if d["comment"]
    ]
    # assign comments to text
    return assign_comment(s, comment_dicts)

Simple usage without comments

In [1168]:
assert_and_print(
    format_select("select aSdf, cast(qweR as numeric),  Asdf,qwer1"),
    """select aSdf,
    cast(qweR as numeric),
    Asdf,
    qwer1"""
)

select aSdf,
    cast(qweR as numeric),
    Asdf,
    qwer1


More advanced usage with comments in SELECT

In [1169]:
assert_and_print(
    format_select("select asdf, cast(qwer as numeric), -- some comment[C]ASDF, qwer1"),
    """select asdf,
    cast(qwer as numeric), -- some comment
    ASDF,
    qwer1"""
)

select asdf,
    cast(qwer as numeric), -- some comment
    ASDF,
    qwer1


In [1170]:
assert_and_print(
    format_select("select asdf, -- Some comment[C]asdforqwer -- Another comment[C]"),
"""
select asdf, -- Some comment
    asdforqwer -- Another comment
""".strip()
)

select asdf, -- Some comment
    asdforqwer -- Another comment


Correcting a common mistake on the fly: a trailing comma at the end of SELECT

In [1171]:
assert_and_print(
    format_select("select qwer1,   asdf,"),
    "select qwer1,\n    asdf"
)

select qwer1,
    asdf


In [1172]:
assert_and_print(
    format_select("SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]qwer2"),
"""
SELECT a.asdf,
    b.qwer, -- some comment here
    /* and here is a line comment inside select */
    qwer2
""".strip()
)

SELECT a.asdf,
    b.qwer, -- some comment here
    /* and here is a line comment inside select */
    qwer2


In [1173]:
assert_and_print(
    format_select("SELECT a.asdf,[CS]/* and here is a line comment inside select */"),
"""
SELECT a.asdf
       /* and here is a line comment inside select */
""".strip()
)

SELECT a.asdf
       /* and here is a line comment inside select */


Line comment with --

In [1174]:
assert_and_print(
    format_select("SELECT a.asdf, b.qwer, -- some comment here[C][CS]-- and here is a line comment inside select[C]qwer2"),
"""
SELECT a.asdf,
    b.qwer, -- some comment here
    -- and here is a line comment inside select
    qwer2
""".strip()
)

SELECT a.asdf,
    b.qwer, -- some comment here
    -- and here is a line comment inside select
    qwer2


Correcting comma at end of SELECT but having a comment in the last field

In [1175]:
assert_and_print(
    format_select("select qwer1 as qwer2,   asdf as asdf3, -- this field"),
    "select qwer1 as qwer2,\n    asdf as asdf3 -- this field"
)

select qwer1 as qwer2,
    asdf as asdf3 -- this field


In [1176]:
assert_and_print(
    format_select("select qwer1,   asdf, /* this field */"),
    "select qwer1,\n    asdf /* this field */"
)

select qwer1,
    asdf /* this field */


With `case when` conditions

In [1177]:
assert_and_print(
    format_select("select qwer1, case when abc = 1 then 'a' when abc = 2 then 'b' else 'c' end qwer2"),
    """
select qwer1,
    case when abc = 1 
        then 'a'
        when abc = 2 
        then 'b'
        else 'c' 
    end AS qwer2
    """.strip()    
)

select qwer1,
    case when abc = 1 
        then 'a'
        when abc = 2 
        then 'b'
        else 'c' 
    end AS qwer2


In [1178]:
assert_and_print(
    format_select("select qwer1, case when abc = 1 then 'a' -- first condition[C]" +
                  "when abc = 2 then 'b' -- second condition[C]" +
                  "else 'c' end as qwer2, /* else condition */[C]"
                  "asdf3"
),
    """
select qwer1,
    case when abc = 1 
        then 'a' -- first condition
        when abc = 2 
        then 'b' -- second condition
        else 'c' 
    end as qwer2, /* else condition */
    asdf3
    """.strip()    
)

select qwer1,
    case when abc = 1 
        then 'a' -- first condition
        when abc = 2 
        then 'b' -- second condition
        else 'c' 
    end as qwer2, /* else condition */
    asdf3


Badly formatted `case when` condition

In [1179]:
assert_and_print(
    format_select("select qwer1, case when abc <= 1 then 'a' -- first condition[C]" +
                  "when abc = 2 then 'b' -- second condition[C]" +
                  "else 'c' end as qwer2, -- else condition[C]"
                  "asdf3"
),
    """
select qwer1,
    case when abc <= 1 
        then 'a' -- first condition
        when abc = 2 
        then 'b' -- second condition
        else 'c' 
    end as qwer2, -- else condition
    asdf3
    """.strip()    
)

select qwer1,
    case when abc <= 1 
        then 'a' -- first condition
        when abc = 2 
        then 'b' -- second condition
        else 'c' 
    end as qwer2, -- else condition
    asdf3


`case when` and comment after condition

In [1180]:
assert_and_print(
    format_select("select qwer1, case when abc <= 1 and -- first condition[C]" +
                  "abc >= -1 then 'a' -- second condition[C]" +
                  "else 'c' end as qwer2, -- else condition[C]"
                  "asdf3"
),
    """
select qwer1,
    case when abc <= 1 and abc >= -1  -- first condition
        then 'a' -- second condition
        else 'c' 
    end as qwer2, -- else condition
    asdf3
    """.strip()    
)

select qwer1,
    case when abc <= 1 and abc >= -1  -- first condition
        then 'a' -- second condition
        else 'c' 
    end as qwer2, -- else condition
    asdf3


`case when` in comments

In [1181]:
assert_and_print(
    format_select("select qwer1, case when a3215614834984513155696749788888888888888888888bc <= 1 and -- first condition case when[C]" +
                  "abc >= -1 then 'a' -- second condition case when[C]" +
                  "else 'c' end as qwer2, -- else condition[C]"
                  "asdf3"
),
"""
select qwer1,
    case when a3215614834984513155696749788888888888888888888bc <= 1 and abc >= -1  -- first condition case when
        then 'a' -- second condition case when
        else 'c' 
    end as qwer2, -- else condition
    asdf3
""".strip()    
)

select qwer1,
    case when a3215614834984513155696749788888888888888888888bc <= 1 and abc >= -1  -- first condition case when
        then 'a' -- second condition case when
        else 'c' 
    end as qwer2, -- else condition
    asdf3


In [1182]:
assert_and_print(
    format_select("select asdf, case when asdf >= 1 and asdf <= 10 and" +
                  " substr(qwer, 1, 2) = 'abc' and substr(qwer, 3, 2) = 'qwerty'" +
                  " then 1 else 0 end as case_field, asdf2"
    ),
"""
select asdf,
    case when asdf >= 1 
            and asdf <= 10 
            and substr(qwer, 1, 2) = 'abc' 
            and substr(qwer, 3, 2) = 'qwerty' 
        then 1
        else 0 
    end as case_field,
    asdf2
""".strip()
)

select asdf,
    case when asdf >= 1 
            and asdf <= 10 
            and substr(qwer, 1, 2) = 'abc' 
            and substr(qwer, 3, 2) = 'qwerty' 
        then 1
        else 0 
    end as case_field,
    asdf2


2 `case when ... end`

In [1183]:
assert_and_print(
    format_select(
        "select asdf, cast(case when asdf = 1 then 0 else 1 end as int) as qwer, " +
        "case when asdf = 0 then 1 else 0 end as qwer2"
    ),
"""
select asdf,
    cast(case when asdf = 1 
        then 0
        else 1 
    end as int) as qwer,
    case when asdf = 0 
        then 1
        else 0 
    end as qwer2
""".strip()
)

select asdf,
    cast(case when asdf = 1 
        then 0
        else 1 
    end as int) as qwer,
    case when asdf = 0 
        then 1
        else 0 
    end as qwer2


With functions in SELECT

In [1184]:
assert_and_print(
    format_select("select aSdf, substr(qweR, 2) as qwer,  Asdf,qwer1"),
    "select aSdf,\n    substr(qweR, 2) as qwer,\n    Asdf,\n    qwer1"
)

select aSdf,
    substr(qweR, 2) as qwer,
    Asdf,
    qwer1


In [1185]:
assert_and_print(
    format_select(
"""
select car_id,
       avg(price) as avg_price,
"""
    ),
"""
select car_id,
    avg(price) as avg_price
""".strip()
)

select car_id,
    avg(price) as avg_price


With function in SELECT and case when

In [1186]:
assert_and_print(
    format_select("select qwer1, cast(case when asdf = 'J' then 1 else 0 end) as qwer2, qwer3"),
"""
select qwer1,
    cast(case when asdf = 'J' 
        then 1
        else 0 
    end) as qwer2,
    qwer3
""".strip()    
)

select qwer1,
    cast(case when asdf = 'J' 
        then 1
        else 0 
    end) as qwer2,
    qwer3


In [1187]:
assert_and_print(
    format_select("select qwer1, cast(substr(case when asdf = 'CASE WHEN' then 1 else 0 end, 2, 1)) as qwer2, qwer3"),
"""
select qwer1,
    cast(substr(case when asdf = 'CASE WHEN' 
        then 1
        else 0 
    end, 2, 1)) as qwer2,
    qwer3
""".strip()    
)

select qwer1,
    cast(substr(case when asdf = 'CASE WHEN' 
        then 1
        else 0 
    end, 2, 1)) as qwer2,
    qwer3


In [1188]:
assert_and_print(
    format_select("select qwer1, substr(qwer, case when asdf = 'J' then 1 else 0 end, 4) as qwer2, qwer3"),
"""
select qwer1,
    substr(qwer, case when asdf = 'J' 
        then 1
        else 0 
    end, 4) as qwer2,
    qwer3
""".strip()    
)

select qwer1,
    substr(qwer, case when asdf = 'J' 
        then 1
        else 0 
    end, 4) as qwer2,
    qwer3


In [1189]:
assert_and_print(
    format_select("select qwer1, substr('blabla', case when asdf = 'J' then 1 else 0 end, 4) as qwer2, qwer3"),
"""
select qwer1,
    substr('blabla', case when asdf = 'J' 
        then 1
        else 0 
    end, 4) as qwer2,
    qwer3
""".strip()    
)

select qwer1,
    substr('blabla', case when asdf = 'J' 
        then 1
        else 0 
    end, 4) as qwer2,
    qwer3


With `SELECT DISTINCT`

In [1190]:
assert_and_print(
    format_select("select distinct asdf, qwer, qwer2,"),
"""
select distinct asdf,
    qwer,
    qwer2
""".strip()
)

select distinct asdf,
    qwer,
    qwer2


In [1191]:
assert_and_print(
    format_select("select distinct asdf, case when asdf = 1 then 1 else 2 end as qwerty, qwer2,"),
"""
select distinct asdf,
    case when asdf = 1 
        then 1
        else 2 
    end as qwerty,
    qwer2
""".strip()
)

select distinct asdf,
    case when asdf = 1 
        then 1
        else 2 
    end as qwerty,
    qwer2


With `PARTITION BY`

In [1192]:
assert_and_print(
    format_select("select asdf, lead(asdf) over (partition by qwer, asdf2 order by qwer2) as qwer3, qwerty,"),
"""
select asdf,
    lead(asdf) over (
        partition by qwer,
            asdf2
        order by qwer2
    ) as qwer3,
    qwerty
""".strip()
)

select asdf,
    lead(asdf) over (
        partition by qwer,
            asdf2
        order by qwer2
    ) as qwer3,
    qwerty


In [1193]:
assert_and_print(
    format_select("select asdf, lead(asdf) over (partition by asdf, qwer order by qwer), cast(qwer as numeric), -- some comment[C]ASDF, "),
"""
select asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF
""".strip()
)

select asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF


In [1194]:
assert_and_print(
    format_select("select asdf, lead(asdf) over (partition by asdf, qwer order by qwer, qwer2), cast(qwer as numeric), -- some comment[C]ASDF, "),
"""
select asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer, qwer2
    ),
    cast(qwer as numeric), -- some comment
    ASDF
""".strip()
)

select asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer, qwer2
    ),
    cast(qwer as numeric), -- some comment
    ASDF


In [1195]:
assert_and_print(
    format_select("select asdf, lead(asdf, 1, 2) OVER (PARTITION BY snr, qwer ORDER BY asdf, qwer)"),
"""
select asdf,
    lead(asdf, 1, 2) OVER (
        PARTITION BY snr,
            qwer
        ORDER BY asdf, qwer)
""".strip()
)

select asdf,
    lead(asdf, 1, 2) OVER (
        PARTITION BY snr,
            qwer
        ORDER BY asdf, qwer)


In [1196]:
assert_and_print(
    format_select("select DISTINCT asdf, lead(asdf) over (partition by asdf, qwer order by qwer), cast(qwer as numeric), -- some comment[C]ASDF, "),
"""
select DISTINCT asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF
""".strip()
)

select DISTINCT asdf,
    lead(asdf) over (
        partition by asdf,
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF


With comments within `PARTITION BY`

In [1197]:
assert_and_print(
    format_select("select DISTINCT asdf, lead(asdf) over (partition by asdf, -- some comment[C]qwer order by qwer), cast(qwer as numeric), -- some comment[C]ASDF, "),
"""
select DISTINCT asdf,
    lead(asdf) over (
        partition by asdf, -- some comment
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF
""".strip()
)

select DISTINCT asdf,
    lead(asdf) over (
        partition by asdf, -- some comment
            qwer
        order by qwer
    ),
    cast(qwer as numeric), -- some comment
    ASDF


With too long lines

In [1198]:
assert_and_print(
    format_select(
        "select asdf, concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer"
    ),
"""
select asdf,
    concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer
""".strip()
)

select asdf,
    concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer


In [1199]:
assert_and_print(
    format_select(
        "select asdf, case when asdf in (1231, 123123, 12312, 21412431234, 12341234123, 12341234, 12341234, 1234) then 1 else 0 end as qwer, asdf2"
    ),
"""
select asdf,
    case when asdf in (1231, 123123, 12312, 21412431234, 12341234123, 12341234, 12341234, 1234) 
        then 1
        else 0 
    end as qwer,
    asdf2
""".strip()
)

select asdf,
    case when asdf in (1231, 123123, 12312, 21412431234, 12341234123, 12341234, 12341234, 1234) 
        then 1
        else 0 
    end as qwer,
    asdf2


Long line with `in (...)` and quotes

In [1200]:
assert_and_print(
    format_select(
        "select asdf, case when asdf in ('1231', '123123', '12312', '21412431234', '12341234123', '12341234', '12341234', '1234') then 1 else 0 end as qwer, asdf2"
    ),
"""
select asdf,
    case when asdf in ('1231', '123123', '12312', '21412431234', '12341234123', '12341234', '12341234', '1234') 
        then 1
        else 0 
    end as qwer,
    asdf2
""".strip()
)

select asdf,
    case when asdf in ('1231', '123123', '12312', '21412431234', '12341234123', '12341234', '12341234', '1234') 
        then 1
        else 0 
    end as qwer,
    asdf2


### FROM

In [1201]:
#export
def format_from(s, **kwargs):
    """Format FROM statement line `s`.

    The code part of `s` is passed through `add_newline_indentation` with an
    indentation of 4, a missing ``AS`` is inserted in front of aliases at the
    end of each line, and the comments are re-attached via `assign_comment`.
    Extra keyword arguments are accepted and ignored.
    """
    indentation = 4
    pieces = split_comment_quote(s)
    comment_split = compress_dicts(pieces, ["comment"])

    code_only = "".join(piece["string"] for piece in pieces if not piece["comment"])
    code_lines = add_newline_indentation(code_only, indentation=indentation).split("\n")

    # add AS if no AS exists but with custom name
    as_regex = re.compile(r"(\)(?<!\bAS\b)\s?|\w(?<!\bFROM\b)(?<!\bAS\b)\s)(\w+|\'.+\')(,?)$", flags=re.I)

    def _insert_as(match):
        return match.group(1).rstrip() + " AS " + match.group(2) + match.group(3)

    formatted = "\n".join(as_regex.sub(_insert_as, line) for line in code_lines)

    # pair every comment with the string that precedes it
    comment_dicts = [
        {"comment": entry["string"], "preceding": comment_split[pos - 1]["string"]}
        for pos, entry in enumerate(comment_split)
        if entry["comment"]
    ]
    # assign comments to text
    return assign_comment(formatted, comment_dicts)

In [1202]:
assert_and_print(format_from("from table1 t1, /* some comment */ table2 t2, table3 as t3"), 
                 """from table1 AS t1, /* some comment */
    table2 AS t2,
    table3 as t3
""".strip())

from table1 AS t1, /* some comment */
    table2 AS t2,
    table3 as t3


### SUBSTRING ... FROM (special case for FROM)

### JOIN (LEFT/RIGHT/etc.)

In [1203]:
# export
def format_join(s, **kwargs):
    """Format JOIN statement line `s` by indenting the join phrase by 4 spaces.

    The join phrase is `join` with its optional modifiers: `natural` / `full`,
    then `left` / `right` / `inner` / `cross`, then `outer`
    (case-insensitive). Accepting `outer` after the side keyword keeps
    phrases such as `left outer join` together; otherwise the indentation
    would be inserted between `left` and `outer`. Extra keyword arguments are
    accepted and ignored.
    """
    s = re.sub(  # add indentation
        r"\b((?:natural\s|full\s)?(?:left\s|right\s|inner\s|cross\s)?(?:outer\s)?join)\b",
        r"    \1",
        s,
        flags=re.I
    )
    return s

In [1204]:
assert_and_print(
    format_join("join t1"), 
"""
    join t1
""".strip("\n")
)

    join t1


In [1205]:
assert_and_print(
    format_join("natural left join t1"), 
"""
    natural left join t1
""".strip("\n")
)

    natural left join t1


In [1206]:
split_comment_quote("join a \non b and c or d and e and a between c and d")

[{'string': 'join a \non b and c or d and e and a between c and d',
  'comment': False,
  'quote': False}]

### ON

In [1207]:
#export
def format_on(s, max_len = 99):
    """Format ON statement line `s`.

    The statement is indented by 8 spaces and every `and` / `or` outside
    comments and quotes starts a new line at that indentation. The first
    `and` / `or` that follows an opening parenthesis (with no other
    parenthesis in between) is pulled back onto the previous line.
    `between ... and ...` is kept on one line unless that line is longer than
    `max_len`, in which case its `and` moves to a new line indented by 12
    spaces. Comments are re-attached at the end via `assign_comment`.
    """
    indentation = 8
    split_s = split_comment_quote(" " * indentation + s)  # add indentation
    # define regex before loop
    indent_and_or = re.compile(r"\s*\b(and|or)\b", flags=re.I)
    # the `\b` in front of (and|or) keeps identifiers that merely end in
    # "and" / "or" (e.g. `vendor`, `brand`) from being split after "("
    indent_in_brackets = re.compile(r"(\([^\)\(]*?)\s*\b(and|or)\b", flags=re.I)
    indent_between_and_reset = re.compile(r"(\bbetween\b)\s+(\S*?)\s+(\band\b)", flags=re.I)
    indent_between_and_indent = re.compile(r"(\bbetween\b)\s(\S*?)\s(\band\b)", flags=re.I)
    newline_indent = "\n" + " " * indentation
    for d in split_s:
        if d["comment"] or d["quote"]:
            continue
        # add newline and indentation for and, or
        d["string"] = indent_and_or.sub(lambda x: newline_indent + x.group(1), d["string"])
    # get split comment / non comment
    split_comment = compress_dicts(split_s, ["comment"])
    s_code = "".join(d["string"] for d in split_s if not d["comment"])

    # undo the line break for the first and / or after an opening parenthesis
    s_code = indent_in_brackets.sub(lambda x: x.group(1) + " " + x.group(2), s_code)

    # put `between x and` back on one line, then break before its `and`
    # (experimental) only on lines that are too long
    s_code = indent_between_and_reset.sub(r"\1 \2 \3", s_code)
    between_break = r"\1 \2\n" + " " * (indentation + 4) + r"\3"
    s_code = "\n".join(
        indent_between_and_indent.sub(between_break, sp) if len(sp) > max_len else sp
        for sp in s_code.split("\n")
    )

    # strip lines of code from the right (second pass on purpose: the step
    # above may have introduced new line breaks)
    s_code = "\n".join(sp.rstrip() for sp in s_code.split("\n"))
    # get comments and preceding string (non-comment)
    comment_dicts = [
        {"comment": d["string"], "preceding": split_comment[i-1]["string"]}
        for i, d in enumerate(split_comment)
        if d["comment"]
    ]
    # assign comments to text
    return assign_comment(s_code, comment_dicts)

In [1208]:
assert_and_print(
    format_on("on a.asdf = b.asdf /* some comment */[C]"), 
"""
        on a.asdf = b.asdf /* some comment */
""".strip("\n")
)

        on a.asdf = b.asdf /* some comment */


In [1209]:
assert_and_print(
    format_on("on a.asdf = b.asdf and a.qwer = b.qwer or a.qwer = b.qqqq"), 
"""
        on a.asdf = b.asdf
        and a.qwer = b.qwer
        or a.qwer = b.qqqq
""".strip("\n")
)

        on a.asdf = b.asdf
        and a.qwer = b.qwer
        or a.qwer = b.qqqq


In [1210]:
assert_and_print(
    format_on("on a.asdf = b.asdf and a.qwer = b.qwer OR a.qwer2 between b.qwer2 and a.qwas"), 
"""
        on a.asdf = b.asdf
        and a.qwer = b.qwer
        OR a.qwer2 between b.qwer2 and a.qwas
""".strip("\n")
)

        on a.asdf = b.asdf
        and a.qwer = b.qwer
        OR a.qwer2 between b.qwer2 and a.qwas


In [1211]:
assert_and_print(
    format_on("on a.asdf = b.asdf and a.qwer between b.qwer and d.32dfa OR a.qwer2dskjfhdsalkjfdsadalfah between b.qwedafhkdfjsakhfljdsahlfr2 and a.qwaasjdfgdsakjfdsfkas"), 
"""
        on a.asdf = b.asdf
        and a.qwer between b.qwer and d.32dfa
        OR a.qwer2dskjfhdsalkjfdsadalfah between b.qwedafhkdfjsakhfljdsahlfr2
            and a.qwaasjdfgdsakjfdsfkas
""".strip("\n")
)

        on a.asdf = b.asdf
        and a.qwer between b.qwer and d.32dfa
        OR a.qwer2dskjfhdsalkjfdsadalfah between b.qwedafhkdfjsakhfljdsahlfr2
            and a.qwaasjdfgdsakjfdsfkas


With comments and badly formatted input

In [1212]:
assert_and_print(
    format_on("on a.asdf = b.asdf -- some comment[C]and a.qwer = b.qwer or /* another comment */[C]a.qwer2 = b.qwer2"), 
"""
        on a.asdf = b.asdf -- some comment
        and a.qwer = b.qwer /* another comment */
        or a.qwer2 = b.qwer2
""".strip("\n")
)

        on a.asdf = b.asdf -- some comment
        and a.qwer = b.qwer /* another comment */
        or a.qwer2 = b.qwer2


### AS JOIN

In [1213]:
#export
def format_on(s, max_len = 99):
    "Format ON statement line `s`: one AND / OR condition per line, over-long BETWEEN ... AND wrapped"
    indentation = 8
    # regexes are compiled once, before they are applied piece by piece / line by line
    and_or = re.compile(r"\s*\b(and|or)\b", flags=re.I)
    and_or_in_brackets = re.compile(r"(\([^\)\(]*?)\s*(and|or)\b", flags=re.I)
    between_any_space = re.compile(r"(\bbetween\b)\s+(\S*?)\s+(\band\b)", flags=re.I)
    between_one_space = re.compile(r"(\bbetween\b)\s(\S*?)\s(\band\b)", flags=re.I)

    def on_new_line(match):
        # move the matched and / or to its own indented line
        return "\n" + " " * indentation + match.group(1)

    # indent the statement, then split it by comment / non-comment and quote / non-quote
    pieces = split_comment_quote(" " * indentation + s)
    for piece in pieces:
        if piece["comment"] or piece["quote"]:
            continue  # comments and quoted text are left untouched
        piece["string"] = and_or.sub(on_new_line, piece["string"])
    # comment / non-comment view of the pieces, needed below to re-attach the comments
    by_comment = compress_dicts(pieces, ["comment"])
    code = "".join(piece["string"] for piece in pieces if not piece["comment"])
    # the first and / or after an opening bracket (no other bracket in between)
    # is pulled back onto the bracket's line
    code = and_or_in_brackets.sub(lambda m: m.group(1) + " " + m.group(2), code)
    # BETWEEN ... AND (experimental): first put it back on one line,
    # then break before its AND only on lines longer than `max_len`
    code = between_any_space.sub(r"\1 \2 \3", code)
    between_wrapped = r"\1 \2\n" + " " * (indentation + 4) + r"\3"
    code = "\n".join(
        between_one_space.sub(between_wrapped, line) if len(line) > max_len else line
        for line in code.split("\n")
    )
    # strip lines of code from the right
    code = "\n".join(map(str.rstrip, code.split("\n")))
    # pair every comment with the string of the element right before it
    comment_dicts = [
        {"comment": chunk["string"], "preceding": by_comment[pos - 1]["string"]}
        for pos, chunk in enumerate(by_comment)
        if chunk["comment"]
    ]
    # assign comments to text
    return assign_comment(code, comment_dicts)

### WHERE

In [1214]:
#export
def format_where(s, max_len = 99):
    "Format WHERE statement line `s`: one AND / OR condition per line, over-long BETWEEN ... AND wrapped"
    indentation = 4
    # regexes are compiled once, before they are applied piece by piece / line by line
    and_or = re.compile(r"\s*\b(and|or)\b", flags=re.I)
    and_or_in_brackets = re.compile(r"(\((?!\s*where)[^\)\(]*?)\s*(and|or)\b", flags=re.I)
    between_any_space = re.compile(r"(\bbetween\b)\s+(\S*?)\s+(\band\b)", flags=re.I)
    between_one_space = re.compile(r"(\bbetween\b)\s(\S*?)\s(\band\b)", flags=re.I)

    def on_new_line(match):
        # move the matched and / or to its own indented line
        return "\n" + " " * indentation + match.group(1)

    # split by comment / non comment, quote / non-quote
    pieces = split_comment_quote(s)
    for piece in pieces:
        if piece["comment"] or piece["quote"]:
            continue  # comments and quoted text are left untouched
        piece["string"] = and_or.sub(on_new_line, piece["string"])
    # comment / non-comment view of the pieces, needed below to re-attach the comments
    by_comment = compress_dicts(pieces, ["comment"])
    code = "".join(piece["string"] for piece in pieces if not piece["comment"])

    # the first and / or after an opening bracket (no other bracket in between) is pulled
    # back onto the bracket's line; brackets directly followed by WHERE are skipped
    code = and_or_in_brackets.sub(lambda m: m.group(1) + " " + m.group(2), code)

    # BETWEEN ... AND (experimental): first put it back on one line,
    # then break before its AND only on lines longer than `max_len`
    code = between_any_space.sub(r"\1 \2 \3", code)
    between_wrapped = r"\1 \2\n" + " " * (indentation + 4) + r"\3"
    code = "\n".join(
        between_one_space.sub(between_wrapped, line) if len(line) > max_len else line
        for line in code.split("\n")
    )

    # strip from the right each code line
    code = "\n".join(map(str.rstrip, code.split("\n")))
    # pair every comment with the string of the element right before it
    comment_dicts = [
        {"comment": chunk["string"], "preceding": by_comment[pos - 1]["string"]}
        for pos, chunk in enumerate(by_comment)
        if chunk["comment"]
    ]
    # assign comments to text
    return assign_comment(code, comment_dicts)

In [1215]:
test = "WHERE act.activity IN :activity_names AND act.created BETWEEN :start_date AND (:end_date)::date + '23:59:59.999999'::time"
print(format_where(test))

WHERE act.activity IN :activity_names
    AND act.created BETWEEN :start_date AND (:end_date)::date + '23:59:59.999999'::time


In [1216]:
#export
def format_filter_where(s, **kwargs):
    "Put the WHERE of every `FILTER (WHERE ...` in `s` on a new line, right after the opening bracket"
    # whitespace around the bracket is normalized: `filter (` + newline + `where` + one space;
    # `kwargs` is accepted only so that the function can be called like the other formatters
    filter_where = re.compile(r"(filter)\s+\((where)\s+", flags=re.I)
    return filter_where.sub(r"\1 (\n\2 ", s)

In [1217]:
assert_and_print(
    format_where(
        "WHERE (act.activity IN :grabber_export AND (act.payload -> 'meta' ->> 'source' = 'email-suggestion' OR act.payload ->> 'source' = 'email-suggestions')) OR act.activity = 'GRABBER_SUGGESTIONS_DISCARD'"
    ), """WHERE (act.activity IN :grabber_export AND (act.payload -> 'meta' ->> 'source' = 'email-suggestion' OR act.payload ->> 'source' = 'email-suggestions'))
    OR act.activity = 'GRABBER_SUGGESTIONS_DISCARD'"""
)

WHERE (act.activity IN :grabber_export AND (act.payload -> 'meta' ->> 'source' = 'email-suggestion' OR act.payload ->> 'source' = 'email-suggestions'))
    OR act.activity = 'GRABBER_SUGGESTIONS_DISCARD'


In [1218]:
assert_and_print(
    format_where(
        "WHERE asdf = 1 and -- and some comment[C]qwer = 1 or blabla = 'asdf'"
    ), """WHERE asdf = 1 -- and some comment
    and qwer = 1
    or blabla = 'asdf'"""
)

WHERE asdf = 1 -- and some comment
    and qwer = 1
    or blabla = 'asdf'


In [1219]:
assert_and_print(
    format_where(
        "WHERE (activity = 'DEEPINTEGRATION_UPDATER_MERGE_VIEW_SAVE') AND (act.organization_id = 1 OR usr.username NOT LIKE '%@snapaddy.com') AND (act.payload -> 'meta' ->> 'savedFieldCount' IS NOT NULL) AND created BETWEEN :start_date AND :end_date OR (:excluded_orga_id IS NULL AND act.organization_id <> :excluded_orga_id);"
    ), 
"""
WHERE (activity = 'DEEPINTEGRATION_UPDATER_MERGE_VIEW_SAVE')
    AND (act.organization_id = 1 OR usr.username NOT LIKE '%@snapaddy.com')
    AND (act.payload -> 'meta' ->> 'savedFieldCount' IS NOT NULL)
    AND created BETWEEN :start_date AND :end_date
    OR (:excluded_orga_id IS NULL AND act.organization_id <> :excluded_orga_id);
""".strip()
)

WHERE (activity = 'DEEPINTEGRATION_UPDATER_MERGE_VIEW_SAVE')
    AND (act.organization_id = 1 OR usr.username NOT LIKE '%@snapaddy.com')
    AND (act.payload -> 'meta' ->> 'savedFieldCount' IS NOT NULL)
    AND created BETWEEN :start_date AND :end_date
    OR (:excluded_orga_id IS NULL AND act.organization_id <> :excluded_orga_id);


## Format all statements

In [1220]:
#export
def format_statement_line(s, **kwargs):
    "Format statement line `s` with every formatter whose (case-insensitive) pattern is found in it"
    # order matters: the patterns are tried top to bottom on the progressively formatted line,
    # e.g. the `(\nwhere` produced by `format_filter_where` is picked up by the last entry
    dispatch = (
        (r"^select", format_select),
        (r"^from", format_from),
        (r"^\w*\s?\w*\s?join", format_join),
        (r"^on", format_on),
        (r"filter \(where", format_filter_where),
        (r"^where", format_where),
        (r"\(\nwhere", format_where),
    )
    for pattern, formatter in dispatch:
        if re.search(pattern, s, flags=re.I) is not None:
            s = formatter(s, **kwargs)
    return s

In [1221]:
assert_and_print(
    format_statement_line("select asdf, qwer"),
    """
select asdf,
    qwer
""".strip())

select asdf,
    qwer


In [1222]:
assert_and_print(
    format_statement_line("join table1 as abc"),
    "    join table1 as abc"
)

    join table1 as abc


In [1223]:
assert_and_print(
    format_statement_line("left join table1 as abc"),
    "    left join table1 as abc"
)

    left join table1 as abc


In [1224]:
assert_and_print(
    format_statement_line("natural left join table1 as abc"),
    "    natural left join table1 as abc"
)

    natural left join table1 as abc


In [1225]:
assert_and_print(
    format_statement_line("where asdf = 1 and qwer = 'things' and blabla = 0 or stuff = -1 AND asdf between kfj and ifffg"),
    """
where asdf = 1
    and qwer = 'things'
    and blabla = 0
    or stuff = -1
    AND asdf between kfj and ifffg
    """.strip())

where asdf = 1
    and qwer = 'things'
    and blabla = 0
    or stuff = -1
    AND asdf between kfj and ifffg


In [1226]:
#export
def format_statements(s, **kwargs):
    "Format each line of `s` with `format_statement_line` (forwarding `kwargs`) and join them again"
    return "\n".join(format_statement_line(line, **kwargs) for line in s.split("\n"))

In [1227]:
assert_and_print(
    format_statements("select asdf, qwer\nfrom table1", max_len=99),
"""
select asdf,
    qwer
from table1
""".strip()
)

select asdf,
    qwer
from table1


In [1228]:
#export 
def add_join_as(s, **kwargs):
    "Insert the missing `AS` in front of a table / subquery alias that is followed by `ON` in `s`"
    # The pattern is case sensitive: only uppercase JOIN / AS / ON are recognized.
    # group 1: a closing bracket, or the last character of a word other than JOIN / AS, plus whitespace
    # group 2: the alias (a word or a quoted string)
    # group 3: the whitespace and the ON that follow the alias
    alias_before_on = re.compile(r"(\)(?<!\bAS\b)\s?|\w(?<!\bJOIN\b)(?<!\bAS\b)\s)(\w+|\'.+\')(\s+\bON\b)")

    def insert_as(match):
        before, alias, on_part = match.groups()
        return before.rstrip() + " AS " + alias + on_part

    return alias_before_on.sub(insert_as, s)

In [1229]:
assert_and_print(
    add_join_as("""CREATE OR REPLACE TABLE mytable AS
SELECT a.asdf,
    b.qwer,
    substr(c.asdf, 1, 2) AS substr_asdf,
    CASE WHEN a.asdf = 1
        THEN 'b'
        WHEN b.qwer = 2
        THEN 'c'
        ELSE 'd'
    END AS new_field,
    b.asdf2
FROM table1 AS a
    LEFT JOIN table2 b
        ON a.asdf = b.asdf
    INNER JOIN table3 AS c
        ON a.asdf = c.asdf
        AND a.qwer = b.qwer
    JOIN (
        SELECT t.uut from tablet
        WHERE t.asg = 5
    ) t
        ON a.dffg = t.dffg
WHERE a.asdf = 1
    AND b.qwer = 2
    AND a.asdf <= 1
    OR b.qwer >= 5
GROUP BY a.asdf"""), 
"""CREATE OR REPLACE TABLE mytable AS
SELECT a.asdf,
    b.qwer,
    substr(c.asdf, 1, 2) AS substr_asdf,
    CASE WHEN a.asdf = 1
        THEN 'b'
        WHEN b.qwer = 2
        THEN 'c'
        ELSE 'd'
    END AS new_field,
    b.asdf2
FROM table1 AS a
    LEFT JOIN table2 AS b
        ON a.asdf = b.asdf
    INNER JOIN table3 AS c
        ON a.asdf = c.asdf
        AND a.qwer = b.qwer
    JOIN (
        SELECT t.uut from tablet
        WHERE t.asg = 5
    ) AS t
        ON a.dffg = t.dffg
WHERE a.asdf = 1
    AND b.qwer = 2
    AND a.asdf <= 1
    OR b.qwer >= 5
GROUP BY a.asdf"""
)

CREATE OR REPLACE TABLE mytable AS
SELECT a.asdf,
    b.qwer,
    substr(c.asdf, 1, 2) AS substr_asdf,
    CASE WHEN a.asdf = 1
        THEN 'b'
        WHEN b.qwer = 2
        THEN 'c'
        ELSE 'd'
    END AS new_field,
    b.asdf2
FROM table1 AS a
    LEFT JOIN table2 AS b
        ON a.asdf = b.asdf
    INNER JOIN table3 AS c
        ON a.asdf = c.asdf
        AND a.qwer = b.qwer
    JOIN (
        SELECT t.uut from tablet
        WHERE t.asg = 5
    ) AS t
        ON a.dffg = t.dffg
WHERE a.asdf = 1
    AND b.qwer = 2
    AND a.asdf <= 1
    OR b.qwer >= 5
GROUP BY a.asdf


### Format multiline comments

In [1230]:
#export
def format_multiline_comments(s):
    """Format multiline comments by replacing the multiline comment marker [CI] by newline and indentation

    Every line of `s` is handled on its own. In a line containing `[CI]` each marker becomes
    a newline followed by enough spaces to reach three columns past the start of the first `/*`
    of that line, so that continuation lines are aligned with the comment text.

    Lines without `[CI]` are returned unchanged. So are lines that contain `[CI]` but no `/*`:
    there is no comment opener to align against (previously this raised `AttributeError`).
    """
    marker = "[CI]"
    split_out = []
    for sp in s.split("\n"):  # loop on query lines
        comment_start = sp.find("/*") if marker in sp else -1
        if comment_start < 0:
            # nothing to expand, or no comment opener on this line
            split_out.append(sp)
            continue
        indentation = comment_start + 3  # width of "/* "
        split_out.append(sp.replace(marker, "\n" + " " * indentation))
    return "\n".join(split_out)

### Add semicolon at the end of query

In [1231]:
#export
def add_semicolon(s):
    """Add a semicolon at the end of query `s`

    Only the last line of `s` is touched. It is split with `split_comment`:

    * a single piece: the semicolon is appended to the line, unless the line already ends with one
    * several pieces: the semicolon is inserted into the first piece, right after its last
      word character and before its trailing whitespace, so that it lands in front of the comment

    Returns the query with the modified last line.
    """
    split_s = s.split("\n")
    last_line = split_s[-1]
    split_c = split_comment(last_line)
    if len(split_c) == 1:
        # do not terminate twice a query that already ends with a semicolon
        if not last_line.rstrip().endswith(";"):
            split_s[-1] = last_line + ";"
    else:
        # raw string: "\w", "\d" and "\s" are invalid escape sequences in a plain string literal
        # NOTE(review): a first piece ending in a non-word character such as `)` or a quote
        # does not match and therefore gets no semicolon -- confirm whether that is intended
        split_c[0]["string"] = re.sub(r"(.*[\w\d]+)(\s*)$", r"\1;\2", split_c[0]["string"])
        split_s[-1] = "".join([d["string"] for d in split_c])
    return "\n".join(split_s)

In [1232]:
assert_and_print(
add_semicolon("""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1 /* something */
""".strip()),
"""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1; /* something */
""".strip()
)

CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1; /* something */


In [1233]:
assert_and_print(
add_semicolon("""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1 -- some thing
""".strip()),
"""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1; -- some thing
""".strip()
)

CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1; -- some thing


In [1234]:
assert_and_print(
add_semicolon("""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1
""".strip()),
"""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1;
""".strip()
)

CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table1;


##  Putting everything together

to format a simple query without subqueries

In [1235]:
#export
def format_simple_sql(s, semicolon=False, max_len=99):
    "Format a simple SQL query `s` without subqueries; append a semicolon if `semicolon` is set"
    # lowercase everything but the comments, add breaklines for the main statements
    # and whitespaces between symbols
    query = add_whitespaces_query(preformat_statements(lowercase_query(s)))
    # format the statements, then handle JOIN ... AS ... ON
    query = add_join_as(format_statements(query, max_len=max_len))
    cleanups = (
        (r"\[C\]", ""),  # remaining [C]
        (r"\[CS\]", "\n"),  # remaining [CS]
        (r"\s+\n", "\n"),  # redundant whitespaces before newline
    )
    for pattern, replacement in cleanups:
        query = re.sub(pattern, replacement, query)
    # format multiline comments and strip the query
    query = format_multiline_comments(query).strip()
    return add_semicolon(query) if semicolon else query

In [1236]:
assert_and_print(
    format_simple_sql(example_sql),
    expected_sql
)

CREATE OR REPLACE TABLE mytable AS -- Mytable example
/* multi line
   comment */
SELECT a.asdf,
    -- some line comment
    b.qwer, -- some comment here
    /* and here is a line comment inside select */
    substr(c.asdf, 1, 2) AS substr_asdf,
    /* some commenT
       there */
    CASE WHEN a.asdf = 1
        THEN 'b' /* here a case comment */
        WHEN b.qwer = 2
        THEN 'c'
        ELSE 'd'
    END AS new_field, -- Some comment
    /* and here some inline comment */
    b.asdf2
FROM table1 AS a
    LEFT JOIN table2 AS b -- and here a comment
        ON a.asdf = b.asdf /* joiN this way */
    INNER JOIN table3 AS c
        ON a.asdf = c.asdf
        AND a.qwer = b.qwer
WHERE a.asdf = 1 -- comment this
    AND b.qwer = 2
    AND a.asdf <= 1 --comment that
    OR b.qwer >= 5
GROUP BY a.asdf


In [1237]:
assert_and_print(
    format_simple_sql(
"""
create or replace table first_table as -- my first table
select car_id,
       avg(price) as avg_price,
from first_view
group by car_id
"""
    ),
"""
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
    avg(price) AS avg_price
FROM first_view
GROUP BY car_id
""".strip()
)

CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
    avg(price) AS avg_price
FROM first_view
GROUP BY car_id


## Queries with subqueries

This is how we could (badly) write a query with subqueries

In [1238]:
example_with_subqueries = """
select asdf, cast(qwer as numeric), -- some comment
substr(qwer1, 3, 2) as substr_qwer /* some field */
from 
(select asdf, qwer, /* some nice field */ from table1 where asdf = 1
) as a
left 
join (select asdf, qwer2 from table2 where qwer2 = 1) as b
on a.asdf = b.asdf
where qwer1 >= 0
"""

and this is the way we would like to have it nicely formatted

In [1239]:
expected_with_subqueries = """
SELECT asdf,
    cast(qwer AS numeric), -- some comment
    substr(qwer1, 3, 2) AS substr_qwer /* some field */
FROM (
    SELECT asdf,
        qwer /* some nice field */
    FROM table1
    WHERE asdf = 1
) AS a
    LEFT JOIN (
        SELECT asdf,
            qwer2
        FROM table2
        WHERE qwer2 = 1
    ) AS b
        ON a.asdf = b.asdf
WHERE qwer1 >= 0
""".strip()

### Main function handling queries with subqueries

In [1240]:
#export
def format_sql(s, semicolon=False, max_len=99):
    """Format SQL query with subqueries `s`

    Steps:

    1. basic formatting of the whole query with `format_simple_sql`
       (`semicolon` and `max_len` are forwarded)
    2. as long as `extract_outer_subquery` reports a position, that subquery is reformatted
       with `format_subquery`, which also receives the text in front of it
    3. code lines longer than `max_len` are split with `extract_outer_subquery_too_long` and
       `format_subquery_too_long`; comments are taken out first because their length is not
       considered, and are put back at the end with `assign_comment`

    Returns the formatted query.
    """
    s = format_simple_sql(s, semicolon=semicolon, max_len=max_len)  # basic query formatting
    # get first outer subquery positions
    subquery_pos = extract_outer_subquery(s)
    # loop over subqueries
    while subquery_pos is not None:
        # split into: text up to the subquery, the subquery itself, the remainder
        split_s = [
            s[0:subquery_pos[0]+2],
            s[subquery_pos[0]+2:(subquery_pos[1]+1)],
            s[(subquery_pos[1]+1):]
        ]
        # format subquery (= split_s[1])
        split_s[1] = format_subquery(split_s[1], split_s[0])
        # join main part and subquery
        s = "".join(split_s)

        # get first outer subquery positions
        subquery_pos = extract_outer_subquery(s)

    # format too long string
    split_s = split_comment_quote(s)
    # named `comment_split` so that the local does not shadow the imported `split_comment`
    comment_split = compress_dicts(split_s, ["comment"])

    # separate comment and the code since the length of comment is not considered
    s_code = "".join([d["string"] for d in split_s if not d["comment"]])
    s_code = s_code.split("\n")
    # loop for each line, reformat it if it is too long
    s_id = 0
    while s_id < len(s_code):
        sp = s_code[s_id]
        if len(sp) > max_len:
            # the too long line together with everything after it
            sp_code = "\n".join(s_code[s_id:])
            if split_index := extract_outer_subquery_too_long(sp_code, max_len):
                # cut `sp_code` right after every split index; the last segment must reach the
                # end of `sp_code` itself: bounding it by len(s) could cut off the tail once
                # earlier reformatting has made the remaining code longer than `s`
                zip_split = zip([-1] + split_index, split_index + [len(sp_code)])
                ss = [sp_code[i+1:j+1] for i,j in zip_split]
                # only the inner segments are reformatted, first and last are kept as they are
                for i in range(1, len(ss) - 1):
                    ss[i] = "\n" + format_subquery_too_long(ss[i].strip(), ss[0])
                sp_code = "".join(ss)
                # replace the tail by its reformatted lines; the loop continues on them
                s_code = s_code[:s_id] + sp_code.split("\n")

        s_id += 1

    s_code = "\n".join([ss.rstrip() for ss in s_code])
    # get comments and preceding string (non-comment)
    comment_dicts = []
    for i, d in enumerate(comment_split):
        if d["comment"]:
            comment_dicts.append({"comment": d["string"], "preceding": comment_split[i-1]["string"]})
    # assign comments to text
    s = assign_comment(s_code, comment_dicts)
    return s

In [1241]:
assert_and_print(
    format_sql(example_with_subqueries),
    expected_with_subqueries
)

SELECT asdf,
    cast(qwer AS numeric), -- some comment
    substr(qwer1, 3, 2) AS substr_qwer /* some field */
FROM (
    SELECT asdf,
        qwer /* some nice field */
    FROM table1
    WHERE asdf = 1
) AS a
    LEFT JOIN (
        SELECT asdf,
            qwer2
        FROM table2
        WHERE qwer2 = 1
    ) AS b
        ON a.asdf = b.asdf
WHERE qwer1 >= 0


It even works with simple queries without subqueries, therefore generalizing the `format_simple_sql()` function

In [1242]:
# TODO:
# there is still a problem: SQL with too many comments cannot be handled
# assert_and_print(
#     format_sql(example_sql),
#     expected_sql
# )

## Nested subqueries

The function is also robust against nested subqueries

In [1243]:
example_nested_subqueries = """
select asdf, qwer
from (select a.asdf,  lead(a.substr_qwer) over (partition by a.asdf, asdf2 order by qwer) as lead_qwerty
    from (select asdf, substr(qwer, 3, 2) as substr_qwer from table2) as a
        inner join (select asdf, qwer from table3) as b
            on a.qwer = b.qwer
)
"""

In [1244]:
expected_nested = """SELECT asdf,
    qwer
FROM (
    SELECT a.asdf,
        lead(a.substr_qwer) OVER (
            PARTITION BY a.asdf,
                asdf2
            ORDER BY qwer
        ) AS lead_qwerty
    FROM (
        SELECT asdf,
            substr(qwer, 3, 2) AS substr_qwer
        FROM table2
    ) AS a
        INNER JOIN (
            SELECT asdf,
                qwer
            FROM table3
        ) AS b
            ON a.qwer = b.qwer
)"""

In [1245]:
assert_and_print(
    format_sql(example_nested_subqueries),
    expected_nested
)

SELECT asdf,
    qwer
FROM (
    SELECT a.asdf,
        lead(a.substr_qwer) OVER (
            PARTITION BY a.asdf,
                asdf2
            ORDER BY qwer
        ) AS lead_qwerty
    FROM (
        SELECT asdf,
            substr(qwer, 3, 2) AS substr_qwer
        FROM table2
    ) AS a
        INNER JOIN (
            SELECT asdf,
                qwer
            FROM table3
        ) AS b
            ON a.qwer = b.qwer
)


In [1246]:
sql = """WITH sso_users AS (
            SELECT sso.sso_id,
                sso.user_id,
                sso.created,
                sso.app_id
            FROM analytics_sso_user AS sso
                JOIN analytics_registered_webhooks AS hook
                    ON sso.sso_id = hook.sso_id
        )
        SELECT date_trunc('day', created) AS created_at,
            count(*) FILTER (
                WHERE app_id = '43ed6e70-ad21-4272-8f7a-27e672be5117' OR DLDFASDLFKJDASLFJDASKLFJDSFapp_id IS NULL
            ) AS assistant,
            count(*) FILTER (
                WHERE app_id = '86c12be4-b2f3-47db-a4ff-a4d3ea2d442d'
                        AND verified IS NULL
            ) AS office365,
            count(*) FILTER (
                WHERE app_id = '86c12be4-b2f3-47db-a4ff-a4d3ea2d442d'
                        AND verified IS NOT NULL
            ) AS imap
        FROM analytics_user AS usr
            FULL JOIN sso_users AS sso
                ON usr.user_id = sso.user_id
            FULL JOIN analytics_mailbox_subscription AS msub
                ON msub.user_id = usr.user_id
            JOIN analytics_organization AS org
                ON usr.organization_id = org.organization_id
        WHERE NOT (sso.sso_id IS NULL
                AND verified IS NULL)
            AND usr.username NOT LIKE '%@snapaddy.com'
        GROUP BY created_at
        ORDER BY created_at ASC"""

print(format_sql(sql))

WITH sso_users AS (
    SELECT sso.sso_id,
        sso.user_id,
        sso.created,
        sso.app_id
    FROM analytics_sso_user AS sso
        JOIN analytics_registered_webhooks AS hook
            ON sso.sso_id = hook.sso_id
)
SELECT date_trunc('day', created) AS created_at,
    count(*) FILTER (
        WHERE app_id = '43ed6e70-ad21-4272-8f7a-27e672be5117'
            OR dldfasdlfkjdaslfjdasklfjdsfapp_id IS NULL
    ) AS assistant,
    count(*) FILTER (
        WHERE app_id = '86c12be4-b2f3-47db-a4ff-a4d3ea2d442d'
            AND verified IS NULL
    ) AS office365,
    count(*) FILTER (
        WHERE app_id = '86c12be4-b2f3-47db-a4ff-a4d3ea2d442d'
            AND verified IS NOT NULL
    ) AS imap
FROM analytics_user AS usr
    FULL JOIN sso_users AS sso
        ON usr.user_id = sso.user_id
    FULL JOIN analytics_mailbox_subscription AS msub
        ON msub.user_id = usr.user_id
    JOIN analytics_organization AS org
        ON usr.organization_id = org.organization_id
WHERE NO

### With SELECT DISTINCT

In [1247]:
assert_and_print(
    format_sql(
"""
select asdf, qwer from (select distinct asdf, qwer from table1)
"""
    ),
"""
SELECT asdf,
    qwer
FROM (
    SELECT DISTINCT asdf,
        qwer
    FROM table1
)""".strip()
)

SELECT asdf,
    qwer
FROM (
    SELECT DISTINCT asdf,
        qwer
    FROM table1
)


### More convoluted nested subquery

In [1248]:
example_convoluted = """
select asdf
from (
    select asdf, qwer, /* some comment */
    from (select a.asdf, b.qwer, --some comment
          from (select asdf 
                from table1) as a 
            right join (select qwer 
                        from table2) as b
                on a.asdf = b.asdf)
)
"""

In [1249]:
expected_convoluted = """
SELECT asdf
FROM (
    SELECT asdf,
        qwer /* some comment */
    FROM (
        SELECT a.asdf,
            b.qwer --some comment
        FROM (
            SELECT asdf
            FROM table1
        ) AS a
            RIGHT JOIN (
                SELECT qwer
                FROM table2
            ) AS b
                ON a.asdf = b.asdf
    )
)
""".strip()

In [1250]:
assert_and_print(
    format_sql(example_convoluted),
    expected_convoluted
)

SELECT asdf
FROM (
    SELECT asdf,
        qwer /* some comment */
    FROM (
        SELECT a.asdf,
            b.qwer --some comment
        FROM (
            SELECT asdf
            FROM table1
        ) AS a
            RIGHT JOIN (
                SELECT qwer
                FROM table2
            ) AS b
                ON a.asdf = b.asdf
    )
)


In [1251]:
assert_and_print(
    format_sql(
"""
select asdf,
qwer
from table1 union select qwer,
asdf, asdf2 from table3
where asdf2 >=2
"""
    ),
"""
SELECT asdf,
    qwer
FROM table1
UNION
SELECT qwer,
    asdf,
    asdf2
FROM table3
WHERE asdf2 >= 2
""".strip()
)

SELECT asdf,
    qwer
FROM table1
UNION
SELECT qwer,
    asdf,
    asdf2
FROM table3
WHERE asdf2 >= 2


Partition By with newline

In [1252]:
assert_and_print(
format_sql("""
create or replace table asdf as
select asdf, qwer over (
partition by asdf, qwer order by qwerty
)
from table1
"""),
"""
CREATE OR REPLACE TABLE asdf AS
SELECT asdf,
    qwer OVER (
        PARTITION BY asdf,
            qwer
        ORDER BY qwerty)
FROM table1
""".strip()
)

CREATE OR REPLACE TABLE asdf AS
SELECT asdf,
    qwer OVER (
        PARTITION BY asdf,
            qwer
        ORDER BY qwerty)
FROM table1


Query with lines that are too long

In [1253]:
assert_and_print(
format_sql("""
create or replace table asdf as
select asdf, 
case when asdf in (123412341234, 12341234123412, 123412341234, 512351235132, 123412341, 1234) then 1 else 0 end as qwerty,
qwer over (
partition by asdf, qwer order by qwerty
)
from table1
"""),
"""
CREATE OR REPLACE TABLE asdf AS
SELECT asdf,
    CASE WHEN asdf IN (123412341234, 12341234123412, 123412341234, 512351235132, 123412341, 1234)
        THEN 1
        ELSE 0
    END AS qwerty,
    qwer OVER (
        PARTITION BY asdf,
            qwer
        ORDER BY qwerty)
FROM table1
""".strip()
)

CREATE OR REPLACE TABLE asdf AS
SELECT asdf,
    CASE WHEN asdf IN (123412341234, 12341234123412, 123412341234, 512351235132, 123412341, 1234)
        THEN 1
        ELSE 0
    END AS qwerty,
    qwer OVER (
        PARTITION BY asdf,
            qwer
        ORDER BY qwerty)
FROM table1


In [1254]:
testcase = """WITH days AS (
    SELECT generate_series(:start_date, :end_date, '1 day'::interval) AS day
)
SELECT days.day,
       count(*) FILTER (
           WHERE  job.state = 'completed'
       ) AS completed_jobs_count,
       count(*) FILTER (
           WHERE  job.state = 'failed'
       ) AS failed_jobs_count,
       coalesce(sum((job.return_value ->> 'successRows')::int) FILTER (
           WHERE  job.state = 'completed'
       ),
                0) AS completed_rows_count,
       coalesce(sum((job.return_value ->> 'totalRows')::int - (job.return_value ->> 'successRows')::int) FILTER (
           WHERE  job.state = 'completed'
       ),
                0) AS failed_rows_count
FROM   days
LEFT
JOIN   (
    SELECT state,
           return_value,
           created_at
    FROM   analytics_job
    WHERE  type = 'data_enrichment'
       AND data ->> 'strategy' = 'EMAIL_FINDER'
       AND created_at BETWEEN :start_date
                          AND (:end_date)::date + '23:59:59.999999'::time
) AS job
    ON date_trunc('day', job.created_at) = days.day
GROUP BY 1
ORDER BY days.day DESC;"""

In [1255]:
expected_testcase = """WITH days AS (
    SELECT generate_series(:start_date, :end_date, '1 day'::interval) AS day
)
SELECT days.day,
    count(*) FILTER (
        WHERE job.state = 'completed'
    ) AS completed_jobs_count,
    count(*) FILTER (
        WHERE job.state = 'failed'
    ) AS failed_jobs_count,
    coalesce(sum((job.return_value ->> 'successRows')::int) FILTER (
        WHERE job.state = 'completed'
    ), 0) AS completed_rows_count,
    coalesce(
        sum(
            (job.return_value ->> 'totalRows')::int - (job.return_value ->> 'successRows')::int
        ) FILTER (
            WHERE job.state = 'completed'
        ),
        0
    ) AS failed_rows_count
FROM days
    LEFT JOIN (
        SELECT state,
            return_value,
            created_at
        FROM analytics_job
        WHERE type = 'data_enrichment'
            AND data ->> 'strategy' = 'EMAIL_FINDER'
            AND created_at BETWEEN :start_date AND (:end_date)::date + '23:59:59.999999'::time
    ) AS job
        ON date_trunc('day', job.created_at) = days.day
GROUP BY 1
ORDER BY days.day DESC;"""

In [1256]:
assert_and_print(format_sql(testcase), expected_testcase)

WITH days AS (
    SELECT generate_series(:start_date, :end_date, '1 day'::interval) AS day
)
SELECT days.day,
    count(*) FILTER (
        WHERE job.state = 'completed'
    ) AS completed_jobs_count,
    count(*) FILTER (
        WHERE job.state = 'failed'
    ) AS failed_jobs_count,
    coalesce(sum((job.return_value ->> 'successRows')::int) FILTER (
        WHERE job.state = 'completed'
    ), 0) AS completed_rows_count,
    coalesce(
        sum(
            (job.return_value ->> 'totalRows')::int - (job.return_value ->> 'successRows')::int
        ) FILTER (
            WHERE job.state = 'completed'
        ),
        0
    ) AS failed_rows_count
FROM days
    LEFT JOIN (
        SELECT state,
            return_value,
            created_at
        FROM analytics_job
        WHERE type = 'data_enrichment'
            AND data ->> 'strategy' = 'EMAIL_FINDER'
            AND created_at BETWEEN :start_date AND (:end_date)::date + '23:59:59.999999'::time
    ) AS job
        ON date

In [1257]:
print(format_sql(""" WITH days AS (
                SELECT generate_series(:start_date, :end_date, '1 day'::interval) AS day
            )
            SELECT days.day,
                COUNT(*) FILTER (WHERE job.state = 'completed') AS completed_jobs_count,
                COUNT(*) FILTER (WHERE job.state = 'failed') AS failed_jobs_count,
                COALESCE(SUM((job.return_value->>'successRows')::int)
                    FILTER (WHERE job.state = 'completed'), 0) AS completed_rows_count,
                COALESCE(SUM((job.return_value->>'totalRows')::int - (job.return_value->>'successRows')::int)
                    FILTER (WHERE job.state = 'completed'), 0) AS failed_rows_count
            FROM days
            LEFT JOIN (
                SELECT state, return_value, created_at
                FROM analytics_job
                WHERE type = 'data_enrichment'
                    AND data->>'strategy' = 'EMAIL_FINDER'
                    AND created_at BETWEEN :start_date
                        AND (:end_date)::date + '23:59:59.999999'::time
            ) AS job ON date_trunc('day', job.created_at) = days.day
            GROUP BY 1
            ORDER BY days.day"""))

WITH days AS (
    SELECT generate_series(:start_date, :end_date, '1 day'::interval) AS day
)
SELECT days.day,
    count(*) FILTER (
        WHERE job.state = 'completed'
    ) AS completed_jobs_count,
    count(*) FILTER (
        WHERE job.state = 'failed'
    ) AS failed_jobs_count,
    coalesce(sum((job.return_value ->> 'successRows')::int) FILTER (
        WHERE job.state = 'completed'
    ), 0) AS completed_rows_count,
    coalesce(
        sum(
            (job.return_value ->> 'totalRows')::int - (job.return_value ->> 'successRows')::int
        ) FILTER (
            WHERE job.state = 'completed'
        ),
        0
    ) AS failed_rows_count
FROM days
    LEFT JOIN (
        SELECT state,
            return_value,
            created_at
        FROM analytics_job
        WHERE type = 'data_enrichment'
            AND data ->> 'strategy' = 'EMAIL_FINDER'
            AND created_at BETWEEN :start_date AND (:end_date)::date + '23:59:59.999999'::time
    ) AS job
        ON date

In [1258]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 01_format_file.ipynb.
Converted 02_utils.ipynb.
Converted 03_validation.ipynb.
Converted 99_additional_tests.ipynb.
Converted index.ipynb.
