This repository has been archived by the owner on Mar 12, 2020. It is now read-only.
/
statement_splitter.py
123 lines (98 loc) · 4.14 KB
/
statement_splitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
from sqlparse import sql, tokens as T
class StatementSplitter(object):
"""Filter that split stream at individual statements"""
def __init__(self):
self._reset()
def _reset(self):
"""Set the filter attributes to its default values"""
self._in_declare = False
self._in_dbldollar = False
self._is_create = False
self._begin_depth = 0
self.consume_ws = False
self.tokens = []
self.level = 0
def _change_splitlevel(self, ttype, value):
"""Get the new split level (increase, decrease or remain equal)"""
# PostgreSQL
if ttype == T.Name.Builtin and value[0] == '$' and value[-1] == '$':
# 2nd dbldollar found. $quote$ completed
# decrease level
if self._in_dbldollar:
self._in_dbldollar = False
return -1
else:
self._in_dbldollar = True
return 1
# if inside $$ everything inside is defining function character.
# Nothing inside can create a new statement
elif self._in_dbldollar:
return 0
# ANSI
# if normal token return
# wouldn't parenthesis increase/decrease a level?
# no, inside a paranthesis can't start new statement
if ttype not in T.Keyword:
return 0
# Everything after here is ttype = T.Keyword
# Also to note, once entered an If statement you are done and basically
# returning
unified = value.upper()
# three keywords begin with CREATE, but only one of them is DDL
# DDL Create though can contain more words such as "or replace"
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
self._is_create = True
return 0
# can have nested declare inside of being...
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
self._in_declare = True
return 1
if unified == 'BEGIN':
self._begin_depth += 1
if self._is_create:
# FIXME(andi): This makes no sense.
return 1
return 0
# Should this respect a preceeding BEGIN?
# In CASE ... WHEN ... END this results in a split level -1.
# Would having multiple CASE WHEN END and a Assigment Operator
# cause the statement to cut off prematurely?
if unified == 'END':
self._begin_depth = max(0, self._begin_depth - 1)
return -1
if (unified in ('IF', 'FOR', 'WHILE') and
self._is_create and self._begin_depth > 0):
return 1
if unified in ('END IF', 'END FOR', 'END WHILE'):
return -1
# Default
return 0
def process(self, stream):
"""Process the stream"""
EOS_TTYPE = T.Whitespace, T.Comment.Single
# Run over all stream tokens
for ttype, value in stream:
# Yield token if we finished a statement and there's no whitespaces
# It will count newline token as a non whitespace. In this context
# whitespace ignores newlines.
# why don't multi line comments also count?
if self.consume_ws and ttype not in EOS_TTYPE:
yield sql.Statement(self.tokens)
# Reset filter and prepare to process next statement
self._reset()
# Change current split level (increase, decrease or remain equal)
self.level += self._change_splitlevel(ttype, value)
# Append the token to the current statement
self.tokens.append(sql.Token(ttype, value))
# Check if we get the end of a statement
if self.level <= 0 and ttype is T.Punctuation and value == ';':
self.consume_ws = True
# Yield pending statement (if any)
if self.tokens:
yield sql.Statement(self.tokens)