In [55]:
import csv
import sqlparse as sp
from sqlparse.sql import IdentifierList, Identifier, Where, Comparison
from sqlparse.tokens import Keyword, DML, Newline, Whitespace, Text, Token

In [56]:
'''Data loading functions'''

def extract_metadata():
    path = ""
    metafile = open('files/metadata.txt', 'r') 
    metalines = metafile.readlines() 
    tables_dict = {}
    tables_meta = {}
    tables_list = {}
    is_rec = False
    is_tname = False
    cur_table = ""
    for line in metalines:
        if line.startswith('<begin_table>'):
            is_rec = True
            is_tname = True
        elif line.startswith('<end_table>'):
            if_rec = False
        elif is_tname:
            cur_table = str(line).strip()
            tables_dict[cur_table] = {}
            tables_meta[cur_table] = []
            tables_list[cur_table] = []
            is_tname = False
        else:
            tables_dict[cur_table][str(line).strip()] = []
            tables_meta[cur_table].append(str(line).strip())
    return tables_meta,tables_dict,tables_list
            
def extract_csvdata_bycols(tables_dict):
    tables_data = tables_dict
    for tn,cl in tables_dict.items(): 
        with open('files/'+tn+'.csv', newline='') as table_file:
            all_data = csv.reader(table_file,delimiter=',')
#             print(all_data)
            for row in all_data:
                for c,i in zip(cl,range(len(cl))):
                    tables_data[tn][c].append(int(row[i]))
    return tables_data

def extract_csvdata_byrows(tables_list):
    tables_data = tables_list
    for tn in tables_list: 
        with open('files/'+tn+'.csv', newline='') as table_file:
            all_data = csv.reader(table_file,delimiter=',')
#             print(all_data)
            for row in all_data:
                tables_data[tn].append([int(x) for x in row])
    return tables_data
    

In [57]:
tables_meta,tables_dict,tables_list = extract_metadata()
print(tables_meta)
print(tables_list)
tables_data_bycols = extract_csvdata_bycols(tables_dict)
print(tables_data_bycols)
tables_data_byrows = extract_csvdata_byrows(tables_list)
print(tables_data_byrows)

{'table1': ['A', 'B', 'C'], 'table2': ['D', 'E']}
{'table1': [], 'table2': []}
{'table1': {'A': [922, 640, 775, -551, -952, -354, -497, 411, -900, 858], 'B': [158, 773, 85, 811, 311, 646, 335, 803, 718, 731], 'C': [5727, 5058, 10164, 1534, 1318, 7063, 4549, 10519, 9020, 3668]}, 'table2': {'D': [158, 774, 86, 812, 312, 647, 336, 804, 719, 732], 'E': [11191, 14421, 5117, 13393, 16116, 5403, 6309, 12262, 10226, 13021]}}
{'table1': [[922, 158, 5727], [640, 773, 5058], [775, 85, 10164], [-551, 811, 1534], [-952, 311, 1318], [-354, 646, 7063], [-497, 335, 4549], [411, 803, 10519], [-900, 718, 9020], [858, 731, 3668]], 'table2': [[158, 11191], [774, 14421], [86, 5117], [812, 13393], [312, 16116], [647, 5403], [336, 6309], [804, 12262], [719, 10226], [732, 13021]]}


In [71]:
'''SQL functions'''

def remove_wspaces(parsed_sql):
    modf_parsed_sql = []
    for token in parsed_sql:
        if token.is_whitespace:
            continue
        modf_parsed_sql.append(token)
    return modf_parsed_sql


def attr_condition(cndtn):
    c_attr = {'id1':'', 'opr':'', 'id2':''}
    for token in cndtn:
        if isinstance(token,Identifier):
            if not c_attr['id1']:
                c_attr['id1'] = token.get_name()
            else:
                c_attr['id2'] = token.get_name()
        elif token.ttype is Token.Operator.Comparison:
            c_attr['opr'] += token.value
        elif token.ttype is Token.Literal.Number.Integer:
            if not c_attr['id1']:
                c_attr['id1'] = int(token.value)
            else:
                c_attr['id2'] = int(token.value)
    return c_attr
    

def process_where(where_stmnt):
    where_dict = {'andor':"", 'conditions':[]}
    modf_where_stmnt = remove_wspaces(where_stmnt)
    for token in modf_where_stmnt:
        if token.ttype is Keyword and token.value == 'where':
            continue
        if token.ttype is Keyword and token.value == 'and':
            where_dict['andor'] = 'and'
        elif token.ttype is Keyword and token.value == 'or':
            where_dict['andor'] = 'or'
        else:
            where_dict['conditions'].append(token)
    return where_dict
    
    
def process_query(parsed_sql):
    modf_parsed_sql = remove_wspaces(parsed_sql)
    curr_token = ""
    q_columns,q_tables,q_conditions = [],[],{}
    for token in modf_parsed_sql:
        if token.ttype is DML and token.value == 'select':
            curr_token = 'select'
            continue
        if token.ttype is Keyword and token.value == 'from':
            curr_token = 'from'
            continue
        if isinstance(token,Where):
            curr_token = 'where'
            q_conditions = process_where(token)
            continue
        if curr_token == 'select':
            if isinstance(token, IdentifierList):
                for c in token.get_identifiers():
                    q_columns.append(c.get_name().upper())
            elif isinstance(token, Identifier):
                q_columns.append(token.get_name().upper())
            elif token.ttype is Token.Wildcard:
                q_columns = ['*']
        elif curr_token == 'from':
            if isinstance(token, IdentifierList):
                for t in token.get_identifiers():
                    q_tables.append(t.get_name())
            elif isinstance(token, Identifier):
                q_tables.append(token.get_name())
    q_attributes = {}
    q_attributes['q_tables'] = q_tables
    q_attributes['q_cols'] = q_columns
    q_attributes['q_conditions'] = q_conditions
    return q_attributes

def join_tables(tables):
    join_data = tables_data_byrows[tables[0]]
#     num_rows = len(join_data)
    for t in tables[1:]:
        temp_join = []
        for rj in join_data:
            for rt in tables_data_byrows[t]:
                temp_join.append(rj+rt)
        join_data = temp_join
    return join_data
            

def display(q_rows,tables,cols):
    cols_idx = []
    allcols = []
    for t in tables:
        allcols += tables_meta[t]
    if cols[0]=='*':
        for i in range(len(allcols)):
            cols_idx.append(i)
            print(allcols[i],end='\t')
    else:
        for c in cols:
            cols_idx.append(allcols.index(c.upper()))
        for c in cols:
            print(c,end='\t')
    print()
    for row in q_rows:
        for i in cols_idx:
            print(row[i],end='\t')
        print()
    print("Rows displayed:",len(q_rows))
    
def compare_cols(row,c_attr,fc1,fc2,xc1,xc2):
    if (c_attr['opr'] == "=" and ((fc1 and fc2 and row[xc1]==row[xc2]) or ((not fc2) and row[xc1]==c_attr['id2']))) or \
       (c_attr['opr'] == ">" and ((fc1 and fc2 and row[xc1]>row[xc2]) or ((not fc2) and row[xc1]>c_attr['id2']))) or \
       (c_attr['opr'] == "<" and ((fc1 and fc2 and row[xc1]<row[xc2]) or ((not fc2) and row[xc1]<c_attr['id2']))) or \
       (c_attr['opr'] == "!=" and ((fc1 and fc2 and row[xc1]!=row[xc2]) or ((not fc2) and row[xc1]!=c_attr['id2']))) or \
       (c_attr['opr'] == "<=" and ((fc1 and fc2 and row[xc1]<=row[xc2]) or ((not fc2) and row[xc1]<=c_attr['id2']))) or \
       (c_attr['opr'] == ">=" and ((fc1 and fc2 and row[xc1]>=row[xc2]) or ((not fc2) and row[xc1]>=c_attr['id2']))):
        return True
    return False
    
def execute_where(q_rows,q_tables,q_where):
    cnames = []
    for t in q_tables:
        cnames += tables_meta[t]
    sel_rows = [False for i in range(len(q_rows))]
    ci=0
    for cndtn in q_where['conditions']:
        cndtn = remove_wspaces(cndtn)
        c_attr = attr_condition(cndtn)
        print(c_attr)
        fc1=False
        fc2=False
        xc1=-1
        xc2=-1
        if isinstance(c_attr['id1'],str):
            fc1 = True
            xc1 = cnames.index(c_attr['id1'].upper())
        if isinstance(c_attr['id2'],str):
            fc2 = True
            xc2 = cnames.index(c_attr['id2'].upper())
        print(fc1 and fc2)
        if ci==0:
            r = 0
            for row in q_rows:
                sel_rows[r] = compare_cols(row,c_attr,fc1,fc2,xc1,xc2)
                r+=1
        else:
            r=0
            if q_where['andor']=='and':
                for row in q_rows:
                    if sel_rows[r]:
                        sel_rows[r] = compare_cols(row,c_attr,fc1,fc2,xc1,xc2)
                    r+=1
            elif q_where['andor']=='or':
                for row in q_rows:
                    if not sel_rows[r]:
                        sel_rows[r] = compare_cols(row,c_attr,fc1,fc2,xc1,xc2)
                    r+=1
        ci+=1
    r=0
    new_qrows = []
    for row in q_rows:
        if sel_rows[r]:
            new_qrows.append(row)
        r+=1
    return new_qrows
            
    
def execute_query(q_attributes):
    q_data = []
    if len(q_attributes['q_tables'])>1:
        q_data = join_tables(q_attributes['q_tables'])
    else:
        q_data = tables_data_byrows[q_attributes['q_tables'][0]]
    if q_attributes['q_conditions']:
        q_data = execute_where(q_data,q_attributes['q_tables'],q_attributes['q_conditions'])
    display(q_data,q_attributes['q_tables'],q_attributes['q_cols'])
        
    

In [76]:
# qry_input = input().strip().lower()
qry_input = "select * from table1, table2 where B>600 and D<600".lower()
frmt_qry = sp.format(qry_input,reindent=True, keyword_case='upper')
# print(frmt_qry[1])
parsed_sql = sp.parse(qry_input)[0]
print(parsed_sql.tokens)
q_attributes = process_query(parsed_sql)
print(q_attributes)
# print(q_columns)
# print(q_tables)
# print(q_conditions)
print("--------OUTPUT--------")
execute_query(q_attributes)

[<DML 'select' at 0x7F880806CA68>, <Whitespace ' ' at 0x7F880806C2E8>, <Wildcard '*' at 0x7F8808051708>, <Whitespace ' ' at 0x7F8808051A68>, <Keyword 'from' at 0x7F8808051AC8>, <Whitespace ' ' at 0x7F8808051B28>, <IdentifierList 'table1...' at 0x7F880804C318>, <Whitespace ' ' at 0x7F8808051D08>, <Where 'where ...' at 0x7F8808053F48>]
{'q_tables': ['table1', 'table2'], 'q_cols': ['*'], 'q_conditions': {'andor': 'and', 'conditions': [<Comparison 'b>600' at 0x7F880804C228>, <Comparison 'd<600' at 0x7F880804C2A0>]}}
--------OUTPUT--------
{'id1': 'b', 'opr': '>', 'id2': 600}
False
{'id1': 'd', 'opr': '<', 'id2': 600}
False
A	B	C	D	E	
640	773	5058	158	11191	
640	773	5058	86	5117	
640	773	5058	312	16116	
640	773	5058	336	6309	
-551	811	1534	158	11191	
-551	811	1534	86	5117	
-551	811	1534	312	16116	
-551	811	1534	336	6309	
-354	646	7063	158	11191	
-354	646	7063	86	5117	
-354	646	7063	312	16116	
-354	646	7063	336	6309	
411	803	10519	158	11191	
411	803	10519	86	5117	
411	803	10519	312	16116	
41

In [31]:
# qry_input = input().strip()
# parsed_sql = sp.parse(qry_input)[0]
# print(parsed_sql)
# print(parsed_sql.tokens)
# print(sp.split(qry_input))
# print(qry_output[0])
# print(qry_output[1][0])
print(qry_output[4][2])

NameError: name 'qry_output' is not defined

In [32]:
print(parsed_sql.tokens[1])
print(parsed_sql.flatten())
for token in parsed_sql.tokens:
    if(token.is_whitespace):
        print("w")
    print(token)

 
<generator object TokenList.flatten at 0x7f880811f728>
select
w
 
*
w
 
from
w
 
table1, table2
w
 
where a>0


In [70]:
not 0

True