In [42]:
import csv
import sqlparse as sp
from sqlparse.sql import IdentifierList, Identifier, Where, Comparison
from sqlparse.tokens import Keyword, DML, Newline, Whitespace, Text, Token

In [93]:
'''Data loading functions'''

def extract_metadata(metadata_path='files/metadata.txt'):
    """Parse the table metadata file into three per-table structures.

    The file is read as a sequence of records of the form::

        <begin_table>
        table_name
        column_name        (one per line)
        <end_table>

    Blank lines and any text outside a begin/end pair are ignored, so stray
    content can neither create an empty-named column nor be attached to the
    previously closed table.

    Args:
        metadata_path: Location of the metadata file. Defaults to the path
            that used to be hard-coded.

    Returns:
        A tuple ``(tables_meta, tables_dict, tables_list)``:
        ``tables_meta`` maps table name -> ordered list of column names,
        ``tables_dict`` maps table name -> ``{column name: []}`` and
        ``tables_list`` maps table name -> ``[]``.
    """
    tables_dict = {}
    tables_meta = {}
    tables_list = {}
    in_table = False      # True between <begin_table> and <end_table>
    expect_name = False   # True when the next content line is the table name
    cur_table = ""
    # The context manager guarantees the handle is closed, even on error.
    with open(metadata_path, 'r') as metafile:
        for line in metafile:
            entry = line.strip()
            if not entry:
                continue
            if entry.startswith('<begin_table>'):
                in_table = True
                expect_name = True
            elif entry.startswith('<end_table>'):
                in_table = False
                expect_name = False
            elif not in_table:
                # Text outside any table record carries no schema information.
                continue
            elif expect_name:
                cur_table = entry
                tables_dict[cur_table] = {}
                tables_meta[cur_table] = []
                tables_list[cur_table] = []
                expect_name = False
            else:
                tables_dict[cur_table][entry] = []
                tables_meta[cur_table].append(entry)
    return tables_meta,tables_dict,tables_list
            
def extract_csvdata_bycols(tables_dict):
    """Load every table's CSV into per-column integer lists.

    For each table name in ``tables_dict`` the file ``files/<name>.csv`` is
    read and the i-th field of every row is converted with ``int`` and
    appended to the list of the i-th column (in the dict's key order).

    The lists inside ``tables_dict`` are filled in place and the very same
    dict object is returned.
    """
    for table_name, columns in tables_dict.items():
        with open('files/' + table_name + '.csv', newline='') as csv_file:
            for record in csv.reader(csv_file, delimiter=','):
                # Positional mapping: field index -> column in declared order.
                for position, column_name in enumerate(columns):
                    columns[column_name].append(int(record[position]))
    return tables_dict

def extract_csvdata_byrows(tables_list):
    """Load every table's CSV as a list of integer rows.

    For each table name in ``tables_list`` the file ``files/<name>.csv`` is
    read and each row, converted field by field with ``int``, is appended to
    that table's list.

    The lists inside ``tables_list`` are filled in place and the very same
    mapping object is returned.
    """
    for table_name in tables_list:
        rows = tables_list[table_name]
        with open('files/' + table_name + '.csv', newline='') as csv_file:
            for record in csv.reader(csv_file, delimiter=','):
                rows.append(list(map(int, record)))
    return tables_list
    

In [94]:
# Parse the metadata file into the three per-table structures.
tables_meta,tables_dict,tables_list = extract_metadata()
print(tables_meta)
# Printed before the row loader runs below, so every table's row list is still empty here.
print(tables_list)
# Column-oriented load: the loader fills the lists inside tables_dict in place
# and returns that same dict, so tables_data_bycols and tables_dict are one object.
tables_data_bycols = extract_csvdata_bycols(tables_dict)
print(tables_data_bycols)
# Row-oriented load: likewise fills tables_list in place and returns it.
tables_data_byrows = extract_csvdata_byrows(tables_list)
print(tables_data_byrows)

{'table1': ['A', 'B', 'C'], 'table2': ['D', 'E']}
{'table1': [], 'table2': []}
{'table1': {'A': [922, 640, 775, -551, -952, -354, -497, 411, -900, 858], 'B': [158, 773, 85, 811, 311, 646, 335, 803, 718, 731], 'C': [5727, 5058, 10164, 1534, 1318, 7063, 4549, 10519, 9020, 3668]}, 'table2': {'D': [159, 774, 86, 812, 312, 647, 336, 804, 719, 732], 'E': [11191, 14421, 5117, 13393, 16116, 5403, 6309, 12262, 10226, 13021]}}
{'table1': [[922, 158, 5727], [640, 773, 5058], [775, 85, 10164], [-551, 811, 1534], [-952, 311, 1318], [-354, 646, 7063], [-497, 335, 4549], [411, 803, 10519], [-900, 718, 9020], [858, 731, 3668]], 'table2': [[159, 11191], [774, 14421], [86, 5117], [812, 13393], [312, 16116], [647, 5403], [336, 6309], [804, 12262], [719, 10226], [732, 13021]]}


In [116]:
'''SQL functions'''

def remove_wspaces(parsed_sql):
    """Return a new list with the tokens of ``parsed_sql`` that are not whitespace.

    ``parsed_sql`` is any iterable of tokens exposing an ``is_whitespace``
    attribute; the original order of the remaining tokens is kept.
    """
    return [tok for tok in parsed_sql if not tok.is_whitespace]


def get_conditions(w_condition):
    """Return the non-whitespace tokens of a single condition as a new list.

    ``w_condition`` is iterated through ``remove_wspaces``; every surviving
    token is kept, in order, with no further filtering by token type.
    """
    return list(remove_wspaces(w_condition))
    

def process_where(where_stmnt):
    """Split a parsed WHERE clause into its connective and its conditions.

    Args:
        where_stmnt: Iterable of the WHERE clause's tokens (whitespace tokens
            are dropped via ``remove_wspaces``).

    Returns:
        A dict with two keys:
        ``'andor'`` - ``'and'`` or ``'or'`` depending on the connective
        keyword found (the last one wins), or ``""`` when there is none;
        ``'conditions'`` - one token list per condition, as produced by
        ``get_conditions``.
    """
    where_dict = {'andor':"", 'conditions':[]}
    for token in remove_wspaces(where_stmnt):
        if token.ttype is Keyword:
            # Compare case-insensitively so un-normalised queries also work.
            keyword = token.value.lower()
            if keyword == 'where':
                continue
            if keyword in ('and', 'or'):
                # Record the connective actually used; 'or' must not be
                # stored as 'and'.
                where_dict['andor'] = keyword
                continue
        # Anything else is treated as a condition group.
        where_dict['conditions'].append(get_conditions(token))
    return where_dict
    
    
def process_query(parsed_sql):
    """Extract selected columns, source tables and WHERE info from a statement.

    The non-whitespace tokens are walked once while tracking which clause
    (``select`` / ``from`` / ``where``) is currently open. Keyword values are
    compared against lowercase literals.

    Returns:
        ``(q_columns, q_tables, q_conditions)``: upper-cased column names
        (or ``['*']`` for a wildcard), table names as written, and the dict
        built by ``process_where`` (``{}`` when there is no WHERE clause).
    """
    columns, tables, conditions = [], [], {}
    clause = ""
    for tok in remove_wspaces(parsed_sql):
        # Clause markers only switch state; they contribute no names themselves.
        if tok.ttype is DML and tok.value == 'select':
            clause = 'select'
        elif tok.ttype is Keyword and tok.value == 'from':
            clause = 'from'
        elif isinstance(tok, Where):
            clause = 'where'
            conditions = process_where(tok)
        elif clause == 'select':
            if isinstance(tok, IdentifierList):
                columns.extend(ident.get_name().upper() for ident in tok.get_identifiers())
            elif isinstance(tok, Identifier):
                columns.append(tok.get_name().upper())
            elif tok.ttype is Token.Wildcard:
                # A wildcard replaces whatever was collected so far.
                columns = ['*']
        elif clause == 'from':
            if isinstance(tok, IdentifierList):
                tables.extend(ident.get_name() for ident in tok.get_identifiers())
            elif isinstance(tok, Identifier):
                tables.append(tok.get_name())
    return columns,tables,conditions
#         if token.ttype is Text.Whitespace.Newline:
#             print("*************")
#         print(token.ttype)
#         if token in ['select','from','where','and','or']
    
#     for qline in parsed_sql.split('\n'):
#         modf_parsed_sql.append(remove_wspaces(sp.parse(qline)))
#     qry_outpt = []
#     if modf_parsed_sql[0].lower() == 'select':
#         qry_output = fn_select(modf_parsed_sql)
#     return modf_parsed_sql

def join_tables(tables):
    """Build the cross product of the rows of the given tables.

    Rows are read from the notebook-level ``tables_data_byrows`` mapping.
    Starting from the first table's rows, every further table multiplies the
    result: each accumulated row is concatenated with each row of that table.
    With a single table its stored row list is returned as is.
    """
    joined = tables_data_byrows[tables[0]]
    for table_name in tables[1:]:
        joined = [
            left + right
            for left in joined
            for right in tables_data_byrows[table_name]
        ]
    return joined
            

def display(q_rows,tables,cols):
    """Print the selected columns of ``q_rows`` as a tab-separated table.

    The header of the joined rows is the concatenation of the column lists of
    ``tables`` taken from the notebook-level ``tables_meta``. When the first
    entry of ``cols`` is ``'*'`` every column is shown; otherwise each
    requested name is upper-cased and looked up in that header. A final line
    reports the number of rows.
    """
    header = []
    for table_name in tables:
        header += tables_meta[table_name]

    if cols[0] == '*':
        positions = list(range(len(header)))
        labels = header
    else:
        # Resolve all positions first so an unknown column fails before any output.
        positions = [header.index(name.upper()) for name in cols]
        labels = cols

    for label in labels:
        print(label, end='\t')
    print()
    for record in q_rows:
        for position in positions:
            print(record[position], end='\t')
        print()
    print("Rows displayed:", len(q_rows))
    

def execute_query(tables,where,cols):
    """Fetch the rows for ``tables`` and print the requested columns.

    Several tables are combined with ``join_tables``; a single table is read
    directly from the notebook-level ``tables_data_byrows``. The ``where``
    argument is accepted but not applied - no filtering happens here.
    """
    rows = join_tables(tables) if len(tables) > 1 else tables_data_byrows[tables[0]]
    display(rows, tables, cols)
        
    

In [117]:
# Interactive alternative to the hard-coded query below:
# qry_input = input().strip().lower()
# Sample query, lower-cased because process_query compares keyword values
# against lowercase literals ('select', 'from').
qry_input = "select * from table1, table2".lower()
# NOTE(review): frmt_qry is not used again in this cell; parsing below works on qry_input.
frmt_qry = sp.format(qry_input,reindent=True, keyword_case='upper')
# print(frmt_qry[1])
# Keep only the first parsed statement of the input.
parsed_sql = sp.parse(qry_input)[0]
print(parsed_sql.tokens)
# Break the statement into selected columns, source tables and WHERE info.
q_columns,q_tables,q_conditions = process_query(parsed_sql)
print(q_columns)
print(q_tables)
print(q_conditions)
print("--------OUTPUT--------")
# execute_query accepts the conditions but does not filter on them.
execute_query(q_tables,q_conditions,q_columns)

[<DML 'select' at 0x7F4F2426D948>, <Whitespace ' ' at 0x7F4F242352E8>, <Wildcard '*' at 0x7F4F24235168>, <Whitespace ' ' at 0x7F4F242351C8>, <Keyword 'from' at 0x7F4F24235228>, <Whitespace ' ' at 0x7F4F242C76A8>, <IdentifierList 'table1...' at 0x7F4F242AF8B8>]
['*']
['table1', 'table2']
{}
--------OUTPUT--------
A	B	C	D	E	
922	158	5727	159	11191	
922	158	5727	774	14421	
922	158	5727	86	5117	
922	158	5727	812	13393	
922	158	5727	312	16116	
922	158	5727	647	5403	
922	158	5727	336	6309	
922	158	5727	804	12262	
922	158	5727	719	10226	
922	158	5727	732	13021	
640	773	5058	159	11191	
640	773	5058	774	14421	
640	773	5058	86	5117	
640	773	5058	812	13393	
640	773	5058	312	16116	
640	773	5058	647	5403	
640	773	5058	336	6309	
640	773	5058	804	12262	
640	773	5058	719	10226	
640	773	5058	732	13021	
775	85	10164	159	11191	
775	85	10164	774	14421	
775	85	10164	86	5117	
775	85	10164	812	13393	
775	85	10164	312	16116	
775	85	10164	647	5403	
775	85	10164	336	6309	
775	85	10164	804	12262	
775	85	10164	71

In [92]:
# Scratch cell: earlier experiments, left commented out.
# qry_input = input().strip()
# parsed_sql = sp.parse(qry_input)[0]
# print(parsed_sql)
# print(parsed_sql.tokens)
# print(sp.split(qry_input))
# print(qry_output[0])
# print(qry_output[1][0])
# NOTE(review): no visible cell defines qry_output, so this line raises
# NameError (see the recorded output) - define it first or drop this cell.
print(qry_output[4][2])

NameError: name 'qry_output' is not defined

In [30]:
# Scratch cell: inspect the top-level tokens of the parsed statement.
print(parsed_sql.tokens[1])
# flatten() yields lazily, so this prints the generator object itself, not the tokens.
print(parsed_sql.flatten())
for token in parsed_sql.tokens:
    # Tag whitespace tokens with a "w" line before echoing the token.
    if token.is_whitespace:
        print("w")
    print(token)

 
<generator object TokenList.flatten at 0x7f82841600f8>
select
w
 
a, b
w
 
from
w
 
tab1,tab2
