In [26]:
import scripts.sql_parser.sqlite_parser as sql
from scripts.sql_parser.constants import *
from scripts.sql_parser.table_operations import get_table

# Relative paths to the three SQLite sample databases used in this notebook.
NC_PATH = '../../data_sets/db/northwind.db'
SC_PATH = '../../data_sets/db/sakila.db'
CC_PATH = '../../data_sets/db/covid19.db'

# One SqlParser instance per database file.
nc = sql.SqlParser(NC_PATH)
sc = sql.SqlParser(SC_PATH)
cc = sql.SqlParser(CC_PATH)

# Short aliases for each parser's `table` callable, so the query cells stay terse.
nct = nc.table
sct = sc.table
cct = cc.table

In [27]:
# List every table in the Northwind database by running the module's ALL_TABLES query.
# The result also includes SQLite's own bookkeeping table `sqlite_sequence`.
nc.q(sql.ALL_TABLES)

Unnamed: 0,name
0,Categories
1,sqlite_sequence
2,CustomerCustomerDemo
3,CustomerDemographics
4,Customers
5,Employees
6,EmployeeTerritories
7,Order Details
8,Orders
9,Products


In [28]:
# Aggregate functions: each column spec is written as '<function>=<column>:<alias>'.
# For example 'avg=UnitPrice:UP Avg' is emitted as AVG(UnitPrice) AS "UP Avg" in the
# generated SQL shown below the cell.

nct(name='Order Details',
    cols=['avg=UnitPrice:UP Avg',
          'max=UnitPrice:UP Max',
          'min=UnitPrice:UP Min',
          'count=UnitPrice:UP Count'])


SELECT 
	AVG(UnitPrice) AS "UP Avg",
	MAX(UnitPrice) AS "UP Max",
	MIN(UnitPrice) AS "UP Min",
	COUNT(UnitPrice) AS "UP Count"
FROM "Order Details" 


Unnamed: 0,UP Avg,UP Max,UP Min,UP Count
0,28.850379,263.5,2,609283


In [29]:
# Combine DISTINCT, WHERE, ORDER BY, LIMIT and OFFSET in a single table query.
# Each WHERE item starts with a connector prefix: the '|' item is emitted as OR in the
# generated SQL ('&' is presumably AND - confirm against the parser).
# order_by takes (column, direction); -1 is emitted as DESC.
# NOTE(review): '20.0 >= UnitPrice <= 70.0' is rendered as "BETWEEN 20.0 and 70.0", so the
# first operator reads as if it should be '<=' - check the parser's expected range syntax
# before changing it.
# NOTE(review): an earlier version of this comment said GROUP BY was unsupported, but a
# later cell in this notebook passes group_by= and gets a GROUP BY clause.

nct(name='Order Details',
    cols=['UnitPrice'], distinct=True,
    where=['&20.0 >= UnitPrice <= 70.0', '|UnitPrice > 20'],
    order_by=('UnitPrice', -1),
    limit=5, offset=2)


SELECT DISTINCT 
	UnitPrice
FROM "Order Details" 
WHERE UnitPrice BETWEEN 20.0 and 70.0
	OR UnitPrice > 20
ORDER BY UnitPrice DESC
LIMIT 5 OFFSET 2


Unnamed: 0,UnitPrice
0,123.79
1,99.0
2,97.0
3,81.0
4,77.6


In [30]:
# Simple join between two tables on one shared column.
# A table name written as 'Orders:o' gives the table the alias `o` in the generated SQL;
# a name without ':' (here 'Customers') is used unaliased.
# NAME, COLS and INNER are not defined in this notebook - presumably they come from the
# star import of scripts.sql_parser.constants.

nc.join(table_left={NAME: 'Customers', COLS: ['CompanyName', 'Phone', 'Fax']},
        table_right={NAME: 'Orders:o', COLS: ['ShipRegion', 'ShipCountry']},
        shared_col='CustomerID',
        join=INNER,
        limit=5)


SELECT 
	Customers.CompanyName,
	Customers.Phone,
	Customers.Fax,
	o.ShipRegion,
	o.ShipCountry
FROM Customers
INNER JOIN Orders o
ON Customers.CustomerID = o.CustomerID
LIMIT 5


Unnamed: 0,CompanyName,Phone,Fax,ShipRegion,ShipCountry
0,Vins et alcools Chevalier,26.47.15.10,26.47.15.11,Western Europe,France
1,Toms Spezialitäten,0251-031259,0251-035695,Western Europe,Germany
2,Hanari Carnes,(21) 555-0091,(21) 555-8765,South America,Brazil
3,Victuailles en stock,78.32.54.86,78.32.54.87,Western Europe,France
4,Suprêmes délices,(071) 23 67 22 20,(071) 23 67 22 21,Western Europe,Belgium


In [31]:
# Multi join across more than two tables, each with its own join type.
# get_table() builds one table spec per table from: 'Name:Alias', the column used in the
# ON clause (`shared`), the selected columns ('Column:Alias') and the join type.
# Whitespace around a column alias is dropped: 'Phone:                 MyPhone' is emitted
# as `C.Phone AS MyPhone` in the generated SQL.

customers_table = get_table(name='Customers:C',
                            shared='CustomerID',
                            cols=['CompanyName:MyCompany', 'Phone:                 MyPhone', 'Fax:MyFax'])
orders_table = get_table(name='Orders:O',
                         shared='CustomerID',
                         cols=['ShipRegion', 'ShipCountry'],
                         join=INNER)
order_details_table = get_table(name='Order Details:OD',
                                shared='OrderId',
                                cols=['ProductId', 'Quantity', 'UnitPrice'],
                                join=LEFT)

# The call is left as the cell's last expression (not wrapped in print()) so the result is
# rendered as a rich table instead of wrapped, truncated plain text.
# starts_with=('Phone', '3') is emitted as WHERE Phone LIKE '3%'; order_by direction 1 is ASC.
# NOTE(review): the generated SQL joins "Order Details" with O.CustomerID = OD.OrderId and
# every Order Details column comes back empty - confirm whether the join should instead
# match Orders and Order Details on their order id.
nc.multi_join(tables=[customers_table, orders_table, order_details_table],
              starts_with=('Phone', '3'),
              order_by=('ShipRegion', 1),
              limit=5,
              distinct=True)


SELECT DISTINCT 
	C.CompanyName AS MyCompany,
	C.Phone AS MyPhone,
	C.Fax AS MyFax,
	O.ShipRegion,
	O.ShipCountry,
	OD.ProductId,
	OD.Quantity,
	OD.UnitPrice
FROM Customers C
INNER JOIN Orders O ON C.CustomerID = O.CustomerID
LEFT JOIN "Order Details" OD ON O.CustomerID = OD.OrderId
WHERE Phone LIKE '3%'
ORDER BY ShipRegion ASC
LIMIT 5

              MyCompany      MyPhone        MyFax       ShipRegion  \
0         Simons bistro  31 12 34 56  31 13 35 57    British Isles   
1  La corne d'abondance  30.59.84.10  30.59.85.11    British Isles   
2  La corne d'abondance  30.59.84.10  30.59.85.11    British Isles   
3  La corne d'abondance  30.59.84.10  30.59.85.11  Central America   
4         Simons bistro  31 12 34 56  31 13 35 57  Central America   

  ShipCountry ProductID Quantity UnitPrice  
0          UK      None     None      None  
1          UK      None     None      None  
2     Ireland      None     None      None  
3      Mexico      None     None      None  
4      Mexico 

In [32]:
# Multi join over the same three tables as the previous cell, but with each table spec
# written inline as a plain dictionary (keys NAME / SHARED / COLS / JOIN) instead of being
# built with get_table().

nc.multi_join(
    tables=[
        {
            NAME: 'Customers:C',
            SHARED: 'CustomerID',
            COLS: ['CompanyName:Company Name', 'Phone:Land Phone'],
        },
        {
            NAME: 'Orders:O',
            SHARED: 'CustomerID',
            COLS: ['ShipCountry'],
            JOIN: INNER,
        },
        {
            NAME: 'Order Details:OD',
            SHARED: 'OrderId',
            COLS: ['ProductId', 'Quantity', 'UnitPrice: Price'],
            JOIN: LEFT,
        },
    ],
    limit=5,
)


SELECT 
	C.CompanyName AS "Company Name",
	C.Phone AS "Land Phone",
	O.ShipCountry,
	OD.ProductId,
	OD.Quantity,
	OD.UnitPrice AS Price
FROM Customers C
INNER JOIN Orders O ON C.CustomerID = O.CustomerID
LEFT JOIN "Order Details" OD ON O.CustomerID = OD.OrderId
LIMIT 5


Unnamed: 0,Company Name,Land Phone,ShipCountry,ProductID,Quantity,Price
0,Vins et alcools Chevalier,26.47.15.10,France,,,
1,Toms Spezialitäten,0251-031259,Germany,,,
2,Hanari Carnes,(21) 555-0091,Brazil,,,
3,Victuailles en stock,78.32.54.86,France,,,
4,Suprêmes délices,(071) 23 67 22 20,Belgium,,,


In [33]:
# GROUP BY with an aggregate: count the orders per ShipCountry ('count=*:OrderCount') and
# sort by that count, largest first (direction -1 is emitted as DESC).
nct(name='Orders',
    cols=['ShipCountry', 'count=*:OrderCount'],
    group_by='ShipCountry',
    order_by=('OrderCount', -1))


SELECT 
	ShipCountry,
	COUNT("*") AS OrderCount
FROM Orders 
GROUP BY ShipCountry
ORDER BY OrderCount DESC



Unnamed: 0,ShipCountry,OrderCount
0,USA,2328
1,Germany,2193
2,France,1778
3,Brazil,1683
4,UK,1280
5,Mexico,899
6,Venezuela,707
7,Spain,691
8,Canada,547
9,Italy,538


In [34]:
# List every table in the covid19 database by running the module's ALL_TABLES query.

cc.q(sql.ALL_TABLES)

Unnamed: 0,name
0,android_metadata
1,csvImport
2,Country
3,!Info
4,Cases
5,CasesPrevious
6,CountryPrevious
7,India
8,csvImportIndia
9,IndiaCases


In [35]:
# Top five IndiaCases rows by confirmed count, highest first.
# order_by is given as a (column, direction) tuple and the column is spelled exactly as it
# is selected ('Confirmed'); SQLite resolves column names case-insensitively, so the rows
# returned are the same as with the lower-case spelling.
cct(name='IndiaCases',
    cols=['Confirmed', 'Date'],
    order_by=('Confirmed', -1),
    limit=5)


SELECT 
	Confirmed,
	Date
FROM IndiaCases 
ORDER BY confirmed DESC
LIMIT 5



Unnamed: 0,Confirmed,Date
0,94041,2020-06-11
1,90787,2020-06-10
2,88528,2020-06-09
3,85975,2020-06-08
4,82968,2020-06-07
