**Working through Simon's SQL tutorials**
* https://www.youtube.com/watch?v=IrCowiKyyBA

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sql_query import sql_select
import os

# Location of the Formula 1 SQLite database file.
# Set the F1_DB_PATH environment variable to point at a different copy of the
# database; when it is unset (or empty) the original local path is used, so the
# notebook behaves exactly as before on the author's machine.
F1_DB_PATH = os.environ.get('F1_DB_PATH') or (
    'C:/Users/jpkee/Documents/My Tableau Repository/Datasources/'
    'F1_data-master/F1_data-master/Formula1.db'
)

# SQLAlchemy SQLite URL: the 'sqlite:///' prefix followed by the file path.
engine = create_engine('sqlite:///' + F1_DB_PATH)

In [2]:
# Initialize a sql_select object describing a simple query.

# sl = the columns for the SELECT clause
# fr = the table for the FROM clause
# - the parameter names can be omitted and the values passed positionally
qry = sql_select(
                sl=['driverId', 'forename', 'surname'],
                fr='drivers'
                    )


In [3]:
# Confirm that qry is an instance of the sql_select class
type(qry)

sql_query.sql_query.sql_select

In [4]:
# Inspect the selected columns, which the query object exposes as its `sl` attribute
print(qry.sl)

['driverId', 'forename', 'surname']


In [5]:
# Inspect the source table name, which the query object exposes as its `fr` attribute
print(qry.fr)

drivers


In [6]:
# Use the select method to add one more field to the existing selection
qry.select('nationality')

In [7]:
# Check that 'nationality' is now part of the selection.
# Note: the printed list is not in the order the fields were originally supplied.
print(qry.sl)

['forename', 'surname', 'nationality', 'driverId']


In [8]:
# Replace the selection: clear=True drops the previously selected fields
# (in this case removing 'nationality') and keeps only the list given here
qry.select(['driverId', 'forename', 'surname'], clear = True)

In [9]:
# Verify that 'nationality' was removed and only the three fields remain
print(qry.sl)

['driverId', 'forename', 'surname']


In [10]:
# Printing the query object renders it as SQL text
print(qry)

SELECT driverId, forename, surname FROM drivers


In [11]:
# Run the query against the database and load the complete result set into a
# DataFrame. str(qry) produces the SQL text; read_sql_query is the explicit
# pandas entry point for a SQL string and is what the rest of the notebook uses.
data = pd.read_sql_query(str(qry), con=engine)

In [12]:
# Size of the result as (rows, columns)
data.shape

(847, 3)

In [13]:
# Show the last rows of the result
data.tail()

Unnamed: 0,driverId,forename,surname
842,844,Charles,Leclerc
843,845,Sergey,Sirotkin
844,846,Lando,Norris
845,847,George,Russell
846,848,Alexander,Albon


**Video #2**
* https://www.youtube.com/watch?v=l5_tcFaG0nM

In [14]:

# Create a new select, this time passing the column list and the table name positionally.
# NOTE(review): 'driverID' differs in case from the 'driverId' spelling used earlier — confirm which is intended.
s_qry = sql_select(['driverID', 'forename', 'surname'], 'drivers')
# more on sql_select here: https://programmer.spip.net/sql_select,655
# NOTE(review): that link appears to describe a different sql_select (a PHP function), not this Python package — confirm.


In [15]:
# The query selects the three listed columns from the 'drivers' table;
# print it to confirm the generated SQL
print(s_qry)

SELECT driverID, forename, surname FROM drivers


In [16]:
# Basic WHERE clause, added with the .where method:
# attr is the field to filter on, cond is the value to search for in that field
s_qry.where(attr="nationality", cond="Dutch")

In [17]:
# Confirm that the WHERE condition now appears in the generated SQL
print(s_qry)

SELECT driverID, forename, surname FROM drivers WHERE nationality = "Dutch"


In [18]:
# Execute the filtered query and preview the first rows of the result
data = pd.read_sql_query(str(s_qry), con=engine)
data.head()

Unnamed: 0,driverId,forename,surname
0,27,Christijan,Albers
1,38,Robert,Doornbos
2,50,Jos,Verstappen
3,136,Jan,Lammers
4,179,Huub,Rothengatter


In [19]:
# Add a second WHERE condition; it is joined to the first one with AND.
# eq sets the comparison operator, and with d=True the condition is rendered
# with a NOT prefix, as the printed SQL shows (NOT DriverId < 800).
s_qry.where(attr="DriverId", cond=800, eq='<', d=True)
print(s_qry)

SELECT driverID, forename, surname FROM drivers WHERE nationality = "Dutch" AND NOT DriverId < 800


In [20]:
# Execute the updated query and load the result into a DataFrame
data = pd.read_sql_query(str(s_qry), con=engine)

In [21]:
# Preview the rows that satisfy both conditions
data.head()

Unnamed: 0,driverId,forename,surname
0,823,Giedo,van der Garde
1,830,Max,Verstappen


In [22]:
# Preparation for using a list as a condition

# first, clear all the existing WHERE filters
s_qry.where(clear=True)

# confirm that the WHERE clause is gone from the generated SQL
print(s_qry)

SELECT driverID, forename, surname FROM drivers


In [23]:
# Pass a list as cond to filter the field on several values at once
s_qry.where(attr="nationality", cond=["Dutch", "Belgian"])

In [24]:
print(s_qry)
# Note: the values from the list are combined with OR inside a single parenthesised group

SELECT driverID, forename, surname FROM drivers WHERE (nationality = "Dutch" OR nationality = "Belgian")


In [25]:
# Execute the list-filtered query and preview the first rows of the result
data = pd.read_sql_query(str(s_qry), con=engine)
data.head()

Unnamed: 0,driverId,forename,surname
0,27,Christijan,Albers
1,38,Robert,Doornbos
2,50,Jos,Verstappen
3,92,Bertrand,Gachot
4,113,Philippe,Adams


In [26]:
# # try a select all
# s_qry2 = sql_select("SELECT * FROM tasks", 'drivers')

In [27]:
# Video 3
# https://www.youtube.com/watch?v=uu09SgA0xuc

# Topic: GROUP BY and HAVING

# Create a new select with a GROUP BY clause, supplied through the gb argument.
# Note that SQL expressions and aliases (here COUNT(driverId) as cnt) can be used as fields.
s_qryV3 = sql_select( sl=['nationality', 'COUNT(driverId) as cnt'],
                    fr='drivers',
                    gb='nationality')




In [28]:
# Confirm the generated SQL, including the GROUP BY clause
print(s_qryV3)

SELECT nationality, COUNT(driverId) as cnt FROM drivers GROUP BY nationality


In [29]:
# Execute the grouped query and load the per-nationality counts into a DataFrame
dataV3 = pd.read_sql_query(str(s_qryV3), con=engine)

In [30]:
import seaborn as sns

In [31]:
# cm = sns.light_palette("red", as_cmap=True)

In [32]:
# Display the result as a styled table shaded with the 'viridis' colormap
dataV3.style.background_gradient(cmap='viridis')

Unnamed: 0,nationality,cnt
0,American,157
1,American-Italian,1
2,Argentine,24
3,Argentine-Italian,1
4,Australian,17
5,Austrian,15
6,Belgian,23
7,Brazilian,31
8,British,164
9,Canadian,13


In [33]:
# Plain (unstyled) preview of the first rows
dataV3.head()

Unnamed: 0,nationality,cnt
0,American,157
1,American-Italian,1
2,Argentine,24
3,Argentine-Italian,1
4,Australian,17


In [34]:
# Display the result as a styled table with fixed CSS properties:
# black background, lawngreen text and a white border colour
dataV3.style.set_properties(**{'background-color': 'black',
                            'color': 'lawngreen',
                            'border-color': 'white'})

Unnamed: 0,nationality,cnt
0,American,157
1,American-Italian,1
2,Argentine,24
3,Argentine-Italian,1
4,Australian,17
5,Austrian,15
6,Belgian,23
7,Brazilian,31
8,British,164
9,Canadian,13


In [35]:
# Per Video 3: filter the grouped results with the .having method,
# which is called with the same attr / cond / eq arguments as .where
s_qryV3.having(attr='COUNT(driverId)', cond=100, eq='>')

In [36]:
# Confirm that the HAVING clause now appears in the generated SQL
print(s_qryV3)

SELECT nationality, COUNT(driverId) as cnt FROM drivers GROUP BY nationality HAVING COUNT(driverId) > 100


In [37]:
# Re-run the query so dataV3 holds only the groups that pass the HAVING filter
dataV3 = pd.read_sql_query(str(s_qryV3), con=engine)

In [38]:
# Preview the nationalities with more than 100 drivers
dataV3.head()

Unnamed: 0,nationality,cnt
0,American,157
1,British,164
