# Pandas SQLite3

In [1]:
import pandas as pd

In [2]:
from sqlite3 import connect

### Create a DataFrame

In [3]:
# Sample data: five rows, described column by column (position i in every list is row i).
# `was_cancelled` is kept as its own list and reused as a column below.
was_cancelled = ['Y', 'Y', 'N', 'Y', 'N']

df = pd.DataFrame(
    data=dict(
        id=[1, 2, 3, 4, 5],
        was_cancelled=was_cancelled,
        gender=['M', 'F', 'F', 'M', 'F'],
        salary=[10, 15, 20, 5, 10],
        name=['zaid', 'rakib', 'foyez', 'adnan', 'iqbal'],
    )
)

### Create a SQLite3 Connection

In [4]:
# ':memory:' asks SQLite for a temporary in-memory database instead of a file on disk.
conn = connect(database=':memory:')

### Push DataFrame to the Database

In [5]:
# Write df into the SQLite table `dataTable`; the call returns the number of rows written.
# The DataFrame index is written as well (to_sql's default), which is why later
# `SELECT *` results contain an `index` column.
# if_exists='replace' keeps this cell re-runnable: with the default ('fail'), executing it
# a second time on the same connection raises ValueError because the table already exists.
df.to_sql('dataTable', conn, if_exists='replace')

5

### Read the Table

In [6]:
# Count every row in the table; the single result column is aliased ALL_FLIGHT.
pd.read_sql_query(sql="SELECT COUNT(*) AS ALL_FLIGHT FROM dataTable", con=conn)

Unnamed: 0,ALL_FLIGHT
0,5


In [7]:
# List each gender value once, however many rows share it.
pd.read_sql_query(sql="SELECT DISTINCT gender FROM dataTable", con=conn)

Unnamed: 0,gender
0,M
1,F


In [8]:
# Strict comparisons on text: both bounds are exclusive, so the rows named
# 'adnan' and 'zaid' themselves are left out of the result.
myquery = """
SELECT * FROM dataTable
WHERE name > 'adnan' AND name < 'zaid'
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,1,2,Y,F,15,rakib
1,2,3,N,F,20,foyez
2,4,5,N,F,10,iqbal


In [9]:
# BETWEEN is inclusive on both ends, so unlike the strict >/< comparison
# the rows named 'adnan' and 'zaid' are part of the result as well.
myquery = """
SELECT * FROM dataTable
WHERE name BETWEEN 'adnan' AND 'zaid'
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,0,1,Y,M,10,zaid
1,1,2,Y,F,15,rakib
2,2,3,N,F,20,foyez
3,3,4,Y,M,5,adnan
4,4,5,N,F,10,iqbal


In [10]:
# Return every row, sorted by name in descending (reverse alphabetical) order.
myquery = """
SELECT * FROM dataTable
ORDER BY name DESC
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,0,1,Y,M,10,zaid
1,1,2,Y,F,15,rakib
2,4,5,N,F,10,iqbal
3,2,3,N,F,20,foyez
4,3,4,Y,M,5,adnan


In [11]:
# Examples of data-modifying statements, intentionally left disabled.
# NOTE(review): INSERT/UPDATE return no result set, so pd.read_sql (which builds a
# DataFrame from returned rows) is presumably the wrong tool for them; consider
# conn.execute(myquery) followed by conn.commit() instead -- confirm before enabling.
# NOTE(review): in standard SQL double quotes delimit identifiers, so the value in the
# INSERT should probably be written 'nurur' with single quotes -- confirm.
# NOTE(review): the UPDATE matches name='rashed', which is not one of the sample names
# loaded into the table, so as written it would not change any row.

# myquery = """
# INSERT INTO dataTable (name) VALUES ("nurur")
# """

# myquery = """
# UPDATE dataTable SET name='iqbal' WHERE name='rashed'
# """
# pd.read_sql(myquery, conn)

In [12]:
# Three single-row CTEs: f1 counts all rows, f2 the rows with was_cancelled='Y',
# f3 the rows with was_cancelled='N'. Cross-joining them yields one row from which
# both percentages are derived.
# The counts are integers, so 100*count/total is integer division: each percentage
# is truncated to a whole number.
myquery = """
WITH f1 as 
(SELECT COUNT(*) AS ALL_FLIGHT 
FROM dataTable),
f2 as (SELECT COUNT(*) CANCELLED_FLIGHT
FROM dataTable
WHERE was_cancelled='Y'),
f3 as (SELECT COUNT(*) SCHEDULED_FLIGHT
FROM dataTable
WHERE was_cancelled='N')
SELECT 
100*f2.CANCELLED_FLIGHT/f1.ALL_FLIGHT AS CANCELLED_PCT,
100*f3.SCHEDULED_FLIGHT/f1.ALL_FLIGHT AS SCHEDULED_PCT
FROM f1, f2, f3
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,CANCELLED_PCT,SCHEDULED_PCT
0,60,40


In [13]:
# Aggregate per gender: one output row per distinct gender value, holding the
# summed id and summed salary of that group.
# NOTE(review): gender itself is not selected, so the result rows carry no label
# saying which group they belong to -- consider adding it to the SELECT list.
myquery = """
SELECT 
sum(id) as sum_id, 
sum(salary) as sum_salary
FROM dataTable
GROUP BY gender
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,sum_id,sum_salary
0,10,45
1,5,15


In [14]:
# Same per-gender aggregation, but HAVING filters the groups after the sums are
# computed: only groups whose total salary exceeds 20 are kept.
# The HAVING clause refers to the sum_salary alias defined in the SELECT list.
myquery = """
SELECT 
sum(id) as sum_id, 
sum(salary) as sum_salary
FROM dataTable
GROUP BY gender
HAVING sum_salary > 20
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,sum_id,sum_salary
0,10,45


### Window Functions

In [15]:
# Window-function variant of the per-gender sums: every original row is kept and
# the totals of its gender partition are attached as two extra columns, instead of
# collapsing each group to a single row as GROUP BY does.
myquery = """
SELECT 
*,
sum(id) OVER (PARTITION BY gender) as sum_id,
sum(salary) OVER (PARTITION BY gender) as sum_salary
FROM dataTable
"""
pd.read_sql_query(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name,sum_id,sum_salary
0,1,2,Y,F,15,rakib,10,45
1,2,3,N,F,20,foyez,10,45
2,4,5,N,F,10,iqbal,10,45
3,0,1,Y,M,10,zaid,5,15
4,3,4,Y,M,5,adnan,5,15


In [16]:
# Release the connection. It was opened on ':memory:', so the database and the
# dataTable table are not persisted anywhere once it is closed; earlier cells must
# be re-run (starting from the connect cell) before querying again.
conn.close()