# Pandas SQLite3 

In [1]:
import pandas as pd
import numpy as np
import sqlite3

## Example 1

#### Create a Dataframe

In [2]:
# Cancellation flag for each of the five sample rows ('Y' = cancelled,
# 'N' = not cancelled).
was_cancelled = ['Y', 'Y', 'N', 'Y', 'N']

# Five-row demo frame that the Example 1 and Example 2 queries run against.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5],
        # Reuse the list above instead of repeating the same literal.
        "was_cancelled": was_cancelled,
        "gender": ['M', 'F', 'F', 'M', 'F'],
        "salary": [10, 15, 20, 5, 10],
        "name": ['zaid ur', 'rakib ur', 'foyez ah', 'adnan ki', 'iqbal ah'],
    }
)

#### Push Dataframe to the Database

In [3]:
# Open an in-memory SQLite database and copy the frame into it as table
# "dataTable". The DataFrame index is written as well and shows up as the
# "index" column in SELECT * results.
conn = sqlite3.connect(':memory:')
df.to_sql(name='dataTable', con=conn)

5

#### Read the Table

In [4]:
# Count every row of dataTable; the single result column is named ALL_FLIGHT.
pd.read_sql(sql="SELECT COUNT(*) AS ALL_FLIGHT FROM dataTable", con=conn)

Unnamed: 0,ALL_FLIGHT
0,5


In [5]:
# List each distinct value stored in the gender column.
pd.read_sql(sql="SELECT DISTINCT gender FROM dataTable", con=conn)

Unnamed: 0,gender
0,M
1,F


In [6]:
# For every name, report the 1-based position of the first 'a' using INSTR.
pd.read_sql(
    sql="SELECT name, INSTR(name, 'a') as string_position FROM dataTable",
    con=conn,
)

Unnamed: 0,name,string_position
0,zaid ur,2
1,rakib ur,2
2,foyez ah,7
3,adnan ki,1
4,iqbal ah,4


In [7]:
# Show each name beside a copy in which every space is replaced by '++'
# (despite the alias, the whitespace is substituted rather than removed).
pd.read_sql(
    sql="SELECT name, REPLACE(name,' ','++') as white_space_removed FROM dataTable",
    con=conn,
)

Unnamed: 0,name,white_space_removed
0,zaid ur,zaid++ur
1,rakib ur,rakib++ur
2,foyez ah,foyez++ah
3,adnan ki,adnan++ki
4,iqbal ah,iqbal++ah


In [8]:
# Keep the rows whose name sorts strictly between 'adnan' and 'zaid'
# (plain string comparison with > and <).
myquery = """
SELECT * FROM dataTable
WHERE name > 'adnan' AND name < 'zaid'
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,1,2,Y,F,15,rakib ur
1,2,3,N,F,20,foyez ah
2,3,4,Y,M,5,adnan ki
3,4,5,N,F,10,iqbal ah


In [9]:
# Range filter written with BETWEEN. Unlike a > / < pair, BETWEEN also
# accepts values equal to either endpoint; no name in the table equals
# 'adnan' or 'zaid', so that difference does not show up in the result.
myquery = """
SELECT * FROM dataTable
WHERE name BETWEEN 'adnan' AND 'zaid'
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,1,2,Y,F,15,rakib ur
1,2,3,N,F,20,foyez ah
2,3,4,Y,M,5,adnan ki
3,4,5,N,F,10,iqbal ah


In [10]:
# Return every row, sorted by name in descending order.
myquery = """
SELECT * FROM dataTable
ORDER BY name DESC
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name
0,0,1,Y,M,10,zaid ur
1,1,2,Y,F,15,rakib ur
2,4,5,N,F,10,iqbal ah
3,2,3,N,F,20,foyez ah
4,3,4,Y,M,5,adnan ki


In [11]:
# myquery = """
# INSERT INTO dataTable (name) VALUES ("nurur")
# """

# myquery = """
# UPDATE dataTable SET name='iqbal' WHERE name='rashed'
# """
# pd.read_sql(myquery, conn)

In [12]:
# Percentage of cancelled vs. scheduled rows, built from three CTEs:
#   f1 - total row count
#   f2 - rows with was_cancelled = 'Y'
#   f3 - rows with was_cancelled = 'N'
# Each CTE yields one row, so "FROM f1, f2, f3" produces a single result row.
# NOTE(review): every operand is an integer, so the division looks like
# integer division (fractions would be truncated); multiply by 100.0
# instead of 100 if fractional percentages are wanted - confirm.
myquery = """
WITH f1 as 
(SELECT COUNT(*) AS ALL_FLIGHT 
FROM dataTable),
f2 as (SELECT COUNT(*) CANCELLED_FLIGHT
FROM dataTable
WHERE was_cancelled='Y'),
f3 as (SELECT COUNT(*) SCHEDULED_FLIGHT
FROM dataTable
WHERE was_cancelled='N')
SELECT 
100*f2.CANCELLED_FLIGHT/f1.ALL_FLIGHT AS CANCELLED_PCT,
100*f3.SCHEDULED_FLIGHT/f1.ALL_FLIGHT AS SCHEDULED_PCT
FROM f1, f2, f3
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,CANCELLED_PCT,SCHEDULED_PCT
0,60,40


In [13]:
# Sum id and salary within each gender group. The gender column itself is
# not selected, so the result rows carry no group label.
myquery = """
SELECT 
SUM(id) as sum_id, 
SUM(salary) as sum_salary
FROM dataTable
GROUP BY gender
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,sum_id,sum_salary
0,10,45
1,5,15


In [14]:
# Per-gender totals again, but HAVING keeps only the groups whose summed
# salary is greater than 20.
myquery = """
SELECT 
SUM(id) as sum_id, 
SUM(salary) as sum_salary
FROM dataTable
GROUP BY gender
HAVING sum_salary > 20
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,sum_id,sum_salary
0,10,45


## Example 2

#### Window Functions

In [15]:
# Window-function variant of the per-gender totals: every original row is
# kept and annotated with the id and salary sums of its gender partition.
myquery = """
SELECT 
*,
sum(id) OVER (PARTITION BY gender) as sum_id,
sum(salary) OVER (PARTITION BY gender) as sum_salary
FROM dataTable
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,index,id,was_cancelled,gender,salary,name,sum_id,sum_salary
0,1,2,Y,F,15,rakib ur,10,45
1,2,3,N,F,20,foyez ah,10,45
2,4,5,N,F,10,iqbal ah,10,45
3,0,1,Y,M,10,zaid ur,5,15
4,3,4,Y,M,5,adnan ki,5,15


## Example 3

#### Count Null Values

In [16]:
# Five-row frame with missing values (NaN): one in "gender" and two in
# "name". It is the input for the NULL-counting queries.
df = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4, 5],
        was_cancelled=['Y', 'Y', 'N', 'Y', 'N'],
        gender=['M', 'F', 'F', np.nan, 'F'],
        salary=[10, 15, 20, 5, 10],
        name=[np.nan, 'rakib', 'foyez', np.nan, 'iqbal'],
    )
)

# Display the frame as the cell output.
df

Unnamed: 0,id,was_cancelled,gender,salary,name
0,1,Y,M,10,
1,2,Y,F,15,rakib
2,3,N,F,20,foyez
3,4,Y,,5,
4,5,N,F,10,iqbal


#### Push Dataframe to the Database

In [17]:
# Example 3 uses a fresh in-memory database. Close the connection left over
# from the earlier examples first: rebinding `conn` alone would leave that
# connection open.
try:
    conn.close()
except NameError:
    pass  # no earlier connection exists when this cell is run on its own
conn = sqlite3.connect(':memory:')
# Copy the frame (including its index) into table "nullTable".
df.to_sql('nullTable', conn)

5

In [18]:
# Count all rows, the rows whose name is not NULL (scalar subquery), and
# the difference between the two, i.e. the rows whose name is NULL.
myquery = """
SELECT 
COUNT(*) as total_count,
(SELECT COUNT(*) FROM nullTable WHERE name IS NOT NULL) as notnull_count,
COUNT(*) - (SELECT COUNT(*) FROM nullTable WHERE name IS NOT NULL) as null_count
FROM nullTable
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,total_count,notnull_count,null_count
0,5,3,2


In [19]:
# The same three counts without subqueries: each CASE expression maps a row
# to 1 or 0, and SUM adds those flags up.
myquery = """
SELECT 
COUNT(*) as total_count,
SUM(CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END) as notnull_count,
SUM(CASE WHEN name IS NULL THEN 1 ELSE 0 END) as null_count
FROM nullTable
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,total_count,notnull_count,null_count
0,5,3,2


#### Date, Date Difference, Julian Date

In [20]:
# Date helpers (no table is read):
#   date1 - the literal date passed through DATE()
#   date2 - DATE() minus DATE('1970-10-18')
#   date3 - elapsed days between the two Julian day numbers, divided by 365
# NOTE(review): date2 presumably subtracts only the leading year digits of
# the two date strings (whole years), not a true date difference - compare
# the recorded 52 with date3's 51.975. Prefer the julianday() form when an
# exact difference is needed; confirm against the SQLite documentation.
myquery = """
SELECT 
DATE('1970-10-18') as date1, 
DATE()-DATE('1970-10-18') as date2, 
(julianday() - julianday('1970-10-18'))/365 as date3
"""
pd.read_sql(sql=myquery, con=conn)

Unnamed: 0,date1,date2,date3
0,1970-10-18,52,51.975323


In [21]:
# Done with the examples: close the SQLite connection held in `conn`.
conn.close()