In [92]:
import sqlite3
import configparser
import pandas as pd

from sqlalchemy import create_engine

### Saving Data to SQLite DB from CSV

In [2]:
conn = sqlite3.connect('titanic_data.db')

df = pd.read_csv('data/titanic.csv')
df.to_sql('titanic_csv',  conn, if_exists='replace', index=False)
conn.close()

### Using Select

#### Select operation is used to fetch a required piece of information from the given data.

In [3]:
conn = sqlite3.connect('titanic_data.db')
select_df = pd.read_sql_query("SELECT * FROM titanic_csv", conn)
conn.close()

#### Note: Avoid using * if you don't need to get the all the information

In [5]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name, 
    Sex 
FROM titanic_csv
"""
select_df = pd.read_sql_query(query, conn)
conn.close()

select_df.head()

Unnamed: 0,Name,Sex
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female
4,"Allen, Mr. William Henry",male


### ALIAS or RENAMING A COLUMN

In [6]:
conn = sqlite3.connect('titanic_data.db')
query = """
SELECT 
    Name, 
    Sex as 'Gender'
FROM titanic_csv
"""
select_df = pd.read_sql_query(query, conn)
conn.close()
select_df.head()

Unnamed: 0,Name,Gender
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female
4,"Allen, Mr. William Henry",male


### Concat

In [25]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Ticket || "-" || Fare as "Ticket Fare"
FROM titanic_csv
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Ticket Fare
0,A/5 21171-7.25
1,PC 17599-71.2833
2,STON/O2. 3101282-7.925
3,113803-53.1
4,373450-8.05


### UPPER, LOWER

In [29]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    upper(NAME) as "Name to Upper", 
    lower(NAME) as "Name to Lower"
FROM titanic_csv
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name to Upper,Name to Lower
0,"BRAUND, MR. OWEN HARRIS","braund, mr. owen harris"
1,"CUMINGS, MRS. JOHN BRADLEY (FLORENCE BRIGGS TH...","cumings, mrs. john bradley (florence briggs th..."
2,"HEIKKINEN, MISS. LAINA","heikkinen, miss. laina"
3,"FUTRELLE, MRS. JACQUES HEATH (LILY MAY PEEL)","futrelle, mrs. jacques heath (lily may peel)"
4,"ALLEN, MR. WILLIAM HENRY","allen, mr. william henry"


### Length

In [33]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    length(NAME) as "NAME LENGTH",
    Name
FROM titanic_csv
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,NAME LENGTH,Name
0,23,"Braund, Mr. Owen Harris"
1,51,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,22,"Heikkinen, Miss. Laina"
3,44,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,24,"Allen, Mr. William Henry"


### DISTINCT

In [34]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    DISTINCT(Embarked)
FROM titanic_csv
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Embarked
0,S
1,C
2,Q
3,


### WHERE
#### This is a conditional operation we mostly use to find data values from the data that follows some condition. 

#### Available Operators:
* =
* >,>=, <,<=
* BETWEEN 
* IN
* LIKE

In [36]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name, 
    Sex as 'Gender'
FROM titanic_csv
WHERE Sex = :gender
"""

df = pd.read_sql(query, conn, params={"gender":"male"})

conn.close()
df.head()

Unnamed: 0,Name,Gender
0,"Braund, Mr. Owen Harris",male
1,"Allen, Mr. William Henry",male
2,"Moran, Mr. James",male
3,"McCarthy, Mr. Timothy J",male
4,"Palsson, Master. Gosta Leonard",male


### IS NOT NULL

In [39]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Distinct(Embarked)
FROM titanic_csv
WHERE Embarked IS NOT NULL
"""

df = pd.read_sql(query, conn, params={"gender":"male"})

conn.close()
df.head()

Unnamed: 0,Embarked
0,S
1,C
2,Q


### IS NULL

In [40]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Distinct(Embarked)
FROM titanic_csv
WHERE Embarked IS NULL
"""

df = pd.read_sql(query, conn, params={"gender":"male"})

conn.close()
df.head()

Unnamed: 0,Embarked
0,


### Greater than

In [41]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name,
    Fare
FROM titanic_csv
WHERE Fare > :fare
"""

df = pd.read_sql(query, conn, params={"fare":53})

conn.close()
df.head()

Unnamed: 0,Name,Fare
0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",71.2833
1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",53.1
2,"Fortune, Mr. Charles Alexander",263.0
3,"Spencer, Mrs. William Augustus (Marie Eugenie)",146.5208
4,"Meyer, Mr. Edgar Joseph",82.1708


### Less than

In [42]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name,
    Fare
FROM titanic_csv
WHERE Fare < :fare
"""

df = pd.read_sql(query, conn, params={"fare":10})

conn.close()
df.head()

Unnamed: 0,Name,Fare
0,"Braund, Mr. Owen Harris",7.25
1,"Heikkinen, Miss. Laina",7.925
2,"Allen, Mr. William Henry",8.05
3,"Moran, Mr. James",8.4583
4,"Saundercock, Mr. William Henry",8.05


### BETWEEN

In [75]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name,
    Fare,
    Age
FROM titanic_csv
WHERE Age BETWEEN :start_age AND :end_age
"""

df = pd.read_sql(query, conn, params={"start_age": 1, "end_age": 10})

conn.close()
df.head()

Unnamed: 0,Name,Fare,Age
0,"Palsson, Master. Gosta Leonard",21.075,2.0
1,"Sandstrom, Miss. Marguerite Rut",16.7,4.0
2,"Rice, Master. Eugene",29.125,2.0
3,"Palsson, Miss. Torborg Danira",21.075,8.0
4,"Laroche, Miss. Simonne Marie Anne Andree",41.5792,3.0


### IN

In [60]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name,
    Fare
FROM titanic_csv
WHERE Embarked IN ("S", "C", "Q")
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name,Fare
0,"Braund, Mr. Owen Harris",7.25
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",71.2833
2,"Heikkinen, Miss. Laina",7.925
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",53.1
4,"Allen, Mr. William Henry",8.05


In [63]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    Name, 
    Embarked 
FROM titanic_csv 
where Embarked IN (
    SELECT DISTINCT(Embarked) 
    FROM titanic_csv 
    where Embarked is NOT NULL and length(Embarked) > 0
)
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name,Embarked
0,"Braund, Mr. Owen Harris",S
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",C
2,"Heikkinen, Miss. Laina",S
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",S
4,"Allen, Mr. William Henry",S


### Case When

In [65]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT CASE
    WHEN fare < 100  THEN "Upper"
    WHEN fare BETWEEN 50 and 100 THEN "Middle"
    ELSE "Lower"
END AS 'fare_category'
FROM titanic_csv 
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,fare_category
0,Upper
1,Upper
2,Upper
3,Upper
4,Upper


### LIKE

##### Startswith

In [76]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME
FROM titanic_csv 
WHERE Name LIKE 'Leonard%%'
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name
0,"Leonard, Mr. Lionel"


#### Endswith

In [77]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME
FROM titanic_csv 
WHERE Name LIKE '%%Joseph'
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name
0,"Meyer, Mr. Edgar Joseph"
1,"Shorney, Mr. Charles Joseph"
2,"Peduzzi, Mr. Joseph"
3,"Murdlin, Mr. Joseph"
4,"Vande Velde, Mr. Johannes Joseph"


#### Anything with

In [80]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME
FROM titanic_csv 
WHERE Name LIKE '%%Edgar%%'
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name
0,"Meyer, Mr. Edgar Joseph"
1,"Andrew, Mr. Edgardo Samuel"
2,"Meyer, Mrs. Edgar Joseph (Leila Saks)"


### ORDER BY

In [81]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME
FROM titanic_csv 
ORDER BY FARE
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name
0,"Leonard, Mr. Lionel"
1,"Harrison, Mr. William"
2,"Tornquist, Mr. William Henry"
3,"Parkes, Mr. Francis ""Frank"""
4,"Johnson, Mr. William Cahoone Jr"


In [82]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME
FROM titanic_csv 
ORDER BY FARE desc
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name
0,"Ward, Miss. Anna"
1,"Cardeza, Mr. Thomas Drake Martinez"
2,"Lesurer, Mr. Gustave J"
3,"Fortune, Mr. Charles Alexander"
4,"Fortune, Miss. Mabel Helen"


In [84]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    NAME,
    FARE
FROM titanic_csv 
ORDER BY FARE desc, Name
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Name,Fare
0,"Cardeza, Mr. Thomas Drake Martinez",512.3292
1,"Lesurer, Mr. Gustave J",512.3292
2,"Ward, Miss. Anna",512.3292
3,"Fortune, Miss. Alice Elizabeth",263.0
4,"Fortune, Miss. Mabel Helen",263.0


### AGGREGATE

* COUNT
* AVG
* SUM
* MAX
* MIN

In [89]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT 
    COUNT(NAME) as 'Passenger Count', 
    AVG(AGE) as 'Average Age',
    MIN(AGE) as 'Minimum Age',
    MAX(AGE) as 'Maximum Age',
    SUM(Fare) as 'Total Fare'
FROM titanic_csv 
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Passenger Count,Average Age,Minimum Age,Maximum Age,Total Fare
0,891,29.699118,0.42,80.0,28693.9493


### GROUP BY

In [90]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT COUNT(SEX) AS 'Gender Count'
FROM titanic_csv 
GROUP BY SEX
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Gender Count
0,314
1,577


### HAVING

In [91]:
conn = sqlite3.connect('titanic_data.db')

query = """
SELECT AVG(Fare) AS 'Gender Count'
FROM titanic_csv 
GROUP BY Embarked
HAVING AVG(Fare) > 30
"""

df = pd.read_sql(query, conn)

conn.close()
df.head()

Unnamed: 0,Gender Count
0,80.0
1,59.954144
