<a href="https://colab.research.google.com/github/sethkipsangmutuba/SQL/blob/main/1a.%20Querying_in_Notebooks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SELECT and SELECT WHERE in Colab using the Titanic Dataset

## Setup: Load Titanic into SQLite

In [45]:
import seaborn as sns
import sqlite3
import pandas as pd

# Fetch the Titanic passengers as a DataFrame, open (or create) the
# on-disk SQLite database, and write the frame to a table named "titanic".
# if_exists="replace" overwrites any table of that name from a previous run;
# index=False keeps the DataFrame index out of the table.
df = sns.load_dataset("titanic")
conn = sqlite3.connect("titanic.db")
df.to_sql(
    name="titanic",
    con=conn,
    index=False,
    if_exists="replace",
)


891

## **Exploring the table: `SELECT *`**
View all columns (limited to 10 rows)

In [46]:
# Preview every column of the titanic table, capped at ten rows.
preview_sql = "SELECT * FROM titanic LIMIT 10"
pd.read_sql(sql=preview_sql, con=conn)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,1,,Southampton,no,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,0,C,Cherbourg,yes,0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,0,,Southampton,yes,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,0,C,Southampton,yes,0
4,0,3,male,35.0,0,0,8.05,S,Third,man,1,,Southampton,no,1
5,0,3,male,,0,0,8.4583,Q,Third,man,1,,Queenstown,no,1
6,0,1,male,54.0,0,0,51.8625,S,First,man,1,E,Southampton,no,1
7,0,3,male,2.0,3,1,21.075,S,Third,child,0,,Southampton,no,0
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,0,,Southampton,yes,0
9,1,2,female,14.0,1,0,30.0708,C,Second,child,0,,Cherbourg,yes,0


## Unique values using `SELECT DISTINCT`

Use the `SELECT DISTINCT` statement to retrieve unique values from a column.

In [47]:
# List each embark_town value once; the output below shows that a missing
# (NULL) town is reported as its own row.
distinct_towns_sql = "SELECT DISTINCT embark_town FROM titanic"
pd.read_sql(sql=distinct_towns_sql, con=conn)


Unnamed: 0,embark_town
0,Southampton
1,Cherbourg
2,Queenstown
3,


## Save into a new table `Embark_Towns`

In [48]:
# Store the distinct embarkation towns in their own table. IF NOT EXISTS
# means an Embark_Towns table left over from an earlier run is kept as-is
# rather than rebuilt. Then display the table's contents.
create_towns_sql = "CREATE TABLE IF NOT EXISTS Embark_Towns AS SELECT DISTINCT embark_town FROM titanic"
conn.execute(create_towns_sql)
pd.read_sql(sql="SELECT * FROM Embark_Towns", con=conn)


Unnamed: 0,embark_town
0,Southampton
1,Cherbourg
2,Queenstown
3,


## Selecting specific fields

Use the `SELECT` statement to retrieve specific columns from a table.

In [49]:
# Three chosen columns for female passengers who did not survive, sorted by
# age and capped at five rows.
# NOTE: as the output below shows, rows with a missing age come first in the
# ascending sort, so all five results have no age.
female_casualties_sql = """
SELECT sex, age, survived
FROM titanic
WHERE sex = 'female' AND survived = 0
ORDER BY age
LIMIT 5
"""
pd.read_sql(sql=female_casualties_sql, con=conn)


Unnamed: 0,sex,age,survived
0,female,,0
1,female,,0
2,female,,0
3,female,,0
4,female,,0


## SELECT WHERE IN – Match multiple values
Get passengers who embarked from either 'Cherbourg' or 'Southampton':

In [50]:
# First ten passengers whose embark_town is one of the two listed towns.
two_ports_sql = """
SELECT * FROM titanic
WHERE embark_town IN ('Cherbourg', 'Southampton')
LIMIT 10
"""
pd.read_sql(sql=two_ports_sql, con=conn)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,1,,Southampton,no,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,0,C,Cherbourg,yes,0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,0,,Southampton,yes,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,0,C,Southampton,yes,0
4,0,3,male,35.0,0,0,8.05,S,Third,man,1,,Southampton,no,1
5,0,1,male,54.0,0,0,51.8625,S,First,man,1,E,Southampton,no,1
6,0,3,male,2.0,3,1,21.075,S,Third,child,0,,Southampton,no,0
7,1,3,female,27.0,0,2,11.1333,S,Third,woman,0,,Southampton,yes,0
8,1,2,female,14.0,1,0,30.0708,C,Second,child,0,,Cherbourg,yes,0
9,1,3,female,4.0,1,1,16.7,S,Third,child,0,G,Southampton,yes,0


## SELECT WHERE IS NULL / IS NOT NULL – Handle missing data
Find passengers with missing age values:

In [51]:
# Every passenger row with no recorded age. There is no LIMIT, so all
# matching rows come back.
missing_age_sql = """
SELECT * FROM titanic
WHERE age IS NULL
"""
pd.read_sql(sql=missing_age_sql, con=conn)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,,0,0,8.4583,Q,Third,man,1,,Queenstown,no,1
1,1,2,male,,0,0,13.0000,S,Second,man,1,,Southampton,yes,1
2,1,3,female,,0,0,7.2250,C,Third,woman,0,,Cherbourg,yes,1
3,0,3,male,,0,0,7.2250,C,Third,man,1,,Cherbourg,no,1
4,1,3,female,,0,0,7.8792,Q,Third,woman,0,,Queenstown,yes,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,0,3,male,,0,0,7.2292,C,Third,man,1,,Cherbourg,no,1
173,0,3,female,,8,2,69.5500,S,Third,woman,0,,Southampton,no,0
174,0,3,male,,0,0,9.5000,S,Third,man,1,,Southampton,no,1
175,0,3,male,,0,0,7.8958,S,Third,man,1,,Southampton,no,1


Find passengers with non-null age:

In [52]:
# First ten passenger rows that do have a recorded age.
known_age_sql = """
SELECT * FROM titanic
WHERE age IS NOT NULL
LIMIT 10
"""
pd.read_sql(sql=known_age_sql, con=conn)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,1,,Southampton,no,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,0,C,Cherbourg,yes,0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,0,,Southampton,yes,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,0,C,Southampton,yes,0
4,0,3,male,35.0,0,0,8.05,S,Third,man,1,,Southampton,no,1
5,0,1,male,54.0,0,0,51.8625,S,First,man,1,E,Southampton,no,1
6,0,3,male,2.0,3,1,21.075,S,Third,child,0,,Southampton,no,0
7,1,3,female,27.0,0,2,11.1333,S,Third,woman,0,,Southampton,yes,0
8,1,2,female,14.0,1,0,30.0708,C,Second,child,0,,Cherbourg,yes,0
9,1,3,female,4.0,1,1,16.7,S,Third,child,0,G,Southampton,yes,0


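## SELECT WHERE LIKE – Pattern matching
Find passengers whose names contain the substring "Mrs". Note: the `titanic` table loaded above has no name column, so this query cannot be run here; on a table that does include names, the filter would be `WHERE name LIKE '%Mrs%'`.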

## SELECT WHERE BETWEEN – Filter within a range
Get passengers whose age is between 20 and 30:

In [53]:
# First ten passengers aged 20 to 30. The output below includes an age of
# exactly 20.0, i.e. the lower bound itself matches.
twenties_sql = """
SELECT sex, age, class
FROM titanic
WHERE age BETWEEN 20 AND 30
LIMIT 10
"""
pd.read_sql(sql=twenties_sql, con=conn)


Unnamed: 0,sex,age,class
0,male,22.0,Third
1,female,26.0,Third
2,female,27.0,Third
3,male,20.0,Third
4,male,28.0,First
5,male,28.0,First
6,male,21.0,Third
7,female,27.0,Second
8,male,21.0,Third
9,female,29.0,Second


## SELECT WHERE AND / OR – Combine conditions
Find male passengers older than 60:

In [54]:
# Both conditions must hold: the passenger is male AND older than 60.
older_men_sql = """
SELECT age, sex, class, fare
FROM titanic
WHERE sex = 'male' AND age > 60
"""
pd.read_sql(sql=older_men_sql, con=conn)


Unnamed: 0,age,sex,class,fare
0,66.0,male,Second,10.5
1,65.0,male,First,61.9792
2,71.0,male,First,34.6542
3,70.5,male,Third,7.75
4,61.0,male,First,33.5
5,62.0,male,First,26.55
6,65.0,male,Third,7.75
7,61.0,male,Third,6.2375
8,64.0,male,First,263.0
9,65.0,male,First,26.55


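## SELECT WHERE NOT – Exclude specific values
Get all passengers not in 1st class (e.g. `WHERE NOT class = 'First'`). Note that the cell below does not run that query: it demonstrates the OR half of the previous section, selecting passengers whose fare is above 100 or whose age is below 10: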

In [55]:
# Either condition is enough: a fare above 100 OR an age below 10.
high_fare_or_child_sql = """
SELECT age, fare, sex, class
FROM titanic
WHERE fare > 100 OR age < 10
"""
pd.read_sql(sql=high_fare_or_child_sql, con=conn)


Unnamed: 0,age,fare,sex,class
0,2.00,21.0750,male,Third
1,4.00,16.7000,female,Third
2,2.00,29.1250,male,Third
3,8.00,21.0750,female,Third
4,19.00,263.0000,male,First
...,...,...,...,...
108,0.83,18.7500,male,Second
109,4.00,31.2750,male,Third
110,9.00,15.2458,female,Third
111,45.00,164.8667,female,First


## SELECT DISTINCT ... WHERE – Unique filtered values
Get unique embark_town values for survivors only:

In [56]:
# Distinct embark_town values, restricted to rows where survived = 1.
survivor_towns_sql = """
SELECT DISTINCT embark_town FROM titanic
WHERE survived = 1
"""
pd.read_sql(sql=survivor_towns_sql, con=conn)


Unnamed: 0,embark_town
0,Cherbourg
1,Southampton
2,Queenstown
3,
