<a href="https://colab.research.google.com/github/sethkipsangmutuba/SQL/blob/main/1d.%20Reading_data_across_multiple_tables.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup: load the Titanic dataset into an in-memory SQLite database

In [241]:
import sqlite3
import pandas as pd
import seaborn as sns

# Fetch the Titanic sample data and turn the positional row index into a
# passenger_id column that acts as a simulated primary key.
df = (
    sns.load_dataset('titanic')
    .reset_index()
    .rename(columns={"index": "passenger_id"})
)

# Open an in-memory SQLite database for the queries that follow.
conn = sqlite3.connect(":memory:")

# Carve the frame into two tables that share passenger_id as the join key:
# person attributes in one, trip attributes in the other.
passengers = df.loc[:, ['passenger_id', 'who', 'sex', 'age']]
embark_info = df.loc[:, ['passenger_id', 'embark_town', 'fare', 'class']]

# Write both tables, replacing any copy left over from a previous run.
passengers.to_sql("passengers", conn, index=False, if_exists='replace')
embark_info.to_sql("embark_info", conn, index=False, if_exists='replace')


891

Read a single column from a single table

In [242]:
# Preview one column from one table.
# ORDER BY pins the row order: without it SQL does not define which ten rows
# LIMIT returns, so the preview would not be guaranteed reproducible.
pd.read_sql("""
SELECT sex
FROM passengers
ORDER BY passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,sex
0,male
1,female
2,female
3,female
4,male
5,male
6,male
7,male
8,female
9,female


Read multiple columns from a single table

In [243]:
# Preview several columns from one table; columns come back in the order they
# are listed in SELECT.
# ORDER BY makes the ten-row sample deterministic (LIMIT alone leaves the
# choice of rows undefined).
pd.read_sql("""
SELECT who, age, sex
FROM passengers
ORDER BY passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,who,age,sex
0,man,22.0,male
1,woman,38.0,female
2,woman,26.0,female
3,woman,35.0,female
4,man,35.0,male
5,man,,male
6,man,54.0,male
7,child,2.0,male
8,woman,27.0,female
9,child,14.0,female


Read multiple columns from multiple tables

Here we use a JOIN on passenger_id.

In [244]:
# Inner join of passengers and embark_info on their shared passenger_id key,
# combining person attributes with trip attributes in one result set.
# Column names are fully qualified because passenger_id exists in both tables.
# ORDER BY keeps the ten-row preview deterministic; LIMIT without it returns
# rows in an undefined order.
pd.read_sql("""
SELECT passengers.passenger_id, passengers.who, passengers.age, embark_info.embark_town, embark_info.fare
FROM passengers
JOIN embark_info
ON passengers.passenger_id = embark_info.passenger_id
ORDER BY passengers.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,passenger_id,who,age,embark_town,fare
0,0,man,22.0,Southampton,7.25
1,1,woman,38.0,Cherbourg,71.2833
2,2,woman,26.0,Southampton,7.925
3,3,woman,35.0,Southampton,53.1
4,4,man,35.0,Southampton,8.05
5,5,man,,Queenstown,8.4583
6,6,man,54.0,Southampton,51.8625
7,7,child,2.0,Southampton,21.075
8,8,woman,27.0,Southampton,11.1333
9,9,child,14.0,Cherbourg,30.0708


Implicit JOIN using a WHERE clause (comma-separated tables)

In [245]:
# Implicit (comma) join: both tables are listed in FROM and the key match is
# expressed in WHERE. This returns the same rows as an explicit
# JOIN ... ON p.passenger_id = e.passenger_id.
# p and e are table aliases that shorten the column references.
# ORDER BY makes the LIMIT preview deterministic.
pd.read_sql("""
SELECT p.who, e.embark_town, e.fare
FROM passengers p, embark_info e
WHERE p.passenger_id = e.passenger_id
ORDER BY p.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,who,embark_town,fare
0,man,Southampton,7.25
1,woman,Cherbourg,71.2833
2,woman,Southampton,7.925
3,woman,Southampton,53.1
4,man,Southampton,8.05
5,man,Queenstown,8.4583
6,man,Southampton,51.8625
7,child,Southampton,21.075
8,woman,Southampton,11.1333
9,child,Cherbourg,30.0708


Join with filtering (e.g., passengers who embarked from Queenstown)

In [246]:
# Join the two tables, then keep only passengers whose embark_town is
# 'Queenstown'. The ON clause defines how rows are matched; the WHERE clause
# filters the matched rows.
# ORDER BY fixes which ten matching rows LIMIT returns.
pd.read_sql("""
SELECT p.sex, p.age, e.embark_town, e.fare
FROM passengers p
JOIN embark_info e
ON p.passenger_id = e.passenger_id
WHERE e.embark_town = 'Queenstown'
ORDER BY p.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,sex,age,embark_town,fare
0,male,,Queenstown,8.4583
1,male,2.0,Queenstown,29.125
2,female,15.0,Queenstown,8.0292
3,female,,Queenstown,7.8792
4,female,,Queenstown,7.75
5,female,19.0,Queenstown,7.8792
6,male,,Queenstown,15.5
7,female,,Queenstown,7.75
8,female,,Queenstown,7.7875
9,female,,Queenstown,24.15


SETUP: Load Titanic Dataset and Simulate Multiple Tables

In [247]:
import seaborn as sns
import pandas as pd
import sqlite3

# Load the Titanic dataset and expose the row index as passenger_id, the key
# shared by every table below. The chained calls build a new frame rather than
# mutating one in place with inplace=True.
df = (
    sns.load_dataset('titanic')
    .reset_index()
    .rename(columns={'index': 'passenger_id'})
)

# Every join in this section matches on passenger_id; a duplicated key would
# silently multiply rows, so check the invariant up front.
assert df['passenger_id'].is_unique, "passenger_id must be unique"

# Simulate a normalized schema by splitting the frame into logical tables.
# NOTE: embark_info is redefined here with different columns than the
# embark_info built in the first setup cell (no fare/class; adds embarked/deck).
passenger_info = df[['passenger_id', 'sex', 'age', 'class', 'fare', 'who']]
embark_info    = df[['passenger_id', 'embarked', 'embark_town', 'deck']]
survival_info  = df[['passenger_id', 'survived', 'pclass', 'alone']]
family_info    = df[['passenger_id', 'sibsp', 'parch', 'adult_male']]

# Save to a new in-memory SQLite database.
# NOTE: this rebinds conn to a separate, empty database, so tables written
# through the previous conn (e.g. passengers) are not reachable from here on.
conn = sqlite3.connect(':memory:')
passenger_info.to_sql('passenger_info', conn, index=False, if_exists='replace')
embark_info.to_sql('embark_info', conn, index=False, if_exists='replace')
survival_info.to_sql('survival_info', conn, index=False, if_exists='replace')
family_info.to_sql('family_info', conn, index=False, if_exists='replace')


891

Reading Data Across Multiple Tables

Basic JOIN — Age and Embark Town

In [248]:
# Two-table inner join: age from passenger_info alongside embark_town from
# embark_info, matched on passenger_id.
# ORDER BY makes the ten-row preview deterministic; LIMIT alone does not
# define which rows come back.
pd.read_sql("""
SELECT pi.passenger_id, pi.age, ei.embark_town
FROM passenger_info pi
JOIN embark_info ei ON pi.passenger_id = ei.passenger_id
ORDER BY pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,passenger_id,age,embark_town
0,0,22.0,Southampton
1,1,38.0,Cherbourg
2,2,26.0,Southampton
3,3,35.0,Southampton
4,4,35.0,Southampton
5,5,,Queenstown
6,6,54.0,Southampton
7,7,2.0,Southampton
8,8,27.0,Southampton
9,9,14.0,Cherbourg


 JOIN 3 Tables — Age, Embark Town, and Survival

In [249]:
# Three-table inner join: each additional JOIN matches another table to
# passenger_info on passenger_id.
# WHERE pi.age IS NOT NULL drops passengers whose age is missing.
# ORDER BY fixes which ten rows LIMIT returns.
pd.read_sql("""
SELECT pi.age, ei.embark_town, si.survived
FROM passenger_info pi
JOIN embark_info ei ON pi.passenger_id = ei.passenger_id
JOIN survival_info si ON pi.passenger_id = si.passenger_id
WHERE pi.age IS NOT NULL
ORDER BY pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,age,embark_town,survived
0,22.0,Southampton,0
1,38.0,Cherbourg,1
2,26.0,Southampton,1
3,35.0,Southampton,1
4,35.0,Southampton,0
5,54.0,Southampton,0
6,2.0,Southampton,0
7,27.0,Southampton,1
8,14.0,Cherbourg,1
9,4.0,Southampton,1


Join 3 Tables — Siblings, Class, Fare, Survival

In [250]:
# Join passenger_info, family_info and survival_info, keep passengers with at
# least one sibling/spouse aboard, and list the largest sibsp values first.
# Many rows share the same sibsp, so passenger_id is added as a tie-breaker;
# otherwise the rows LIMIT keeps among equal sibsp values would be undefined.
pd.read_sql("""
SELECT pi.sex, fi.sibsp, pi.class, pi.fare, si.survived
FROM passenger_info pi
JOIN family_info fi ON pi.passenger_id = fi.passenger_id
JOIN survival_info si ON pi.passenger_id = si.passenger_id
WHERE fi.sibsp >= 1
ORDER BY fi.sibsp DESC, pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,sex,sibsp,class,fare,survived
0,male,8,Third,69.55,0
1,female,8,Third,69.55,0
2,male,8,Third,69.55,0
3,male,8,Third,69.55,0
4,female,8,Third,69.55,0
5,male,8,Third,69.55,0
6,female,8,Third,69.55,0
7,male,5,Third,46.9,0
8,female,5,Third,46.9,0
9,male,5,Third,46.9,0


Aggregation Across Tables — Average Fare by Embark Town

In [251]:
# Average fare per embarkation town, rounded to two decimals.
# The fare lives in passenger_info and the town in embark_info, so the tables
# are joined before grouping.
# Passengers whose embark_town is NULL form their own group in the result.
# ORDER BY gives the groups a defined order; GROUP BY alone does not.
pd.read_sql("""
SELECT ei.embark_town, ROUND(AVG(pi.fare), 2) AS avg_fare
FROM passenger_info pi
JOIN embark_info ei ON pi.passenger_id = ei.passenger_id
WHERE pi.fare IS NOT NULL
GROUP BY ei.embark_town
ORDER BY ei.embark_town;
""", conn)


Unnamed: 0,embark_town,avg_fare
0,,80.0
1,Cherbourg,59.95
2,Queenstown,13.28
3,Southampton,27.08


Filter with IN — Passengers from specific towns

In [252]:
# Keep only passengers who embarked at one of the listed towns; IN is shorthand
# for embark_town = 'Cherbourg' OR embark_town = 'Queenstown'.
# ORDER BY makes the ten-row preview deterministic.
pd.read_sql("""
SELECT pi.passenger_id, pi.age, ei.embark_town
FROM passenger_info pi
JOIN embark_info ei ON pi.passenger_id = ei.passenger_id
WHERE ei.embark_town IN ('Cherbourg', 'Queenstown')
ORDER BY pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,passenger_id,age,embark_town
0,1,38.0,Cherbourg
1,5,,Queenstown
2,9,14.0,Cherbourg
3,16,2.0,Queenstown
4,19,,Cherbourg
5,22,15.0,Queenstown
6,26,,Cherbourg
7,28,,Queenstown
8,30,40.0,Cherbourg
9,31,,Cherbourg


IS NULL — Deck Unknown

In [253]:
# List passengers whose deck is unknown. Missing values must be tested with
# IS NULL; a comparison such as deck = NULL never evaluates to true.
# ORDER BY fixes which ten rows LIMIT returns.
pd.read_sql("""
SELECT pi.passenger_id, pi.age, ei.deck
FROM passenger_info pi
JOIN embark_info ei ON pi.passenger_id = ei.passenger_id
WHERE ei.deck IS NULL
ORDER BY pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,passenger_id,age,deck
0,0,22.0,
1,2,26.0,
2,4,35.0,
3,5,,
4,7,2.0,
5,8,27.0,
6,9,14.0,
7,12,20.0,
8,13,39.0,
9,14,14.0,


Subquery — Passengers older than the average

In [254]:
# Scalar subquery: the inner SELECT computes the average age once, and the
# outer query keeps passengers older than that value, oldest first.
# Rows with a NULL age are ignored by AVG and fail the > comparison.
# Several passengers share the same age, so passenger_id breaks ties and makes
# the ten rows kept by LIMIT deterministic.
pd.read_sql("""
SELECT passenger_id, age
FROM passenger_info
WHERE age > (SELECT AVG(age) FROM passenger_info)
ORDER BY age DESC, passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,passenger_id,age
0,630,80.0
1,851,74.0
2,96,71.0
3,493,71.0
4,116,70.5
5,672,70.0
6,745,70.0
7,33,66.0
8,54,65.0
9,280,65.0


Multi-condition JOIN — Adults who were not alone and survived

In [255]:
# Inspect the schema of family_info (column names and declared types) before
# writing a query against it.
schema_query = "PRAGMA table_info(family_info);"
pd.read_sql(schema_query, conn)


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,passenger_id,INTEGER,0,,0
1,1,sibsp,INTEGER,0,,0
2,2,parch,INTEGER,0,,0
3,3,adult_male,INTEGER,0,,0


In [256]:
# Adults (age >= 18) who travelled with at least one relative and survived.
# family_size is derived as sibsp + parch, so family_size > 0 means "not alone".
# The age >= 18 test also excludes passengers whose age is NULL, because a
# comparison with NULL is never true.
# ORDER BY makes the ten-row preview deterministic.
pd.read_sql("""
SELECT pi.sex, pi.age, si.survived, (fi.sibsp + fi.parch) AS family_size
FROM passenger_info pi
JOIN survival_info si ON pi.passenger_id = si.passenger_id
JOIN family_info fi ON pi.passenger_id = fi.passenger_id
WHERE pi.age >= 18 AND (fi.sibsp + fi.parch) > 0 AND si.survived = 1
ORDER BY pi.passenger_id
LIMIT 10;
""", conn)


Unnamed: 0,sex,age,survived,family_size
0,female,38.0,1,1
1,female,35.0,1,1
2,female,27.0,1,2
3,female,38.0,1,6
4,female,49.0,1,1
5,female,29.0,1,1
6,female,33.0,1,3
7,female,23.0,1,5
8,male,23.0,1,1
9,female,34.0,1,1
