# JOUR 2 — SQL : Import Titanic et requêtes de base
 
 ## Objectif : apprendre à importer un dataset dans SQLite et exécuter des requêtes SQL simples

In [8]:
# sqlite3: create and query the SQLite database
import sqlite3

# pandas: handle the data as DataFrames
import pandas as pd

# Load the dataset from the CSV file into a DataFrame
df = pd.read_csv("../data/train.csv")


In [9]:
# Show the first 5 rows to check that the CSV was loaded
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [11]:
# Open the SQLite database (file titanic.db in the data folder)
conn = sqlite3.connect("../data/titanic.db")

# Write the DataFrame into the 'passengers' table.
# if_exists="replace" overwrites an existing table of that name;
# index=False keeps the DataFrame index out of the table.
df.to_sql(
    name="passengers",
    con=conn,
    if_exists="replace",
    index=False,
)

# Read a few rows back to check that the table was created
pd.read_sql(
    sql="SELECT * FROM passengers LIMIT 5;",
    con=conn,
)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# 1. Select only some columns (first 10 rows)
pd.read_sql(
    sql="SELECT Name, Sex, Age, Survived FROM passengers LIMIT 10;",
    con=conn,
)

Unnamed: 0,Name,Sex,Age,Survived
0,"Braund, Mr. Owen Harris",male,22.0,0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1
2,"Heikkinen, Miss. Laina",female,26.0,1
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1
4,"Allen, Mr. William Henry",male,35.0,0
5,"Moran, Mr. James",male,,0
6,"McCarthy, Mr. Timothy J",male,54.0,0
7,"Palsson, Master. Gosta Leonard",male,2.0,0
8,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,1
9,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1


In [4]:
# 2. Filter on female passengers (first 10 rows)
pd.read_sql(
    sql="SELECT Name, Age FROM passengers WHERE Sex='female' LIMIT 10;",
    con=conn,
)

Unnamed: 0,Name,Age
0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
1,"Heikkinen, Miss. Laina",26.0
2,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",27.0
4,"Nasser, Mrs. Nicholas (Adele Achem)",14.0
5,"Sandstrom, Miss. Marguerite Rut",4.0
6,"Bonnell, Miss. Elizabeth",58.0
7,"Vestrom, Miss. Hulda Amanda Adolfina",14.0
8,"Hewlett, Mrs. (Mary D Kingcome)",55.0
9,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",31.0


In [5]:
# 3. Sort by age in descending order (first 10 rows)
pd.read_sql(
    sql="SELECT Name, Age FROM passengers ORDER BY Age DESC LIMIT 10;",
    con=conn,
)


Unnamed: 0,Name,Age
0,"Barkworth, Mr. Algernon Henry Wilson",80.0
1,"Svensson, Mr. Johan",74.0
2,"Goldschmidt, Mr. George B",71.0
3,"Artagaveytia, Mr. Ramon",71.0
4,"Connors, Mr. Patrick",70.5
5,"Mitchell, Mr. Henry Michael",70.0
6,"Crosby, Capt. Edward Gifford",70.0
7,"Wheadon, Mr. Edward H",66.0
8,"Ostby, Mr. Engelhart Cornelius",65.0
9,"Duane, Mr. Frank",65.0


In [6]:
# 4. Survivors older than 50
pd.read_sql(
    sql="SELECT Name, Pclass, Age, Survived FROM passengers WHERE Age>50 AND Survived=1;",
    con=conn,
)


Unnamed: 0,Name,Pclass,Age,Survived
0,"Bonnell, Miss. Elizabeth",1,58.0,1
1,"Hewlett, Mrs. (Mary D Kingcome)",2,55.0,1
2,"Lurette, Miss. Elise",1,58.0,1
3,"Graham, Mrs. William Thompson (Edith Junkins)",1,58.0,1
4,"Andrews, Miss. Kornelia Theodosia",1,63.0,1
5,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",1,60.0,1
6,"Peuchen, Major. Arthur Godfrey",1,52.0,1
7,"Turkula, Mrs. (Hedwig)",3,63.0,1
8,"Eustis, Miss. Elizabeth Mussey",1,54.0,1
9,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",1,54.0,1


### Notes / Explications :

df = pd.read_csv(...)       → charge le CSV dans un DataFrame pandas
conn = sqlite3.connect(...) → crée une connexion à une base SQLite locale
df.to_sql(...)              → écrit le DataFrame dans la base SQLite sous le nom 'passengers'
pd.read_sql("SELECT ...")   → exécute une requête SQL et retourne un DataFrame

### Requêtes SQL :

SELECT colonne1, colonne2 FROM table → sélectionner des colonnes
WHERE condition                     → filtrer les lignes selon une condition
ORDER BY colonne DESC               → trier par ordre décroissant
GROUP BY colonne                     → regrouper pour calculs (ex: COUNT, SUM)
