In [1]:
import pandas as pd
import numpy as np
import sqlite3
import os

In [2]:
# Load the five nycflights13 tables from gzipped CSV files.
# comment="#" makes pandas treat everything after a "#" as a comment.
airlines, airports, flights, planes, weather = (
    pd.read_csv(csv_path, comment="#")
    for csv_path in (
        "nycflights13_airlines.csv.gz",
        "nycflights13_airports.csv.gz",
        "nycflights13_flights.csv.gz",
        "nycflights13_planes.csv.gz",
        "nycflights13_weather.csv.gz",
    )
)

Baza danych to zbiór tabel; w naszym przypadku (SQLite) cała baza jest przechowywana w jednym pliku.

In [3]:
# Connect to the SQLite database file (it is created if it does not exist).
db_name = "nycflights.db"
con = sqlite3.connect(db_name)

# Load each DataFrame into the database as a table of the same name.
# if_exists="replace" overwrites a table left over from an earlier run;
# index=False keeps the DataFrame index out of the table.
airlines.to_sql(name="airlines", con=con, if_exists="replace", index=False)
airports.to_sql(name="airports", con=con, if_exists="replace", index=False)
flights.to_sql(name="flights", con=con, if_exists="replace", index=False)
planes.to_sql(name="planes", con=con, if_exists="replace", index=False)
weather.to_sql(name="weather", con=con, if_exists="replace", index=False)

# Wybór kolumn

In [4]:
# SELECT * fetches every column of the planes table.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N102UW,1998.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
2,N103US,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
3,N104UW,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
3317,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3318,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142,,Turbo-fan
3319,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3320,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142,,Turbo-jet


In [7]:
# Listing column names fetches only those columns.
sql_result = pd.read_sql_query(
    sql="SELECT type, model FROM planes",
    con=con,
)
sql_result

Unnamed: 0,type,model
0,Fixed wing multi engine,EMB-145XR
1,Fixed wing multi engine,A320-214
2,Fixed wing multi engine,A320-214
3,Fixed wing multi engine,A320-214
4,Fixed wing multi engine,EMB-145LR
...,...,...
3317,Fixed wing multi engine,717-200
3318,Fixed wing multi engine,MD-88
3319,Fixed wing multi engine,717-200
3320,Fixed wing multi engine,MD-88


In [6]:
# The columns come back in the order given in the SELECT list.
sql_result = pd.read_sql_query(
    sql="SELECT model, type FROM planes",
    con=con,
)
sql_result

Unnamed: 0,model,type
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine
...,...,...
3317,717-200,Fixed wing multi engine
3318,MD-88,Fixed wing multi engine
3319,717-200,Fixed wing multi engine
3320,MD-88,Fixed wing multi engine


In [8]:
# A column can be qualified with the table it comes from.
# This matters when a query involves several tables that share column names.
sql_result = pd.read_sql_query(
    sql="SELECT planes.model, planes.type FROM planes",
    con=con,
)
sql_result

Unnamed: 0,model,type
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine
...,...,...
3317,717-200,Fixed wing multi engine
3318,MD-88,Fixed wing multi engine
3319,717-200,Fixed wing multi engine
3320,MD-88,Fixed wing multi engine


### Aliasy

In [9]:
# Column aliases: AS renames the columns in the result.
sql_result = pd.read_sql_query(
    sql="SELECT planes.model as modelik, planes.type as typik FROM planes",
    con=con,
)
sql_result

Unnamed: 0,modelik,typik
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine
...,...,...
3317,717-200,Fixed wing multi engine
3318,MD-88,Fixed wing multi engine
3319,717-200,Fixed wing multi engine
3320,MD-88,Fixed wing multi engine


In [11]:
# Table alias: once the table is aliased, its columns are qualified with the
# alias; the original table name can no longer be used in the query.
sql_result = pd.read_sql_query(
    sql="SELECT p.model, p.type FROM planes as p",
    con=con,
)
sql_result

Unnamed: 0,model,type
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine
...,...,...
3317,717-200,Fixed wing multi engine
3318,MD-88,Fixed wing multi engine
3319,717-200,Fixed wing multi engine
3320,MD-88,Fixed wing multi engine


### LIMIT

In [12]:
# LIMIT is the SQL counterpart of pandas' head().
# Some other database engines spell it TOP.
sql_result = pd.read_sql_query(
    sql="SELECT p.model as modelik, p.type as typek FROM planes as p LIMIT 5",
    con=con,
)
sql_result

Unnamed: 0,modelik,typek
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine


### Dygresja: łamanie zapytania

In [13]:
# A triple-quoted (multi-line) string lets the query span several lines.
sql_result = pd.read_sql_query(
    sql="""
SELECT p.model as modelik, p.type as typek 
FROM planes as p 
LIMIT 5
""",
    con=con,
)
sql_result

Unnamed: 0,modelik,typek
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine


In [14]:
# Less convenient alternative: an ordinary string literal broken across
# lines with backslash continuations (each "\" joins the next source line).
sql_result = pd.read_sql_query(
"SELECT p.model as modelik, p.type as typek \
FROM planes as p \
LIMIT 5",
con)
sql_result

Unnamed: 0,modelik,typek
0,EMB-145XR,Fixed wing multi engine
1,A320-214,Fixed wing multi engine
2,A320-214,Fixed wing multi engine
3,A320-214,Fixed wing multi engine
4,EMB-145LR,Fixed wing multi engine


# Where

In [15]:
# WHERE keeps only the rows satisfying the condition (year 2012 or later).
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year >= 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N150UW,2013.0,Fixed wing multi engine,AIRBUS,A321-211,2,199,,Turbo-fan
1,N151UW,2013.0,Fixed wing multi engine,AIRBUS,A321-211,2,199,,Turbo-fan
2,N152UW,2013.0,Fixed wing multi engine,AIRBUS,A321-211,2,199,,Turbo-fan
3,N153UW,2013.0,Fixed wing multi engine,AIRBUS,A321-211,2,199,,Turbo-fan
4,N154UW,2013.0,Fixed wing multi engine,AIRBUS,A321-211,2,199,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
182,N8620H,2013.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
183,N8621A,2013.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
184,N903JB,2013.0,Fixed wing multi engine,AIRBUS,A321-231,2,379,,Turbo-fan
185,N907JB,2013.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A321-231,2,379,,Turbo-fan


In [16]:
# Equality test written with a single "=".
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year = 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N20904,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
1,N26906,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
2,N27901,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
3,N28457,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
4,N34455,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
90,N8601C,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
91,N8602F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
92,N8603F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
93,N8604K,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan


In [17]:
# SQLite also accepts "==" for equality; the result matches the "=" query.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year == 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N20904,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
1,N26906,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
2,N27901,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
3,N28457,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
4,N34455,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
90,N8601C,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
91,N8602F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
92,N8603F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
93,N8604K,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan


In [18]:
# Inequality test written as "!=".
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year != 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N102UW,1998.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
2,N103US,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
3,N104UW,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
3152,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3153,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142,,Turbo-fan
3154,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3155,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142,,Turbo-jet


In [19]:
# "<>" is another spelling of "not equal".
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year <> 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N102UW,1998.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
2,N103US,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
3,N104UW,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
3152,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3153,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142,,Turbo-fan
3154,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3155,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142,,Turbo-jet


## Składanie warunków

In [21]:
# AND combines conditions: both must hold for a row to be kept.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year >= 2010 AND year <= 2011",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N127UW,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
1,N128UW,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
2,N205FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
3,N206FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
4,N207FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
109,N965WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
110,N966WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
111,N967WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
112,N968WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan


In [22]:
# BETWEEN a AND b is a shorthand for an inclusive range test.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year BETWEEN 2010 AND 2011",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N127UW,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
1,N128UW,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
2,N205FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
3,N206FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
4,N207FR,2010.0,Fixed wing multi engine,AIRBUS,A320-214,2,182,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
109,N965WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
110,N966WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
111,N967WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan
112,N968WN,2011.0,Fixed wing multi engine,BOEING,737-7H4,2,140,,Turbo-fan


In [23]:
# OR keeps a row when at least one of the conditions holds.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE year <= 1990 OR year >= 2000",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
2,N11106,2002.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
3,N11107,2002.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
4,N11109,2002.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
2360,N994AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
2361,N995AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
2362,N996AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
2363,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan


In [24]:
# NOT negates a condition; here it negates "year != 2012".
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE NOT year != 2012",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N20904,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
1,N26906,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
2,N27901,2012.0,Fixed wing multi engine,BOEING,787-8,2,260,,Turbo-fan
3,N28457,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
4,N34455,2012.0,Fixed wing multi engine,BOEING,737-924ER,2,191,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
90,N8601C,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
91,N8602F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
92,N8603F,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
93,N8604K,2012.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan


## NULL

In [25]:
# IS NOT NULL keeps the rows where speed has a value.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE speed IS NOT NULL",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N201AA,1959.0,Fixed wing single engine,CESSNA,150,1,2,90.0,Reciprocating
1,N202AA,1980.0,Fixed wing multi engine,CESSNA,421C,2,8,90.0,Reciprocating
2,N350AA,1980.0,Fixed wing multi engine,PIPER,PA-31-350,2,8,162.0,Reciprocating
3,N364AA,1973.0,Fixed wing multi engine,CESSNA,310Q,2,6,167.0,Reciprocating
4,N378AA,1963.0,Fixed wing single engine,CESSNA,172E,1,4,105.0,Reciprocating
5,N381AA,1956.0,Fixed wing multi engine,DOUGLAS,DC-7BF,4,102,232.0,Reciprocating
6,N425AA,1968.0,Fixed wing single engine,PIPER,PA-28-180,1,4,107.0,Reciprocating
7,N508AA,1975.0,Rotorcraft,BELL,206B,1,5,112.0,Turbo-shaft
8,N519MQ,1983.0,Fixed wing single engine,CESSNA,A185F,1,6,127.0,Reciprocating
9,N525AA,1980.0,Fixed wing multi engine,PIPER,PA-31-350,2,8,162.0,Reciprocating


In [26]:
# IS NULL keeps the rows where speed is missing.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE speed IS NULL",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N102UW,1998.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
2,N103US,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
3,N104UW,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
3294,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3295,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142,,Turbo-fan
3296,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
3297,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142,,Turbo-jet


## IN

In [27]:
# IN tests membership in a list of values.
sql_result = pd.read_sql_query(
    sql="""
SELECT * FROM planes
WHERE manufacturer IN ('EMBRAER', 'AIRBUS INDUSTRIE')""",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N10156,2004.0,Fixed wing multi engine,EMBRAER,EMB-145XR,2,55,,Turbo-fan
1,N102UW,1998.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
2,N103US,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
3,N104UW,1999.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A320-214,2,182,,Turbo-fan
4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
...,...,...,...,...,...,...,...,...,...
694,N959UW,2008.0,Fixed wing multi engine,EMBRAER,ERJ 190-100 IGW,2,20,,Turbo-fan
695,N961UW,2008.0,Fixed wing multi engine,EMBRAER,ERJ 190-100 IGW,2,20,,Turbo-fan
696,N963UW,2008.0,Fixed wing multi engine,EMBRAER,ERJ 190-100 IGW,2,20,,Turbo-fan
697,N965UW,2008.0,Fixed wing multi engine,EMBRAER,ERJ 190-100 IGW,2,20,,Turbo-fan


# DISTINCT

In [28]:
# Unique values of the `year` column.
# NOTE: the backslash continuation sits inside the string literal, so the
# leading spaces of the next source line become part of the SQL text.
sql_result = pd.read_sql_query("SELECT DISTINCT year FROM \
                               planes", con)
sql_result

Unnamed: 0,year
0,2004.0
1,1998.0
2,1999.0
3,2002.0
4,2003.0
5,2005.0
6,2006.0
7,2000.0
8,2001.0
9,1994.0


In [29]:
# Unique (type, manufacturer) pairs.
# NOTE: the backslash continuation sits inside the string literal, so the
# leading spaces of the next source line become part of the SQL text.
sql_result = pd.read_sql_query("SELECT DISTINCT type, manufacturer FROM \
                               planes", con)
sql_result

Unnamed: 0,type,manufacturer
0,Fixed wing multi engine,EMBRAER
1,Fixed wing multi engine,AIRBUS INDUSTRIE
2,Fixed wing multi engine,BOEING
3,Fixed wing multi engine,AIRBUS
4,Fixed wing multi engine,BOMBARDIER INC
5,Fixed wing single engine,CESSNA
6,Fixed wing multi engine,CESSNA
7,Fixed wing single engine,JOHN G HESS
8,Fixed wing multi engine,GULFSTREAM AEROSPACE
9,Rotorcraft,SIKORSKY


# LIKE

Operator `LIKE` służy do dopasowywania napisów do wzorca w warunkach logicznych.

`%` to dowolny, być może pusty, ciąg znaków

In [30]:
# List the distinct values of `type` to see what the LIKE patterns run against.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine
1,Fixed wing single engine
2,Rotorcraft


In [31]:
# Without wildcards the pattern has to match the whole value,
# so 'engine' alone matches nothing.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE 'engine'",
    con=con,
)
sql_result

Unnamed: 0,type


In [32]:
# A wildcard-free pattern equal to the full value does match.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE 'Fixed wing multi engine'",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine


In [33]:
# '%engine' matches values that end with "engine".
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE '%engine'",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine
1,Fixed wing single engine


In [34]:
# '%wing' requires the value to end with "wing"; no type does.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE '%wing'",
    con=con,
)
sql_result

Unnamed: 0,type


In [35]:
# '%wing%' matches values that contain "wing" anywhere.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE '%wing%'",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine
1,Fixed wing single engine


In [36]:
# '%engine%' matches values that contain "engine" anywhere, including at the end.
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE '%engine%'",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine
1,Fixed wing single engine


`_` oznacza dokładnie jeden dowolny znak.

In [37]:
# Each "_" stands for a single character, so '%_ngin_' matches values
# ending in "<char>ngin<char>", e.g. "engine".
sql_result = pd.read_sql_query(
    sql="SELECT DISTINCT type FROM planes WHERE type LIKE '%_ngin_'",
    con=con,
)
sql_result

Unnamed: 0,type
0,Fixed wing multi engine
1,Fixed wing single engine


In [40]:
# Six underscores select manufacturers whose name has exactly 6 characters.
sql_result = pd.read_sql_query(
    sql="SELECT * FROM planes WHERE manufacturer LIKE '______'",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N11206,2000.0,Fixed wing multi engine,BOEING,737-824,2,149,,Turbo-fan
1,N1200K,1998.0,Fixed wing multi engine,BOEING,767-332,2,330,,Turbo-fan
2,N1201P,1998.0,Fixed wing multi engine,BOEING,767-332,2,330,,Turbo-fan
3,N12109,1994.0,Fixed wing multi engine,BOEING,757-224,2,178,,Turbo-jet
4,N12114,1995.0,Fixed wing multi engine,BOEING,757-224,2,178,,Turbo-jet
...,...,...,...,...,...,...,...,...,...
1970,N994AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
1971,N995AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
1972,N996AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan
1973,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100,,Turbo-fan


# Sortowanie

In [41]:
# ORDER BY sorts in ascending order by default.
sql_result = pd.read_sql_query(
    sql="""
SELECT * FROM planes ORDER BY year
""",
    con=con,
)
sql_result

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N14558,,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
1,N15555,,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
2,N15574,,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55,,Turbo-fan
3,N174US,,Fixed wing multi engine,AIRBUS INDUSTRIE,A321-211,2,199,,Turbo-jet
4,N177US,,Fixed wing multi engine,AIRBUS INDUSTRIE,A321-211,2,199,,Turbo-jet
...,...,...,...,...,...,...,...,...,...
3317,N8620H,2013.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
3318,N8621A,2013.0,Fixed wing multi engine,BOEING,737-8H4,2,140,,Turbo-fan
3319,N903JB,2013.0,Fixed wing multi engine,AIRBUS,A321-231,2,379,,Turbo-fan
3320,N907JB,2013.0,Fixed wing multi engine,AIRBUS INDUSTRIE,A321-231,2,379,,Turbo-fan


In [42]:
# ORDER BY goes at the end of the query, after WHERE.
sql_result = pd.read_sql_query(
    sql="""
SELECT DISTINCT year
FROM planes 
WHERE manufacturer = 'BOEING' 
ORDER BY year""",
    con=con,
)
sql_result

Unnamed: 0,year
0,
1,1965.0
2,1984.0
3,1985.0
4,1986.0
5,1987.0
6,1988.0
7,1989.0
8,1990.0
9,1991.0


In [46]:
# Sorting is ascending (ASC) by default; ASC may be written explicitly but
# is not needed. Append DESC (descending) to reverse the sort direction.
sql_result = pd.read_sql_query(
    sql="""
SELECT DISTINCT year
FROM planes 
WHERE manufacturer = 'BOEING' 
ORDER BY year DESC""",
    con=con,
)
sql_result

Unnamed: 0,year
0,2013.0
1,2012.0
2,2011.0
3,2010.0
4,2009.0
5,2008.0
6,2007.0
7,2006.0
8,2005.0
9,2004.0


In [51]:
# Rows are sorted by the first ORDER BY column; ties on that column are
# broken by the next column in the list.
sql_result = pd.read_sql_query(
    sql="""
SELECT DISTINCT year, manufacturer
FROM planes
ORDER BY year DESC, manufacturer""",
    con=con,
)
sql_result.head(10)

Unnamed: 0,year,manufacturer
0,2013.0,AIRBUS
1,2013.0,AIRBUS INDUSTRIE
2,2013.0,BOEING
3,2013.0,BOMBARDIER INC
4,2013.0,EMBRAER
5,2012.0,AIRBUS
6,2012.0,BOEING
7,2012.0,EMBRAER
8,2012.0,ROBINSON HELICOPTER CO
9,2011.0,AIRBUS


# Grupowanie

In [52]:
# GROUP BY forms one group per manufacturer; COUNT(*) counts rows per group.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, COUNT(*)
FROM planes
GROUP BY manufacturer
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,COUNT(*)
0,AGUSTA SPA,1
1,AIRBUS,336
2,AIRBUS INDUSTRIE,400
3,AMERICAN AIRCRAFT INC,2
4,AVIAT AIRCRAFT INC,1
5,AVIONS MARCEL DASSAULT,1
6,BARKER JACK L,1
7,BEECH,2
8,BELL,2
9,BOEING,1630


In [53]:
# COUNT(*)      - number of rows in the group
# COUNT(column) - number of rows in the group whose value in that column
#                 is not missing
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, COUNT(speed)
FROM planes
GROUP BY manufacturer
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,COUNT(speed)
0,AGUSTA SPA,0
1,AIRBUS,0
2,AIRBUS INDUSTRIE,0
3,AMERICAN AIRCRAFT INC,0
4,AVIAT AIRCRAFT INC,0
5,AVIONS MARCEL DASSAULT,0
6,BARKER JACK L,0
7,BEECH,1
8,BELL,1
9,BOEING,0


In [54]:
# An alias gives the aggregate column a readable name in the result.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, COUNT(*) AS licznosc
FROM planes
GROUP BY manufacturer
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,licznosc
0,AGUSTA SPA,1
1,AIRBUS,336
2,AIRBUS INDUSTRIE,400
3,AMERICAN AIRCRAFT INC,2
4,AVIAT AIRCRAFT INC,1
5,AVIONS MARCEL DASSAULT,1
6,BARKER JACK L,1
7,BEECH,2
8,BELL,2
9,BOEING,1630


In [57]:
# Grouping by two columns: one group per (manufacturer, engine) pair.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, COUNT(*) AS licznosc
FROM planes
GROUP BY manufacturer, engine
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,licznosc
0,AGUSTA SPA,Turbo-shaft,1
1,AIRBUS,Turbo-fan,331
2,AIRBUS,Turbo-jet,5
3,AIRBUS INDUSTRIE,Turbo-fan,270
4,AIRBUS INDUSTRIE,Turbo-jet,130
5,AMERICAN AIRCRAFT INC,Reciprocating,2
6,AVIAT AIRCRAFT INC,Reciprocating,1
7,AVIONS MARCEL DASSAULT,Turbo-fan,1
8,BARKER JACK L,Reciprocating,1
9,BEECH,Turbo-prop,2


In [56]:
# Unless you know exactly what you are doing, do not put arbitrary columns
# in the SELECT list of a grouped query. Select only the grouping columns
# and aggregate functions.
# A "bare" column such as `year` below yields the value from some arbitrary
# row of each group (neither the smallest nor the largest).
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, year, COUNT(*) AS licznosc
FROM planes
GROUP BY manufacturer, engine
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,year,licznosc
0,AGUSTA SPA,Turbo-shaft,2001.0,1
1,AIRBUS,Turbo-fan,2009.0,331
2,AIRBUS,Turbo-jet,2002.0,5
3,AIRBUS INDUSTRIE,Turbo-fan,1998.0,270
4,AIRBUS INDUSTRIE,Turbo-jet,2001.0,130
5,AMERICAN AIRCRAFT INC,Reciprocating,,2
6,AVIAT AIRCRAFT INC,Reciprocating,2007.0,1
7,AVIONS MARCEL DASSAULT,Turbo-fan,1986.0,1
8,BARKER JACK L,Reciprocating,,1
9,BEECH,Turbo-prop,1972.0,2


In [58]:
# Several aggregates can be computed per group: earliest, average and
# latest year for every (manufacturer, engine) pair.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year), AVG(year), MAX(year)
FROM planes
GROUP BY manufacturer, engine
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,MIN(year),AVG(year),MAX(year)
0,AGUSTA SPA,Turbo-shaft,2001.0,2001.0,2001.0
1,AIRBUS,Turbo-fan,2002.0,2007.281734,2013.0
2,AIRBUS,Turbo-jet,2002.0,2002.0,2002.0
3,AIRBUS INDUSTRIE,Turbo-fan,1992.0,1998.599251,2013.0
4,AIRBUS INDUSTRIE,Turbo-jet,1989.0,1997.439024,2004.0
5,AMERICAN AIRCRAFT INC,Reciprocating,,,
6,AVIAT AIRCRAFT INC,Reciprocating,2007.0,2007.0,2007.0
7,AVIONS MARCEL DASSAULT,Turbo-fan,1986.0,1986.0,1986.0
8,BARKER JACK L,Reciprocating,,,
9,BEECH,Turbo-prop,1967.0,1969.5,1972.0


In [59]:
# HAVING is similar to WHERE but is applied AFTER grouping.
# It is used to filter on the results of aggregate functions.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year), AVG(year)
FROM planes
GROUP BY manufacturer, engine
HAVING MIN(year) > 2000 AND AVG(year) > 2002
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,MIN(year),AVG(year)
0,AIRBUS,Turbo-fan,2002.0,2007.281734
1,AVIAT AIRCRAFT INC,Reciprocating,2007.0,2007.0
2,CIRRUS DESIGN CORP,Reciprocating,2007.0,2007.0
3,FRIEDEMANN JON,Reciprocating,2007.0,2007.0
4,ROBINSON HELICOPTER CO,Turbo-shaft,2012.0,2012.0


In [60]:
# HAVING can refer to the aggregates through their SELECT aliases.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year) as minimum, AVG(year) as srednia
FROM planes
GROUP BY manufacturer, engine
HAVING minimum > 2000 AND srednia > 2002
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,minimum,srednia
0,AIRBUS,Turbo-fan,2002.0,2007.281734
1,AVIAT AIRCRAFT INC,Reciprocating,2007.0,2007.0
2,CIRRUS DESIGN CORP,Reciprocating,2007.0,2007.0
3,FRIEDEMANN JON,Reciprocating,2007.0,2007.0
4,ROBINSON HELICOPTER CO,Turbo-shaft,2012.0,2012.0


In [61]:
# Column positions in the SELECT list can be used instead of column names
# (here in GROUP BY); the same works in ORDER BY.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year) as minimum, AVG(year) as srednia
FROM planes
GROUP BY 1, 2
HAVING minimum > 2000 AND srednia > 2002
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,minimum,srednia
0,AIRBUS,Turbo-fan,2002.0,2007.281734
1,AVIAT AIRCRAFT INC,Reciprocating,2007.0,2007.0
2,CIRRUS DESIGN CORP,Reciprocating,2007.0,2007.0
3,FRIEDEMANN JON,Reciprocating,2007.0,2007.0
4,ROBINSON HELICOPTER CO,Turbo-shaft,2012.0,2012.0


In [62]:
# Positional references in ORDER BY: sort by the 2nd selected column
# (engine) descending, then by the 1st (manufacturer) ascending.
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year) as minimum, AVG(year) as srednia
FROM planes
GROUP BY 1, 2
HAVING minimum > 2000 AND srednia > 2002
ORDER BY 2 DESC, 1
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,minimum,srednia
0,ROBINSON HELICOPTER CO,Turbo-shaft,2012.0,2012.0
1,AIRBUS,Turbo-fan,2002.0,2007.281734
2,AVIAT AIRCRAFT INC,Reciprocating,2007.0,2007.0
3,CIRRUS DESIGN CORP,Reciprocating,2007.0,2007.0
4,FRIEDEMANN JON,Reciprocating,2007.0,2007.0


In [63]:
# WHERE filters rows before grouping (conditions on columns);
# HAVING filters groups after grouping (conditions on aggregates).
sql_result = pd.read_sql_query(
    sql="""
SELECT manufacturer, engine, MIN(year) as minimum, AVG(year) as srednia
FROM planes
WHERE seats > 200
GROUP BY 1, 2
HAVING minimum > 2000 AND srednia > 2002
""",
    con=con,
)
sql_result

Unnamed: 0,manufacturer,engine,minimum,srednia
0,AIRBUS,Turbo-fan,2004.0,2010.661538
1,AIRBUS INDUSTRIE,Turbo-fan,2013.0,2013.0
2,AIRBUS INDUSTRIE,Turbo-jet,2004.0,2004.0


# Join

In [65]:
# Preview the flights table before joining.
sql_result = pd.read_sql_query(
    sql="""
SELECT *
FROM flights
LIMIT 3
""",
    con=con,
)
sql_result

Unnamed: 0,year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
0,2013,1,1,517.0,515,2.0,830.0,819,11.0,UA,1545,N14228,EWR,IAH,227.0,1400,5,15,2013-01-01 05:00:00
1,2013,1,1,533.0,529,4.0,850.0,830,20.0,UA,1714,N24211,LGA,IAH,227.0,1416,5,29,2013-01-01 05:00:00
2,2013,1,1,542.0,540,2.0,923.0,850,33.0,AA,1141,N619AA,JFK,MIA,160.0,1089,5,40,2013-01-01 05:00:00


In [66]:
# Preview the airlines table (carrier code and full airline name).
sql_result = pd.read_sql_query(
    sql="""
SELECT *
FROM airlines
LIMIT 10
""",
    con=con,
)
sql_result

Unnamed: 0,carrier,name
0,9E,Endeavor Air Inc.
1,AA,American Airlines Inc.
2,AS,Alaska Airlines Inc.
3,B6,JetBlue Airways
4,DL,Delta Air Lines Inc.
5,EV,ExpressJet Airlines Inc.
6,F9,Frontier Airlines Inc.
7,FL,AirTran Airways Corporation
8,HA,Hawaiian Airlines Inc.
9,MQ,Envoy Air


In [70]:
# A plain JOIN is an inner join by default; another variant such as
# LEFT JOIN can be written in its place.
sql_result = pd.read_sql_query(
    sql="""
SELECT flights.carrier, flights.tailnum, airlines.name
FROM flights
JOIN airlines
ON flights.carrier = airlines.carrier
""",
    con=con,
)
sql_result

Unnamed: 0,carrier,tailnum,name
0,UA,N14228,United Air Lines Inc.
1,UA,N24211,United Air Lines Inc.
2,AA,N619AA,American Airlines Inc.
3,B6,N804JB,JetBlue Airways
4,DL,N668DN,Delta Air Lines Inc.
...,...,...,...
336771,9E,,Endeavor Air Inc.
336772,9E,,Endeavor Air Inc.
336773,MQ,N535MQ,Envoy Air
336774,MQ,N511MQ,Envoy Air


In [71]:
# The same join with a table alias for flights; `tailnum` is left
# unqualified, while `carrier` is qualified with the alias.
sql_result = pd.read_sql_query(
    sql="""
SELECT f.carrier, tailnum, airlines.name
FROM flights as f
JOIN airlines
ON f.carrier = airlines.carrier
""",
    con=con,
)
sql_result

Unnamed: 0,carrier,tailnum,name
0,UA,N14228,United Air Lines Inc.
1,UA,N24211,United Air Lines Inc.
2,AA,N619AA,American Airlines Inc.
3,B6,N804JB,JetBlue Airways
4,DL,N668DN,Delta Air Lines Inc.
...,...,...,...
336771,9E,,Endeavor Air Inc.
336772,9E,,Endeavor Air Inc.
336773,MQ,N535MQ,Envoy Air
336774,MQ,N511MQ,Envoy Air


In [72]:
# Look at the 10 flights rows that the LEFT JOIN example's subquery selects.
sql_result = pd.read_sql_query(
    sql="""
SELECT *
FROM flights
LIMIT 10
""",
    con=con,
)
sql_result

Unnamed: 0,year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
0,2013,1,1,517.0,515,2.0,830.0,819,11.0,UA,1545,N14228,EWR,IAH,227.0,1400,5,15,2013-01-01 05:00:00
1,2013,1,1,533.0,529,4.0,850.0,830,20.0,UA,1714,N24211,LGA,IAH,227.0,1416,5,29,2013-01-01 05:00:00
2,2013,1,1,542.0,540,2.0,923.0,850,33.0,AA,1141,N619AA,JFK,MIA,160.0,1089,5,40,2013-01-01 05:00:00
3,2013,1,1,544.0,545,-1.0,1004.0,1022,-18.0,B6,725,N804JB,JFK,BQN,183.0,1576,5,45,2013-01-01 05:00:00
4,2013,1,1,554.0,600,-6.0,812.0,837,-25.0,DL,461,N668DN,LGA,ATL,116.0,762,6,0,2013-01-01 06:00:00
5,2013,1,1,554.0,558,-4.0,740.0,728,12.0,UA,1696,N39463,EWR,ORD,150.0,719,5,58,2013-01-01 05:00:00
6,2013,1,1,555.0,600,-5.0,913.0,854,19.0,B6,507,N516JB,EWR,FLL,158.0,1065,6,0,2013-01-01 06:00:00
7,2013,1,1,557.0,600,-3.0,709.0,723,-14.0,EV,5708,N829AS,LGA,IAD,53.0,229,6,0,2013-01-01 06:00:00
8,2013,1,1,557.0,600,-3.0,838.0,846,-8.0,B6,79,N593JB,JFK,MCO,140.0,944,6,0,2013-01-01 06:00:00
9,2013,1,1,558.0,600,-2.0,753.0,745,8.0,AA,301,N3ALAA,LGA,ORD,138.0,733,6,0,2013-01-01 06:00:00


In [73]:
# LEFT JOIN keeps every row of airlines. Airlines with no match among the
# 10 flights returned by the subquery get missing values (None) in
# carrier and tailnum.
sql_result = pd.read_sql_query(
    sql="""
SELECT pierwsze.carrier, pierwsze.tailnum, airlines.name
FROM airlines
LEFT JOIN (SELECT *
FROM flights
LIMIT 10) pierwsze
ON pierwsze.carrier = airlines.carrier
""",
    con=con,
)
sql_result

Unnamed: 0,carrier,tailnum,name
0,,,Endeavor Air Inc.
1,AA,N3ALAA,American Airlines Inc.
2,AA,N619AA,American Airlines Inc.
3,,,Alaska Airlines Inc.
4,B6,N516JB,JetBlue Airways
5,B6,N593JB,JetBlue Airways
6,B6,N804JB,JetBlue Airways
7,DL,N668DN,Delta Air Lines Inc.
8,EV,N829AS,ExpressJet Airlines Inc.
9,,,Frontier Airlines Inc.
