# SQL in Python

In [2]:
# Imports

import sqlite3
import pandas as pd

In [3]:
# Load data: read the medal records (summer.csv, hosted on GitHub) into a DataFrame

SUMMER_CSV_URL = 'https://raw.githubusercontent.com/renatomaaliw3/public_files/master/Data%20Sets/summer.csv'
data = pd.read_csv(SUMMER_CSV_URL)
data.head(n=5)  # preview the first five rows

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [4]:
# Connect to an in-memory SQLite database.
# The special name must be exactly ':memory:' (note the trailing colon);
# ':memory' would instead create an on-disk file literally named ':memory'.

conn = sqlite3.connect(':memory:')

In [5]:
# Write the DataFrame to the database as the table 'Summer'.
# index=False keeps the DataFrame index out of the table.
# if_exists='replace' makes this cell safe to re-run: the default ('fail')
# raises ValueError when the table already exists.

data.to_sql('Summer', conn, index=False, if_exists='replace')

31165

In [7]:
# Execute a SQL query: SELECT * returns every column of every row in Summer,
# and pandas hands the result back as a DataFrame

query = "SELECT * " "FROM Summer"
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [6]:
# Notes

# 00. SELECT (Extract)
# 0. AS (Alias)
# 1. WHERE (filter rows by conditions), combined with AND / OR and comparison operators (<, >, =, etc.)
# 2. ORDER BY (sorts ascending, ASC, by default)
# 3. LIKE (Wildcard -> For String), %, _
# 4. GROUP BY, HAVING (Aggregation Function)
# 5. (AVG, COUNT, SUM), (DISTINCT) -> unique
# 6. IN

In [9]:
# Specific columns: name the wanted columns instead of selecting * (all)

query = (
    "SELECT City, Athlete "
    "FROM Summer"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,City,Athlete
0,Athens,"HAJOS, Alfred"
1,Athens,"HERSCHMANN, Otto"
2,Athens,"DRIVAS, Dimitrios"
3,Athens,"MALOKINIS, Ioannis"
4,Athens,"CHASAPIS, Spiridon"
...,...,...
31160,London,"JANIKOWSKI, Damian"
31161,London,"REZAEI, Ghasem Gholamreza"
31162,London,"TOTROV, Rustam"
31163,London,"ALEKSANYAN, Artur"


In [11]:
# Alias: AS renames a column in the result set.
# An alias containing a space must be quoted. Double quotes are the standard
# SQL identifier quote; single quotes denote string literals and are accepted
# for an alias only because SQLite tolerates them for compatibility.

query = 'SELECT City, Athlete AS "Full Name" FROM Summer'
result_sql = pd.read_sql_query(query, conn)
result_sql

Unnamed: 0,City,Full Name
0,Athens,"HAJOS, Alfred"
1,Athens,"HERSCHMANN, Otto"
2,Athens,"DRIVAS, Dimitrios"
3,Athens,"MALOKINIS, Ioannis"
4,Athens,"CHASAPIS, Spiridon"
...,...,...
31160,London,"JANIKOWSKI, Damian"
31161,London,"REZAEI, Ghasem Gholamreza"
31162,London,"TOTROV, Rustam"
31163,London,"ALEKSANYAN, Artur"


In [17]:
# WHERE: keep only the rows that satisfy every condition joined by AND

query = (
    "SELECT * FROM Summer "
    "WHERE Event = '100M Freestyle' "
    "AND Medal = 'Gold' "
    "AND Country = 'USA' "
    "AND Gender = 'Men'"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1908,London,Aquatics,Swimming,"DANIELS, Charles",USA,Men,100M Freestyle,Gold
1,1912,Stockholm,Aquatics,Swimming,"KAHANAMOKU, Duke Paoa",USA,Men,100M Freestyle,Gold
2,1920,Antwerp,Aquatics,Swimming,"KAHANAMOKU, Duke Paoa",USA,Men,100M Freestyle,Gold
3,1924,Paris,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
4,1928,Amsterdam,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
5,1948,London,Aquatics,Swimming,"RIS, Walter Stephen",USA,Men,100M Freestyle,Gold
6,1952,Helsinki,Aquatics,Swimming,"SCHOLES, Clark Currie",USA,Men,100M Freestyle,Gold
7,1964,Tokyo,Aquatics,Swimming,"SCHOLLANDER, Donald Arthur",USA,Men,100M Freestyle,Gold
8,1972,Munich,Aquatics,Swimming,"SPITZ, Mark",USA,Men,100M Freestyle,Gold
9,1976,Montreal,Aquatics,Swimming,"MONTGOMERY, James Paul",USA,Men,100M Freestyle,Gold


In [18]:
# ORDER BY: sort the filtered rows by athlete name, Z to A (DESC)

query = (
    "SELECT * FROM Summer "
    "WHERE Event = '100M Freestyle' "
    "AND Medal = 'Gold' "
    "AND Country = 'USA' "
    "AND Gender = 'Men' "
    "ORDER BY Athlete DESC"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1924,Paris,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
1,1928,Amsterdam,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
2,1972,Munich,Aquatics,Swimming,"SPITZ, Mark",USA,Men,100M Freestyle,Gold
3,1964,Tokyo,Aquatics,Swimming,"SCHOLLANDER, Donald Arthur",USA,Men,100M Freestyle,Gold
4,1952,Helsinki,Aquatics,Swimming,"SCHOLES, Clark Currie",USA,Men,100M Freestyle,Gold
5,1948,London,Aquatics,Swimming,"RIS, Walter Stephen",USA,Men,100M Freestyle,Gold
6,1976,Montreal,Aquatics,Swimming,"MONTGOMERY, James Paul",USA,Men,100M Freestyle,Gold
7,1912,Stockholm,Aquatics,Swimming,"KAHANAMOKU, Duke Paoa",USA,Men,100M Freestyle,Gold
8,1920,Antwerp,Aquatics,Swimming,"KAHANAMOKU, Duke Paoa",USA,Men,100M Freestyle,Gold
9,1984,Los Angeles,Aquatics,Swimming,"GAINES, Ambrose Iv",USA,Men,100M Freestyle,Gold


In [20]:
# LIMIT: same filtered, sorted query, but return only the first 3 rows

query = (
    "SELECT * FROM Summer "
    "WHERE Event = '100M Freestyle' "
    "AND Medal = 'Gold' "
    "AND Country = 'USA' "
    "AND Gender = 'Men' "
    "ORDER BY Athlete DESC "
    "LIMIT 3"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1924,Paris,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
1,1928,Amsterdam,Aquatics,Swimming,"WEISSMULLER, Johnny",USA,Men,100M Freestyle,Gold
2,1972,Munich,Aquatics,Swimming,"SPITZ, Mark",USA,Men,100M Freestyle,Gold


In [23]:
# LIKE with the % wildcard: % matches any run of characters (including none),
# so '%kyo%' finds athletes whose name contains "kyo" anywhere

query = (
    "SELECT * FROM Summer "
    "WHERE Athlete LIKE '%kyo%' "
    "AND Gender = 'Men'"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1968,Mexico,Boxing,Boxing,"CHANG, Kyou-Chul",KOR,Men,51 - 54KG (Bantamweight),Bronze
1,1984,Los Angeles,Gymnastics,Artistic G.,"YAMAWAKI, Kyoji",JPN,Men,Team Competition,Bronze
2,1996,Atlanta,Archery,Archery,"OH, Kyo-Moon",KOR,Men,Individual (Fita Olympic Round - 70M),Bronze
3,1996,Atlanta,Archery,Archery,"OH, Kyo-Moon",KOR,Men,Team (Fita Olympic Round - 70M),Silver
4,2000,Sydney,Archery,Archery,"OH, Kyo-Moon",KOR,Men,Team (Fita Olympic Round - 70M),Gold
5,2000,Sydney,Taekwondo,Taekwondo,"KIM, Kyong-Hun",KOR,Men,+ 80 KG,Gold
6,2012,London,Football,Football,"YUN, Sukyoung",KOR,Men,Football,Bronze
7,2012,London,Wrestling,Wrestling Freestyle,"YANG, Kyong Il",PRK,Men,Wf 55 KG,Bronze


In [29]:
# GROUP BY with an aggregate function (COUNT, MAX, MIN, AVG, SUM):
# count the medals per country and rank countries from most to fewest.
# DESC is required for that ranking because ORDER BY sorts ascending by default.
# The alias uses double quotes, the standard SQL identifier quote.

query = """
SELECT Country, COUNT(Medal) AS "Medal Count"
FROM Summer
GROUP BY Country
ORDER BY COUNT(Medal) DESC
"""
result_sql = pd.read_sql_query(query, conn)
result_sql

Unnamed: 0,Country,Medal Count
0,USA,4585
1,URS,2049
2,GBR,1720
3,FRA,1396
4,GER,1305
...,...,...
143,BOT,1
144,BER,1
145,BDI,1
146,BAR,1


In [31]:
# GROUP BY + HAVING (with an aggregate function: COUNT, MAX, MIN, AVG, SUM).
# HAVING filters the groups produced by GROUP BY, whereas WHERE filters
# individual rows before grouping. Here only the USA and PHI groups are kept.
# The alias uses double quotes, the standard SQL identifier quote; single
# quotes are reserved for string literals such as 'USA'.

query = """
SELECT Country, COUNT(Medal) AS "Medal Count"
FROM Summer
GROUP BY Country
HAVING Country = 'USA' OR Country = 'PHI'
ORDER BY COUNT(Medal) DESC
"""
result_sql = pd.read_sql_query(query, conn)
result_sql

Unnamed: 0,Country,Medal Count
0,USA,4585
1,PHI,9


In [32]:
# DISTINCT: return each country code once.
# DISTINCT is a modifier of SELECT that applies to the whole selected row,
# not a function, so it is written without parentheses.

query = "SELECT DISTINCT Country FROM Summer"
result_sql = pd.read_sql_query(query, conn)
result_sql

Unnamed: 0,Country
0,HUN
1,AUT
2,GRE
3,USA
4,GER
...,...
143,BOT
144,MNE
145,CYP
146,SGP


In [34]:
# COUNT(DISTINCT ...): count how many different country codes appear in the table.
# The alias uses double quotes, the standard SQL identifier quote.

query = 'SELECT COUNT(DISTINCT Country) AS "Participating Country" FROM Summer'
result_sql = pd.read_sql_query(query, conn)
result_sql

Unnamed: 0,Participating Country
0,147


In [38]:
# IN: shorthand for several OR-ed equality tests; valid in WHERE or HAVING.
# Here it keeps only the USA, AUS and CAN groups after grouping by country.

query = (
    "SELECT Country, COUNT(Athlete) "
    "FROM Summer "
    "GROUP BY Country "
    "HAVING Country IN ('USA', 'AUS', 'CAN')"
)
result_sql = pd.read_sql_query(sql=query, con=conn)
result_sql

Unnamed: 0,Country,COUNT(Athlete)
0,AUS,1189
1,CAN,649
2,USA,4585
