The point of this project is to use the detailed NBA sqlite database to practice Aggregate Functions (Count, Sum, Distinct, Group by etc.), Where, Joins, Union, Subqueries, and With. Possibly working with pivot or case.

In [1]:
import sqlite3 as sql
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
con = sql.connect("nba.sqlite")
cur = con.cursor()

In [3]:
query = "SELECT name FROM sqlite_master WHERE type='table';"
cur.execute(query)
tables = cur.fetchall()
tables

[('game',),
 ('game_summary',),
 ('other_stats',),
 ('officials',),
 ('inactive_players',),
 ('game_info',),
 ('line_score',),
 ('play_by_play',),
 ('player',),
 ('team',),
 ('common_player_info',),
 ('team_details',),
 ('team_history',),
 ('draft_combine_stats',),
 ('draft_history',),
 ('team_info_common',)]

In [4]:
columnDict = {}

for i,table in enumerate(tables):
    query = "SELECT * FROM %s;" % table
    cur.execute(query)
    cols = list(cur.description)
    valuelist = []
    for j, col in enumerate(cols):
        collist = list(col)
        valuelist.append(collist[0])
    columnDict[table] = valuelist

columnDict

{('game',): ['season_id',
  'team_id_home',
  'team_abbreviation_home',
  'team_name_home',
  'game_id',
  'game_date',
  'matchup_home',
  'wl_home',
  'min',
  'fgm_home',
  'fga_home',
  'fg_pct_home',
  'fg3m_home',
  'fg3a_home',
  'fg3_pct_home',
  'ftm_home',
  'fta_home',
  'ft_pct_home',
  'oreb_home',
  'dreb_home',
  'reb_home',
  'ast_home',
  'stl_home',
  'blk_home',
  'tov_home',
  'pf_home',
  'pts_home',
  'plus_minus_home',
  'video_available_home',
  'team_id_away',
  'team_abbreviation_away',
  'team_name_away',
  'matchup_away',
  'wl_away',
  'fgm_away',
  'fga_away',
  'fg_pct_away',
  'fg3m_away',
  'fg3a_away',
  'fg3_pct_away',
  'ftm_away',
  'fta_away',
  'ft_pct_away',
  'oreb_away',
  'dreb_away',
  'reb_away',
  'ast_away',
  'stl_away',
  'blk_away',
  'tov_away',
  'pf_away',
  'pts_away',
  'plus_minus_away',
  'video_available_away',
  'season_type'],
 ('game_summary',): ['game_date_est',
  'game_sequence',
  'game_id',
  'game_status_id',
  'game_sta

# Aggregate Function Experimentation

In [60]:
query ="""SELECT AVG(ABS(plus_minus_home)) as PM, SUBSTRING(season_id,2,4) as Year FROM game group by Year order by Year"""
cur.execute(query)
cur.fetchmany(15)

[(9.851428571428572, '1946'),
 (10.865116279069767, '1947'),
 (10.721052631578948, '1948'),
 (11.723440134907252, '1949'),
 (10.599476439790577, '1950'),
 (10.168067226890756, '1951'),
 (10.14095744680851, '1952'),
 (10.020114942528735, '1953'),
 (8.279742765273312, '1954'),
 (9.549520766773163, '1955'),
 (9.27831715210356, '1956'),
 (10.513793103448275, '1957'),
 (10.655172413793103, '1958'),
 (11.330275229357799, '1959'),
 (22.0, '1960')]

In [32]:
query ="""SELECT * FROM game WHERE SUBSTRING(season_id,2,4) ='1960'"""
cur.execute(query)
cur.fetchall()

[('31960',
  '1610616833',
  'EST',
  'East NBA All Stars East',
  '0036000001',
  '1961-01-17 00:00:00',
  'EST vs. WST',
  'L',
  240,
  49.0,
  117.0,
  0.419,
  None,
  None,
  None,
  33.0,
  47.0,
  0.702,
  None,
  None,
  63.0,
  28.0,
  None,
  None,
  None,
  30.0,
  131.0,
  -22,
  0,
  '1610616834',
  'WST',
  'West NBA All Stars West',
  'WST @ EST',
  'W',
  58.0,
  115.0,
  0.504,
  None,
  None,
  None,
  37.0,
  47.0,
  0.787,
  None,
  None,
  55.0,
  37.0,
  None,
  None,
  None,
  30.0,
  153.0,
  22,
  0,
  'All-Star')]

I noticed 1960's Plus-Minus was strange and had to investigate, and so it seems like the only data is for the All Star Game so it makes sense that the value is strangely round. 

In [59]:
query ="""SELECT team_name_away as Team, SUM(pts_away) as PTS FROM game group by team_name_away order by PTS DESC"""
cur.execute(query)
cur.fetchmany(5)

[('Boston Celtics', 315242.0),
 ('New York Knicks', 292706.0),
 ('Los Angeles Lakers', 268826.0),
 ('Detroit Pistons', 261218.0),
 ('Philadelphia 76ers', 251917.0)]

In [58]:
query ="""SELECT DISTINCT(team_name_away) FROM game order by team_name_away"""
cur.execute(query)
cur.fetchmany(5)

[('Adelaide 36ers',),
 ('Anderson Packers',),
 ('Athens Olympiacos',),
 ('Atlanta Hawks',),
 ('Baltimore Bullets',)]

# Where Statements

In [56]:
query ="""SELECT team_name_home, team_name_away, ABS(plus_minus_home) as PM, game_date FROM game WHERE PM >= 20 order by PM DESC"""
cur.execute(query)
cur.fetchmany(5)

[('Memphis Grizzlies', 'Oklahoma City Thunder', 73, '2021-12-02 00:00:00'),
 ('Cleveland Cavaliers', 'Miami Heat', 68, '1991-12-17 00:00:00'),
 ('Charlotte Hornets', 'Dallas Mavericks', 68, '2021-10-13 00:00:00'),
 ('Indiana Pacers', 'Portland Trail Blazers', 65, '1998-02-27 00:00:00'),
 ('Los Angeles Lakers', 'Golden State Warriors', 63, '1972-03-19 00:00:00')]

In [57]:
query ="""SELECT first_name, last_name, height_wo_shoes FROM draft_combine_stats WHERE position in ('G', 'PG', 'SG') AND Wingspan < 75 order by Weight"""
cur.execute(query)
cur.fetchmany(5)

[('Tyler', 'Ulis', 68.75),
 ('John', 'Lucas III', 69.25),
 ('Cordell', 'Henry', 68.0),
 ('Daryl', 'Dorsey', 71.25),
 ('Max', 'Abmas', 70.5)]

# JOINS

In [71]:
query ="""SELECT GI.attendance as attendance,
GI.game_date as date,
LS.team_wins_losses_home as HomeWL,
LS.team_abbreviation_home as Home, 
LS.team_wins_losses_away as AwayWL,
LS.team_abbreviation_away as Away 
FROM game_info as GI 
JOIN line_score as LS
on GI.game_id = LS.game_id
Where attendance not in ('None') AND HomeWL not in ('-')
order by attendance desc
"""
cur.execute(query)
cur.fetchmany(20)

[(108713, '2010-02-14 00:00:00', '1-0', 'EST', '0-1', 'WST'),
 (108713, '2010-02-14 00:00:00', '1-0', 'EST', '0-1', 'WST'),
 (108713, '2010-02-14 00:00:00', '1-0', 'EST', '0-1', 'WST'),
 (108713, '2010-02-14 00:00:00', '1-0', 'EST', '0-1', 'WST'),
 (68323, '2023-01-13 00:00:00', '13-30', 'SAS', '21-21', 'GSW'),
 (64512, '2006-10-14 00:00:00', '2-1', 'MIL', '0-3', 'DAL'),
 (62976, '2006-10-17 00:00:00', '2-1', 'LAC', '3-2', 'PHX'),
 (62976, '2006-10-19 00:00:00', '2-1', 'SAC', '4-2', 'PHX'),
 (60672, '2006-10-20 00:00:00', '1-3', 'POR', '1-3', 'SEA'),
 (60416, '2006-10-17 00:00:00', '2-1', 'HOU', '0-4', 'DAL'),
 (60416, '2006-10-19 00:00:00', '2-3', 'LAL', '3-1', 'LAC'),
 (56576, '2006-10-24 00:00:00', '3-4', 'MEM', '5-1', 'CHI'),
 (55808, '2006-10-24 00:00:00', '4-1', 'NYK', '2-4', 'PHI'),
 (54528, '2006-10-21 00:00:00', '3-3', 'MEM', '5-1', 'ORL'),
 (53504, '2006-10-18 00:00:00', '3-1', 'HOU', '2-2', 'MIL'),
 (49664, '2006-10-14 00:00:00', '0-2', 'MIA', '2-1', 'ATL'),
 (49664, '2006-1

In [89]:
query ="""SELECT O.official_id,
O.first_name,
O.last_name,
GI.attendance,
LS.team_city_name_home
FROM officials as O 
INNER JOIN game_info as GI
ON GI.game_id = O.game_id 
INNER JOIN line_score as LS 
ON O.game_id = LS.game_id
"""
cur.execute(query)
cur.fetchmany(20)

[('1140', 'Bruce', 'Alexander', 21454, 'Chicago'),
 ('1165', 'Luis', 'Grillo', 21454, 'Chicago'),
 ('1153', 'Joe', 'Crawford', 21454, 'Chicago'),
 ('1147', 'Mike', 'Callahan', 19763, 'New York'),
 ('1142', 'Dick', 'Bavetta', 19763, 'New York'),
 ('1202', 'Tommie', 'Wood', 19763, 'New York'),
 ('1146', 'Tony', 'Brothers', 15947, 'Dallas'),
 ('1157', 'Terry', 'Durham', 15947, 'Dallas'),
 ('1177', 'Ed', 'Middleton', 15947, 'Dallas'),
 ('1151', 'Sean', 'Corbin', 24042, 'Charlotte'),
 ('1168', 'David', 'Jones', 24042, 'Charlotte'),
 ('1186', 'Blane', 'Reichelt', 17248, 'Orlando'),
 ('1165', 'Luis', 'Grillo', 17248, 'Orlando'),
 ('1153', 'Joe', 'Crawford', 17248, 'Orlando'),
 ('1167', 'Steve', 'Javie', 19351, 'Cleveland'),
 ('1148', 'James', 'Capers', 19351, 'Cleveland'),
 ('1157', 'Terry', 'Durham', 19351, 'Cleveland'),
 ('1176', 'Monty', 'McCutchen', 15815, 'New Jersey'),
 ('1158', 'Hugh', 'Evans', 15815, 'New Jersey'),
 ('1163', 'Bernie', 'Fryer', 15815, 'New Jersey')]

# Unions

# Subqueries