# **DS6001: Live Coding 7**
## **Randa Ampah**

In [2]:
import numpy as np
import pandas as pd
import sqlite3

In [3]:
# Open the course database over a SQLite URI. mode=rw opens an existing file
# for reading and writing but never creates one, so a missing file or wrong
# working directory raises sqlite3.OperationalError right here. A plain
# sqlite3.connect('nba2024.db') would silently create an empty database and
# the first query would then fail with a confusing "no such table" error.
nbadb = sqlite3.connect('file:nba2024.db?mode=rw', uri=True)

In [5]:
# Display the connection object to confirm that it exists.
nbadb

<sqlite3.Connection at 0x10434e2f0>

In [6]:
# All columns of the games flagged sellout=1, longest duration first.
myquery = '''
SELECT * 
FROM games
WHERE sellout=1
ORDER BY duration DESC
'''

# Run the query on the open connection; pandas returns the rows as a DataFrame.
pd.read_sql_query(myquery, con=nbadb) # con: the open sqlite3 connection

Unnamed: 0,gameid,attendance,sellout,duration,regulationPeriods
0,22401102,19927,1,190,4
1,22400143,19815,1,178,4
2,22400773,19812,1,174,4
3,22400903,18997,1,173,4
4,22400867,19432,1,172,4
...,...,...,...,...,...
797,22401174,19156,1,113,4
798,22401137,19156,1,113,4
799,22400759,19961,1,113,4
800,22401200,17832,1,113,4


In [None]:
# Same query as above, written with bad etiquette: lowercase keywords and
# every clause crammed onto one line. SQL keywords are case-insensitive, so it
# still works, but it is much harder to read.
myquery = 'select * from games where sellout=1 order by duration desc'

In [None]:
# Good etiquette:
#   - write clause keywords in ALL CAPS
#   - start each clause on a new line (this is why a multi-line string is used)
#   - a keyword that belongs to another clause (e.g. DESC, which is part of
#     ORDER BY) may stay on the same line, or go on a new line with an indent
#     to show that it belongs to the previous clause

myquery = '''
SELECT * 
FROM games 
WHERE sellout=1 
ORDER BY duration DESC
'''

pd.read_sql_query(myquery, con=nbadb)

## **MVP**

In [17]:

# Total MVP score per player, highest first. Each playergames row is scored as
#     points + threepointersmade + 2*fieldgoalsmade + freethrowsmade
#     + reboundstotal + 2*assists + 4*blocks + 4*steals
#     - fieldgoalsattempted - freethrowsattempted - 2*turnovers
# and the row scores are summed for each personid.
# The LEFT JOIN keeps playergames rows whose personid has no match in
# players; those totals still appear, with a NULL name.
myquery = '''
SELECT p.display_first_last, 
    SUM(pg.points + pg.threepointersmade - pg.fieldgoalsattempted + 2*pg.fieldgoalsmade - pg.freethrowsattempted +
    pg.freethrowsmade + pg.reboundstotal + 2*pg.assists + 4*pg.blocks + 4*pg.steals - 2*pg.turnovers) AS mvp_points
FROM playergames pg
LEFT JOIN players p
    ON p.personid = pg.personid
GROUP BY pg.personid, p.display_first_last
ORDER BY mvp_points DESC
'''

pd.read_sql_query(myquery, con=nbadb)

Unnamed: 0,display_first_last,mvp_points
0,Nikola Jokić,4880
1,Shai Gilgeous-Alexander,4482
2,Giannis Antetokounmpo,3988
3,James Harden,3571
4,Tyrese Haliburton,3534
...,...,...
582,David Jones Garcia,0
583,,0
584,,0
585,,-1


In [8]:
# List the columns available in playergames. The query is spelled out here
# instead of reusing `myquery`: that name is reassigned by several cells, so
# this cell would otherwise report the columns of whichever query ran last.
# LIMIT 0 returns the column names without fetching any rows.
playergames_columns_query = '''
SELECT *
FROM playergames
LIMIT 0
'''

pd.read_sql_query(playergames_columns_query, con=nbadb).columns

Index(['status', 'personid', 'position', 'starter', 'assists', 'blocks',
       'fieldgoalsattempted', 'fieldgoalsmade', 'foulsoffensive', 'foulsdrawn',
       'foulspersonal', 'foulstechnical', 'freethrowsattempted',
       'freethrowsmade', 'minutes', 'plusminuspoints', 'points',
       'pointsfastbreak', 'pointsinthepaint', 'pointssecondchance',
       'reboundsdefensive', 'reboundsoffensive', 'reboundstotal', 'steals',
       'threepointersattempted', 'threepointersmade', 'turnovers',
       'notplayingreason', 'notplayingdescription', 'gameid', 'team_id'],
      dtype='object')

## **Challenge**

In [24]:
# Every distinct final score (winning points, losing points), ignoring which
# team was home and which was away, together with the number of games that
# ended on that score and the most recent date it happened.
#
# The inner query collapses teamgames to one row per game: MAX(pts) is the
# winner's score and MIN(pts) the loser's (assumes teamgames holds one row per
# team per game). The outer query groups identical score pairs.
# ORDER BY makes the row order deterministic; without it SQLite is free to
# return the groups in any order.

myquery = '''
SELECT win_score, lose_score, COUNT(*) AS count, MAX(game_date) AS date
FROM (
    SELECT gameid,
        game_date,
        MAX(pts) AS win_score,
        MIN(pts) AS lose_score
    FROM teamgames
    GROUP BY gameid, game_date
) AS game_scores
GROUP BY win_score, lose_score
ORDER BY win_score, lose_score
'''

pd.read_sql_query(myquery, con=nbadb)

Unnamed: 0,win_score,lose_score,count,date
0,89,88,1,2024-12-26
1,91,90,1,2024-12-11
2,93,86,1,2025-04-13
3,93,92,1,2024-11-29
4,94,87,1,2025-02-09
...,...,...,...,...
736,149,148,1,2025-02-12
737,151,148,1,2025-02-25
738,153,104,1,2025-04-11
739,155,126,1,2024-12-26
