# Get NBA Data

## Importing Packages

In [166]:
import csv
import json
import os
import time

import pandas as pd
import requests

## Testing the connection to the balldontlie API

In [159]:
# Root of the balldontlie v1 API and the endpoint used for the connectivity test.
base = "https://www.balldontlie.io/api/v1/"
url = "games"

# Season used for the test request; the query asks for 100 rows per page.
seasons = 2018
query = f'seasons[]={seasons}&per_page=100'
# Credentials must not be committed with the notebook: read the key from the
# environment instead (None when BALLDONTLIE_API_KEY is unset).
# NOTE(review): no request visible in this notebook sends api_key.
api_key = os.environ.get('BALLDONTLIE_API_KEY')
params = f'?{query}'

In [160]:
# Send the test request to the URL assembled from base, url and params.
res = requests.get(f"{base}{url}{params}")

In [161]:
# Display the HTTP status code of the test request (200 means success).
res.status_code

200

In [162]:
# Display how many pages the API reports for this query.
res.json()['meta']['total_pages']

14

In [171]:
# Display the full pagination metadata returned with the response.
res.json()['meta']

{'total_pages': 14,
 'current_page': 1,
 'next_page': 2,
 'per_page': 100,
 'total_count': 1311}

In [169]:
# Flatten the records under 'data' into a DataFrame; nested objects become
# dotted column names (e.g. home_team.id).
pd.json_normalize(res.json()['data'])

Unnamed: 0,id,date,home_team_score,period,postseason,season,status,time,visitor_team_score,home_team.id,...,home_team.division,home_team.full_name,home_team.name,visitor_team.id,visitor_team.abbreviation,visitor_team.city,visitor_team.conference,visitor_team.division,visitor_team.full_name,visitor_team.name
0,47179,2019-01-30T00:00:00.000Z,126,4,False,2018,Final,,94,2,...,Atlantic,Boston Celtics,Celtics,4,CHA,Charlotte,East,Southeast,Charlotte Hornets,Hornets
1,48751,2019-02-09T00:00:00.000Z,112,4,False,2018,Final,,123,2,...,Atlantic,Boston Celtics,Celtics,13,LAC,LA,West,Pacific,LA Clippers,Clippers
2,48739,2019-02-08T00:00:00.000Z,117,4,False,2018,Final,,110,23,...,Atlantic,Philadelphia 76ers,76ers,8,DEN,Denver,West,Northwest,Denver Nuggets,Nuggets
3,48740,2019-02-08T00:00:00.000Z,119,4,False,2018,Final,,106,30,...,Southeast,Washington Wizards,Wizards,6,CLE,Cleveland,East,Central,Cleveland Cavaliers,Cavaliers
4,48746,2019-02-08T00:00:00.000Z,102,4,False,2018,Final,,96,26,...,Pacific,Sacramento Kings,Kings,16,MIA,Miami,East,Southeast,Miami Heat,Heat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,48807,2019-02-23T00:00:00.000Z,112,4,False,2018,Final,,119,30,...,Southeast,Washington Wizards,Wizards,12,IND,Indiana,East,Central,Indiana Pacers,Pacers
96,48792,2019-02-21T00:00:00.000Z,99,4,False,2018,Final,,113,3,...,Atlantic,Brooklyn Nets,Nets,25,POR,Portland,West,Northwest,Portland Trail Blazers,Trail Blazers
97,48790,2019-02-21T00:00:00.000Z,111,4,False,2018,Final,,98,6,...,Central,Cleveland Cavaliers,Cavaliers,24,PHX,Phoenix,West,Pacific,Phoenix Suns,Suns
98,48837,2019-02-27T00:00:00.000Z,116,4,False,2018,Final,,125,3,...,Atlantic,Brooklyn Nets,Nets,30,WAS,Washington,East,Southeast,Washington Wizards,Wizards


# Build a function to retrieve data by season

In [189]:
def get_season_data(stat, seasons):
    """Download every page of one endpoint for one season and save it as a CSV.

    Parameters
    ----------
    stat : str
        Endpoint name appended to the balldontlie v1 base URL
        (the notebook calls this with 'games' and 'stats').
    seasons : int
        The single season to request, sent as the ``seasons[]`` query value.
        The plural name is kept so existing callers keep working.

    Returns
    -------
    None
        The combined rows are written to ``data/{stat}_{seasons}_season.csv``.

    Raises
    ------
    requests.HTTPError
        If any page answers with an HTTP error status.
    """
    # Build everything from the `seasons` argument rather than from a global
    # loop variable, so the function works no matter how it is called.
    first_page_url = f"https://www.balldontlie.io/api/v1/{stat}?seasons[]={seasons}&per_page=100"
    filename = f'data/{stat}_{seasons}_season.csv'

    res = requests.get(first_page_url, timeout=30)
    print(f'Status: {res.status_code}')
    # Stop with a clear error instead of failing later on a missing JSON key.
    res.raise_for_status()

    # Parse the first page once; it carries the total page count.
    payload = res.json()
    pages = payload['meta']['total_pages']

    # Collect one flattened frame per page and concatenate once at the end.
    frames = [pd.json_normalize(payload['data'])]

    # Repeat the request for page 2 onwards.
    for page in range(2, pages + 1):
        res = requests.get(f"{first_page_url}&page={page}", timeout=30)

        # If we get anything but 200 as status, report which page it was.
        if res.status_code != 200:
            print(f'Page {page} Status: {res.status_code}')
            res.raise_for_status()

        frames.append(pd.json_normalize(res.json()['data']))

        # Sleep for 1 second after each call. BDL API is limited to 60 per minute.
        time.sleep(1)

    df = pd.concat(frames)

    # Make sure the output directory exists before writing.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    df.to_csv(filename, index=False)
    print(f'{filename} saved')

# Get Games Data

In [190]:
# Alternative batch of seasons, left commented out; uncomment to fetch 2013-2018 instead.
#seasons = [2013, 2014, 2015, 2016, 2017, 2018]
# Seasons fetched by this run.
seasons = [2008, 2009, 2010, 2011, 2012]
# Download the 'games' endpoint once per season; each call writes its own CSV.
for season in seasons:
    print(f'Getting game data for {season} season')
    get_season_data('games', season)

Getting game data for 2008 season
Status: 200
data/games_2008_season.csv saved
Getting game data for 2009 season
Status: 200
data/games_2009_season.csv saved
Getting game data for 2010 season
Status: 200
data/games_2010_season.csv saved
Getting game data for 2011 season
Status: 200
data/games_2011_season.csv saved
Getting game data for 2012 season
Status: 200
data/games_2012_season.csv saved


# Get player stats

In [191]:
# Alternative batch of seasons, left commented out. With it disabled, this cell
# reuses the `seasons` list assigned in the games cell.
#seasons = [2013, 2014, 2015, 2016, 2017, 2018]
# Download the 'stats' endpoint once per season; each call writes its own CSV.
for season in seasons:
    print(f'Getting player stats for {season} season')
    get_season_data('stats', season)

Getting player stats for 2008 season
Status: 200
data/stats_2008_season.csv saved
Getting player stats for 2009 season
Status: 200
data/stats_2009_season.csv saved
Getting player stats for 2010 season
Status: 200
data/stats_2010_season.csv saved
Getting player stats for 2011 season
Status: 200
data/stats_2011_season.csv saved
Getting player stats for 2012 season
Status: 200
data/stats_2012_season.csv saved
