In [64]:
import requests
import pandas as pd
import time

In [32]:
def fetch_player_info(id, timeout=10):
    """Fetch a single player record from the balldontlie players endpoint.

    Parameters
    ----------
    id : int or str
        Player identifier; appended to the players URL as a path segment.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block indefinitely on a stalled connection.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, instead of trying to
        decode an error page as if it were player data.
    """
    base_url = "https://www.balldontlie.io/api/v1/players"
    response = requests.get(f"{base_url}/{id}", timeout=timeout)
    response.raise_for_status()
    return response.json()

In [34]:
# Smoke-test the helper with a single player id and display the decoded JSON.
player_info = fetch_player_info(237)
player_info

{'id': 237,
 'first_name': 'LeBron',
 'height_feet': 6,
 'height_inches': 8,
 'last_name': 'James',
 'position': 'F',
 'team': {'id': 14,
  'abbreviation': 'LAL',
  'city': 'Los Angeles',
  'conference': 'West',
  'division': 'Pacific',
  'full_name': 'Los Angeles Lakers',
  'name': 'Lakers'},
 'weight_pounds': 250}

In [9]:
# Build one row per player from the list of API records.
# NOTE: `players_data` is only assigned by the `fetch_players()` cell further
# down, so this cell cannot run on a fresh kernel before that one.
df = pd.DataFrame(players_data)

# Expand the nested 'team' dictionaries into their own flat columns.
df_team = pd.json_normalize(df['team'])

# Swap the nested 'team' column for the flattened team columns, side by side.
df = pd.concat([df.drop(columns='team'), df_team], axis=1)

In [49]:
# Season-averages endpoint with the 2022 season already in the query string;
# extra filters are appended to it with "&".
base_url = "https://www.balldontlie.io/api/v1/season_averages?season=2022"

In [61]:
# Request the season averages for a single player (id 237) via the player_ids[] filter.
response = requests.get(base_url + "&player_ids[]=237")

In [62]:
# Decode and display the JSON body; the records sit in a list under the 'data' key.
response.json()

{'data': [{'games_played': 56,
   'player_id': 237,
   'season': 2022,
   'min': '35:42',
   'fgm': 11.09,
   'fga': 22.14,
   'fg3m': 2.21,
   'fg3a': 6.84,
   'ftm': 4.54,
   'fta': 5.89,
   'oreb': 1.18,
   'dreb': 7.16,
   'reb': 8.34,
   'ast': 6.8,
   'stl': 0.91,
   'blk': 0.61,
   'turnover': 3.27,
   'pf': 1.66,
   'pts': 28.93,
   'fg_pct': 0.501,
   'fg3_pct': 0.324,
   'ft_pct': 0.77}]}

In [122]:

def fetch_players(max_retries=5, retry_delay=60, timeout=10):
    """Download every page of the balldontlie players endpoint.

    Pages are requested one after another until the page count reported in
    the response's ``meta.total_pages`` is reached.

    Parameters
    ----------
    max_retries : int, optional
        Number of consecutive rate-limited (HTTP 429) attempts allowed for a
        single page before giving up.
    retry_delay : float, optional
        Seconds to sleep after a 429 response before retrying the same page.
    timeout : float, optional
        Seconds to wait for each HTTP response; prevents a stalled connection
        from hanging the notebook forever.

    Returns
    -------
    list of dict
        The player records collected so far. The list is partial if the loop
        stopped early because of repeated rate limiting or an unexpected
        status code (a message is printed in both cases).
    """
    base_url = "https://www.balldontlie.io/api/v1/players"
    players = []
    current_page = 1
    total_pages = 1  # placeholder until the first response reports the real count
    retry_count = 0

    while current_page <= total_pages:
        response = requests.get(base_url, params={'page': current_page}, timeout=timeout)

        if response.status_code == 200:
            data = response.json()
            players.extend(data['data'])

            total_pages = data['meta']['total_pages']
            current_page += 1

            # A successful request ends the current streak of rate-limit retries.
            retry_count = 0
            # Light progress logging: one line per ten pages.
            if current_page % 10 == 0:
                print(f"Page {current_page - 1} fetched successfully.")

        elif response.status_code == 429:
            retry_count += 1
            # Check the limit BEFORE sleeping so the final failure does not
            # waste a full delay only to exit anyway.
            if retry_count >= max_retries:
                print("Reached maximum number of retries. Exiting.")
                break

            print(f"Rate limit exceeded. Sleeping for {retry_delay} seconds.")
            time.sleep(retry_delay)

        else:
            print(f"Failed to get data for page {current_page}. Status code: {response.status_code}")
            break

    return players


def fetch_season_averages(player_ids, season=2022, chunk_size=100,
                          max_retries=5, retry_delay=60, timeout=10):
    """Fetch season averages for many players, querying the API in batches.

    Parameters
    ----------
    player_ids : list
        Player ids to look up. They are sliced into consecutive batches and
        each batch is sent as the ``player_ids[]`` query parameter.
    season : int, optional
        Season sent as the ``season`` query parameter.
    chunk_size : int, optional
        Number of ids per request.
    max_retries : int, optional
        Attempts allowed per batch when the API keeps answering HTTP 429.
    retry_delay : float, optional
        Seconds to sleep after a 429 response before retrying the batch.
    timeout : float, optional
        Seconds to wait for each HTTP response.

    Returns
    -------
    list of dict
        The concatenated ``data`` records of every successful batch. Batches
        that fail (unexpected status, or still rate-limited after
        ``max_retries`` attempts) are skipped with a printed message, so the
        result may be incomplete.
    """
    base_url = "https://www.balldontlie.io/api/v1/season_averages"
    all_averages = []

    for start in range(0, len(player_ids), chunk_size):
        chunked_ids = player_ids[start:start + chunk_size]
        # Use the real slice length so the last (short) batch is reported correctly.
        end = start + len(chunked_ids)
        print(f"Fetching season averages for players {start} to {end}")
        params = {
            'season': season,
            'player_ids[]': chunked_ids
        }

        for attempt in range(1, max_retries + 1):
            response = requests.get(base_url, params=params, timeout=timeout)

            if response.status_code == 200:
                all_averages.extend(response.json()['data'])
                break
            if response.status_code != 429:
                print(f"Failed to fetch data. Status code: {response.status_code}")
                break
            # Rate limited: wait and retry, but do not sleep after the final attempt.
            if attempt < max_retries:
                print(f"Rate limit exceeded. Sleeping for {retry_delay} seconds.")
                time.sleep(retry_delay)
        else:
            # Every attempt was rate limited; say so instead of dropping the batch silently.
            print(f"Reached maximum number of retries. Skipping players {start} to {end}.")

    return all_averages

Fetch all NBA players

In [74]:
# Download the full player list page by page; the call pauses whenever the API rate-limits.
players_data = fetch_players()

Page 9 fetched successfully.
Page 19 fetched successfully.
Page 29 fetched successfully.
Page 39 fetched successfully.
Page 49 fetched successfully.
Page 59 fetched successfully.
Rate limit exceeded. Sleeping for 60 seconds.
Page 69 fetched successfully.
Page 79 fetched successfully.
Page 89 fetched successfully.
Page 99 fetched successfully.
Page 109 fetched successfully.
Page 119 fetched successfully.
Rate limit exceeded. Sleeping for 60 seconds.
Page 129 fetched successfully.
Page 139 fetched successfully.
Page 149 fetched successfully.
Page 159 fetched successfully.
Page 169 fetched successfully.
Page 179 fetched successfully.
Rate limit exceeded. Sleeping for 60 seconds.
Page 189 fetched successfully.
Page 199 fetched successfully.


In [92]:
# One row per player, with the API's generic 'id' renamed so it cannot clash
# with the team's own 'id' once the team fields are flattened.
df = pd.DataFrame(players_data).rename(columns={'id': 'player_id'})

# Flatten the nested 'team' dictionaries into columns, with an unambiguous team key.
df_team = pd.json_normalize(df['team']).rename(columns={'id': 'team_id'})

# The nested column is redundant now that its contents live in df_team.
df = df.drop(columns='team')

# Player columns and flattened team columns side by side.
player_df = pd.concat([df, df_team], axis=1)


In [105]:
# Plain Python list of every player id, used to request season averages in batches.
player_ids = player_df['player_id'].tolist()

Fetch season averages for the 2022 season

In [123]:
# Fetch averages for every player id; `season` is left at the function's default.
season_averages = fetch_season_averages(player_ids)

In [127]:
def _minutes_to_float(value):
    """Convert a minutes value such as '35:42' to decimal minutes (35.7).

    Numbers are passed through as floats, and a string without a colon is
    read as whole minutes. Missing or unparseable values become NaN rather
    than aborting the whole column conversion.
    """
    if isinstance(value, (int, float)):
        return float(value)
    if not isinstance(value, str) or not value.strip():
        return float('nan')
    parts = value.split(':')
    try:
        minutes = float(parts[0])
        seconds = float(parts[1]) if len(parts) > 1 else 0.0
    except ValueError:
        return float('nan')
    return minutes + seconds / 60.0


season_averages_df = pd.DataFrame(season_averages)

# The API reports playing time as an 'MM:SS' string; store it as decimal minutes.
season_averages_df['min'] = season_averages_df['min'].apply(_minutes_to_float)

# Add player names to season_averages_df; a left join keeps every averages row
# even when no matching player record exists.
season_averages_df = pd.merge(
    season_averages_df,
    player_df[['player_id', 'first_name', 'last_name']],
    on='player_id',
    how='left',
)

## Write to Database

In [130]:
import os
from getpass import getpass
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# Connection settings come from the environment so that no secret is stored in
# the notebook. The password is prompted for when NBA_DB_PASSWORD is not set.
username = os.environ.get('NBA_DB_USER', 'root')
password = os.environ.get('NBA_DB_PASSWORD') or getpass('MySQL password: ')
host = os.environ.get('NBA_DB_HOST', 'localhost')
database = os.environ.get('NBA_DB_NAME', 'nba')

# Create a connection to the database. User and password are URL-escaped so
# characters such as '@', ':' or '/' cannot corrupt the connection URL.
engine = create_engine(
    f"mysql+mysqlconnector://{quote_plus(username)}:{quote_plus(password)}@{host}/{database}"
)

In [139]:
# Write the player DataFrame to the 'players' table of the database `engine` connects to.
# if_exists='replace' overwrites an existing table; index=False leaves the DataFrame index out.
player_df.to_sql('players', con=engine, if_exists='replace', index=False)

539

In [None]:
# Write the season averages (with player names) to the 'season_averages_22' table,
# overwriting any existing table and leaving the DataFrame index out.
season_averages_df.to_sql('season_averages_22', con=engine, if_exists='replace', index=False)