In [2]:
# Install libraries
!pip3 install nba_api --quiet

In [26]:
def printLoad(numerator, denominator, width=25):
    """Print a one-line text progress bar that redraws itself in place.

    Args:
        numerator: Amount of work completed so far.
        denominator: Total amount of work; must be greater than 0.
        width: Number of characters in the bar (default 25).

    Returns:
        None. When ``denominator`` is not greater than 0, ``Error!`` is
        printed and nothing else is drawn.
    """
    if not denominator > 0:
        print("Error!")
        return

    perc = numerator / denominator

    # Round once and clamp to [0, width] so an out-of-range ratio
    # (numerator > denominator, or a negative numerator) cannot make the
    # bar longer than `width` characters.
    filled = min(max(round(perc * width), 0), width)
    done = "█" * filled
    left = "░" * (width - filled)

    # end='\r' moves the cursor back to the start of the line, so the next
    # call overwrites this bar instead of printing a new line.
    print(f"{round(100*perc,1)}% {done+left}", end='\r', flush=True)

In [3]:
# Import libraries
import pandas as pd
from nba_api.stats.endpoints import playergamelog, leaguedashplayerbiostats
from nba_api.stats.library import data
from nba_api.stats.static import players as pl
import time

In [4]:
# Get players list
# `players` is bound to data.players itself (no copy is made).
players = data.players

# Separate active and retired players, keeping the first occurrence of each
# distinct record. Index 4 of a record is compared against True/False to pick
# the bucket; records matching neither are left out of both lists.
# A set of already-seen records replaces the original `p not in list` scan,
# which re-compared every record against the whole list (quadratic).
# Assumes every field of a record is hashable (ids, strings, booleans).
activePlayers = []
retiredPlayers = []
seen_active = set()
seen_retired = set()
for p in players:
    record_key = tuple(p)
    if p[4] == True:
        if record_key not in seen_active:
            seen_active.add(record_key)
            activePlayers.append(p)
    elif p[4] == False:
        if record_key not in seen_retired:
            seen_retired.add(record_key)
            retiredPlayers.append(p)

# Fix Korean and Chinese names
# Maps the full name stored at index 3 to the values written to
# index 1 and index 2 of the same record (in that order).
name_overrides = {
    'Ha Seung-jin': ('Ha', 'Seung-jin'),
    'Sun Yue': ('Sun', 'Yue'),
    'Yao Ming': ('Yao', 'Ming'),
    'Yi Jianlian': ('Yi', 'Jianlian'),
    'Zhou Qi': ('Zhou', 'Qi'),
}
for p in players:
    override = name_overrides.get(p[3])
    if override is not None:
        # Records are edited in place.
        p[1], p[2] = override

In [5]:
# Run the three name fields (indices 1, 2 and 3) of every record through
# pl._strip_accents, writing the result back in place.
for record in players:
    for field in (1, 2, 3):
        record[field] = pl._strip_accents(record[field])

In [6]:
# Collapse dotted initials, e.g. 'A.J.' -> 'AJ'. Only records whose full name
# (index 3) contains one of the tokens are touched; for those, index 2 and
# index 3 are both rewritten, applying the tokens in the order listed.
dotted_initials = ('.J.', '.C.', '.P.', '.R.', '.L.')
for p in players:
    if not any(token in p[3] for token in dotted_initials):
        continue
    for idx in (2, 3):
        cleaned = p[idx]
        for token in dotted_initials:
            # '.J.' -> 'J', '.C.' -> 'C', ...
            cleaned = cleaned.replace(token, token.strip('.'))
        p[idx] = cleaned

In [7]:

# Fetch the 2024-25 regular-season player bio stats and drop every column
# that is empty for all rows.
playerBios = (
    leaguedashplayerbiostats.LeagueDashPlayerBioStats(
        season='2024-25',
        season_type_all_star='Regular Season',
        league_id='00',
    )
    .get_data_frames()[0]
    .dropna(axis=1, how='all')
)

# Lookup tables keyed by record index 0 (matched against PLAYER_ID below),
# built from the `players` records.
fn_dict = {p[0]: p[2] for p in players}
ln_dict = {p[0]: p[1] for p in players}
n_dict = {p[0]: p[3] for p in players}
a_dict = {p[0]: p[4] for p in players}

playerBios['PLAYER_FIRST_NAME'] = playerBios['PLAYER_ID'].map(fn_dict)
playerBios['PLAYER_LAST_NAME'] = playerBios['PLAYER_ID'].map(ln_dict)
# Series.map yields NaN for ids that are missing from the lookup table; keep
# the name returned by the endpoint in that case instead of blanking it out.
playerBios['PLAYER_NAME'] = (
    playerBios['PLAYER_ID'].map(n_dict).fillna(playerBios['PLAYER_NAME'])
)
playerBios['ACTIVE'] = playerBios['PLAYER_ID'].map(a_dict)

# Reassign instead of inplace=True so the step reads as a plain transformation.
playerBios = playerBios.rename(columns={
    'PLAYER_NAME':'Player',
    'PLAYER_HEIGHT':'Ht',
    'PLAYER_WEIGHT':'Wt',
    'COLLEGE':'College',
    'COUNTRY':'Country',
    'DRAFT_YEAR':'Draft Year',
    'DRAFT_ROUND':'Draft Round',
    'DRAFT_NUMBER':'Draft Pick',
    'NET_RATING':'Net Rating',
    'TEAM_ABBREVIATION':'Team'
})

# Divide PTS/REB/AST by GP row-wise and round to one decimal.
# NOTE(review): assumes the endpoint returns totals for these columns — confirm.
playerBios[['PTS','REB','AST']] = (
    playerBios[['PTS','REB','AST']].div(playerBios['GP'], axis=0).round(1)
)

In [8]:
# Columns kept in sqldf: playerBios column name -> sqldf column name.
# The dict order is also the column order of the resulting frame.
sql_columns = {
    'PLAYER_ID': 'playerId',
    'PLAYER_LAST_NAME': 'lastName',
    'PLAYER_FIRST_NAME': 'firstName',
    'Player': 'fullName',
    'Ht': 'height',
    'PLAYER_HEIGHT_INCHES': 'heightIn',
    'Wt': 'weight',
    'AGE': 'age',
    'Draft Year': 'draftYear',
    'Draft Round': 'draftRound',
    'Draft Pick': 'draftPick',
    'ACTIVE': 'active',
}

# Select the columns, then rename them in one pass.
sqldf = playerBios[list(sql_columns)].rename(columns=sql_columns)

In [9]:
# sqldf.sort_values(by=['lastName','firstName'],ascending=[True,True]).set_index('playerId').to_csv('player.csv')

In [10]:
# Plain Python list of the values in sqldf's 'playerId' column, in row order
playerIds = list(sqldf['playerId'])

In [11]:
import urllib.request
import ssl
import os

def download_image(url, save_as, verify_ssl=False):
    """Download the resource at ``url`` and write its bytes to ``save_as``.

    Args:
        url: Address handed to ``urllib.request.urlopen``.
        save_as: Destination file path. Missing parent directories are
            created; a bare file name is written to the current directory.
        verify_ssl: When False (the default, which keeps the original
            behaviour) TLS certificates are NOT verified. Pass True to use
            ``ssl.create_default_context()``.

    Errors raised by ``urlopen``, ``os.makedirs`` or ``open`` are not caught
    here and propagate to the caller.
    """
    # os.path.dirname returns '' for a bare file name and os.makedirs('')
    # raises, so only create the directory when there is one.
    target_dir = os.path.dirname(save_as)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # SECURITY: the unverified context skips certificate validation and is
    # only suitable for testing; prefer verify_ssl=True where possible.
    if verify_ssl:
        context = ssl.create_default_context()
    else:
        context = ssl._create_unverified_context()

    # Read the whole response body into memory.
    with urllib.request.urlopen(url, context=context) as response:
        payload = response.read()

    # Write the bytes to the destination file.
    with open(save_as, 'wb') as f:
        f.write(payload)

In [29]:
# Build the headshot URL and local target path for every player id and show
# progress. The download call itself is currently disabled.
i = 0
last = 0
total_players = len(playerIds)
for i, p in enumerate(playerIds, start=1):
    file_name = str(p) + '.png'
    image_url = 'https://cdn.nba.com/headshots/nba/latest/1040x760/' + file_name
    save_as = 'players/' + file_name
    # download_image(image_url, save_as)
    printLoad(i, total_players)

100.0% █████████████████████████

In [12]:
# Number of player ids collected in playerIds
len(playerIds)

569