In [1]:
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
import time
import sqlite3
import os

In [2]:
# Active-player records from nba_api's static players module; each record's
# 'id' entry is what the per-player lookup further down reads.
playersList = players.get_active_players()

In [3]:
# Slow cell: issues one CommonPlayerInfo request per active player and pauses
# 2 seconds after each one (presumably to stay under the API's rate limit).
# Collects the first 'CommonPlayerInfo' row of every response.
player_bios = []
for player in playersList:
    endpoint = commonplayerinfo.CommonPlayerInfo(player_id=player['id'])
    player_info = endpoint.get_normalized_dict()
    bio_rows = player_info['CommonPlayerInfo']
    player_bios.append(bio_rows[0])
    time.sleep(2)

In [4]:
# Make a DataFrame from the list of player bios collected above
# (one entry was appended per player, so one row per player).
player_df = pd.DataFrame(player_bios)

In [5]:
# Columns carried forward: player id and name, current team, first/last
# year, and the NBA 75 flag. Everything else from the bio is dropped.
keep_columns = [
    "PERSON_ID",
    "DISPLAY_FIRST_LAST",
    "TEAM_ID",
    "TEAM_ABBREVIATION",
    "FROM_YEAR",
    "TO_YEAR",
    "GREATEST_75_FLAG",
]

# Explicit column selection; raises KeyError if any listed column is missing.
player_df_new = player_df.loc[:, keep_columns]

In [6]:
# write to a new CSV; create the output folder first so the write cannot
# fail (and discard the fetched data) just because the folder is missing
os.makedirs('./intermediate_data', exist_ok=True)
player_df_new.to_csv('./intermediate_data/curr_team_players.csv', header=True, index=False)

## Merge with Finances

In [7]:
# Open the SQLite database file (sqlite3 creates it if it does not exist);
# the to_sql cells below write their tables through this connection.
sqliteConnection = sqlite3.connect('nba_data.db')

In [8]:
# Read back the current-roster CSV written by the previous section.
curr_team_players = pd.read_csv('./intermediate_data/curr_team_players.csv')

In [9]:
# Load the raw finance data; the Player, Season, Salary and Type columns
# are the ones used further down.
finance = pd.read_csv('./raw_data/Finance.csv')

In [10]:
def remap_func(col1, col2):
    """Return 'Free Agent' when ``col1`` equals 0, otherwise return ``col2``.

    Parameters
    ----------
    col1 : value compared against 0 (the caller passes a row's Salary).
    col2 : value returned unchanged when ``col1`` is not 0 (the caller
        passes the row's current Type).
    """
    return 'Free Agent' if col1 == 0 else col2

# Relabel every zero-salary row as 'Free Agent'; all other rows keep their
# existing Type. This is the same rule remap_func encodes, applied to the
# whole column at once with Series.mask instead of one Python call per row.
finance['Type'] = finance['Type'].mask(finance['Salary'] == 0, 'Free Agent')

In [11]:
# Store the roster as the 'curr_team_players' table, replacing any existing
# table of that name; the DataFrame index is written as a column too.
curr_team_players.to_sql(
    name='curr_team_players',
    con=sqliteConnection,
    if_exists='replace',
    index=True,
)

531

In [12]:
# Store the finance data as the 'finance' table, replacing any existing
# table of that name; the DataFrame index is written as a column too.
finance.to_sql(
    name='finance',
    con=sqliteConnection,
    if_exists='replace',
    index=True,
)

3672

In [13]:
# Join every finance row to the current-roster player with the same display
# name, then load the joined rows into a DataFrame.

# LEFT JOIN (not an inner join): every finance row is kept, and PERSON_ID /
# TEAM_ABBREVIATION come back as NULL when no roster player has that name.
sql = '''SELECT C.PERSON_ID, F.Player, C.TEAM_ABBREVIATION, F.Season, F.Salary, F.Type
FROM finance F
LEFT JOIN curr_team_players C
ON F.Player = C.DISPLAY_FIRST_LAST;'''

sqliteConnection = sqlite3.connect('nba_data.db')
try:
    # Create cursor object
    cursor = sqliteConnection.cursor()

    # Executing the query
    cursor.execute(sql)

    # Fetching rows from the result table
    result = cursor.fetchall()
finally:
    # Closing the connection, even when the query above raised
    sqliteConnection.close()

# Column labels follow the SELECT order above.
final_team_df = pd.DataFrame(result, columns=['ID', 'Name', 'Team', 'Season', 'Salary', 'Type'])

In [14]:
# Save the joined salary table; create the output folder first so the write
# cannot fail just because the folder is missing.
os.makedirs('./intermediate_data', exist_ok=True)
final_team_df.to_csv('./intermediate_data/curr_player_salaries.csv', index=False)