In [1]:
import pandas as pd
import sqlite3

## Free Agents 2010-20

### Load Data

In [2]:
# Read the raw contract-history CSV into a DataFrame.
fa_2010 = pd.read_csv(filepath_or_buffer="./raw_data/nba_contracts_history.csv")

### Transform Data

In [3]:
# Drop columns that are not needed. errors='ignore' keeps this cell
# re-runnable: on a second run the columns are already gone and a plain
# drop would raise KeyError.
unused_columns = ['CONTRACT_END', 'W', 'L', 'FTM', 'FTA', 'FT%', 'OREB', 'DREB', 'PF']
fa_2010 = fa_2010.drop(columns=unused_columns, errors='ignore')

# Denominators with zeros masked to NaN, so a row with no games played or no
# field-goal attempts yields NaN instead of inf.
games_played = fa_2010['GP'].where(fa_2010['GP'] != 0)
shot_attempts = fa_2010['FGA'].where(fa_2010['FGA'] != 0)

# Effective field-goal percentage: each made three counts as 1.5 field goals.
fa_2010['eFG'] = (fa_2010['FGM'] + 0.5 * fa_2010['3PM']) / shot_attempts

# Per-game averages: each total column divided by games played.
per_game_totals = {
    'PPG': 'PTS',
    'APG': 'AST',
    'RPG': 'REB',
    'SPG': 'STL',
    'BPG': 'BLK',
    'MPG': 'MIN',
}
for per_game_col, total_col in per_game_totals.items():
    fa_2010[per_game_col] = fa_2010[total_col] / games_played

# Year before the contract starts; the SQL join below matches All-Star and
# All-NBA rows on this value.
fa_2010['PREV_YEAR'] = fa_2010['CONTRACT_START'] - 1

## SQL Connection: Connect Free Agent Data to All-Star & All-NBA Data

In [4]:
# Load the All-Star and All-NBA tables from the SQLite database into DataFrames.
con = sqlite3.connect('./nba_data.db')
try:
    allstar = pd.read_sql_query("SELECT * FROM allStar", con)
    allnba = pd.read_sql_query("SELECT * FROM allNBA", con)
finally:
    # Release the database handle even if one of the queries fails.
    con.close()

In [5]:
con = sqlite3.connect('./nba_data.db')
try:
    # Write the free agent 2010-20 dataset as a SQL table; if_exists='replace'
    # overwrites the table on every run, so the cell can be re-executed.
    fa_2010.to_sql('FA2010', con, index=True, if_exists='replace')

    # Join FA data with All-Star and All-NBA data on player name and the
    # year before the contract starts. LEFT JOINs keep every free-agent row
    # even when there is no matching award row.
    sql = '''SELECT FA.NAME, FA.CONTRACT_START, FA.AVG_SALARY, FA.AGE, FA.EFG,
FA.PPG, FA.APG, FA.RPG, FA.SPG, FA.BPG, FA.MPG, allStar.isAllStar, aN.is_AllNBA
FROM FA2010 FA
LEFT JOIN allStar ON (FA.NAME = allStar.Name AND FA.PREV_YEAR = allStar.year)
LEFT JOIN allNBA aN ON (FA.NAME = aN.Name AND FA.PREV_YEAR = aN.Season_start)
'''

    # Let pandas run the query and build the frame, then apply the column
    # names used by the rest of the notebook (same order as the SELECT list).
    fa_2010_full = pd.read_sql_query(sql, con)
    fa_2010_full.columns = ['Name', 'FA_Year', 'AVG_SALARY', 'Age',
                            'eFG', 'PPG', 'APG', 'RPG', 'SPG', 'BPG',
                            'MPG', 'is_AllStar', 'is_AllNBA']

    # Data cleansing: rows without a matching award row come back as NULL
    # from the LEFT JOINs; record those as 0.
    award_flags = ['is_AllStar', 'is_AllNBA']
    fa_2010_full[award_flags] = fa_2010_full[award_flags].fillna(value=0)

    # Write the joined table back to the database.
    fa_2010_full.to_sql('FA2010_FULL', con, index=True, if_exists='replace')
finally:
    # Close the connection even if a write or the query fails.
    con.close()