In [1]:
import pandas as pd
import sqlite3

## All-NBA

### Load Data

In [2]:
# Load the raw All-NBA selections (last 10 years) from the raw_data CSV
allnba = pd.read_csv(filepath_or_buffer='./raw_data/allnba.csv')

### Clean Data

In [3]:
# Build a row key from season + All-NBA team; wide_to_long needs `i` to
# uniquely identify each row of the wide frame.
allnba['id'] = allnba['Season'] + ' ' + allnba['Tm']

# Reshape from one row per (season, team) to one row per selected player.
# wide_to_long gathers every column named 'P' followed by digits into a single
# 'P' column. The numeric suffix becomes an index level named 'Tm', which is
# discarded by the reset_index(drop=True) below.
# NOTE(review): assumes the player columns are named P1, P2, ... - confirm against the CSV.
allnba_cleaned = pd.wide_to_long(allnba, ['P'], i = 'id', j = 'Tm')

# Data String Formatting
# Each 'P' value is "<name> <position>". Peel the position off the right-hand
# side first, then split the name on its first space only, so names with more
# than two words (suffixes such as "Jr.", multi-word surnames) no longer break
# the fixed three-column assignment.
name_and_position = allnba_cleaned['P'].str.rsplit(' ', n = 1, expand = True)
first_and_rest = name_and_position[0].str.split(' ', n = 1, expand = True)
allnba_cleaned['FirstName'] = first_and_rest[0]
allnba_cleaned['LastName'] = first_and_rest[1]
allnba_cleaned['Position'] = name_and_position[1]

# Normalise the two surnames that are rewritten to a plain-ASCII spelling.
# na=False keeps rows with a missing surname out of the mask instead of raising.
# NOTE(review): these are substring matches, so any other surname containing
# 'Don' or 'Joki' would be rewritten as well.
is_doncic = allnba_cleaned['LastName'].str.contains('Don', na = False)
is_jokic = allnba_cleaned['LastName'].str.contains('Joki', na = False)
allnba_cleaned.loc[is_doncic, 'LastName'] = 'Doncic'
allnba_cleaned.loc[is_jokic, 'LastName'] = 'Jokic'

# Transforming Data
# Drop the (id, Tm) MultiIndex produced by the reshape; the 'Season' and 'Tm'
# columns carry the same information.
allnba_cleaned = allnba_cleaned.reset_index(drop=True)
allnba_cleaned['Name'] = allnba_cleaned['FirstName'] + ' ' + allnba_cleaned['LastName']
# First four characters of the season label, kept as a string.
allnba_cleaned['Season_start'] = allnba_cleaned['Season'].str[0:4]
allnba_cleaned = allnba_cleaned.drop(columns = ['Lg', 'Voting', 'P', 'FirstName', 'LastName', 'Position'])
# Constant flag column: every row in this table is an All-NBA selection.
allnba_cleaned['is_AllNBA'] = 1

## All-Star

### Load Data

In [4]:
# Load the raw All-Star selections from the raw_data CSV
allstar = pd.read_csv(filepath_or_buffer='./raw_data/final_data.csv')

### Clean Data

In [5]:
# Keep only All-Star rows whose year is strictly greater than 2010 (2010 itself
# is excluded) and only the columns used below. The explicit copy makes the
# column assignments that follow operate on an independent frame.
# NOTE: this cell overwrites `allstar` and drops 'first'/'last', so the load
# cell must be re-run before this cell can be run a second time.
allstar = allstar.loc[allstar['year'] > 2010, ['first', 'last', 'team', 'year']].copy()

# Transform Data
allstar['Name'] = allstar['first'] + ' ' + allstar['last']
# Season label "<year>-<last two digits of year + 1>", e.g. 2011 -> '2011-12'.
# Vectorised string ops replace the row-wise apply and also work on an empty frame.
allstar['Season'] = allstar['year'].astype(str) + '-' + (allstar['year'] + 1).astype(str).str[2:]
allstar = allstar.drop(columns = ['first', 'last'])
# Constant flag column: every row in this table is an All-Star selection.
allstar['isAllStar'] = 1

## SQL Connection

In [6]:
# Connect to the SQLite database file that receives the cleaned tables
con = sqlite3.connect('./nba_data.db')

try:
    # Write each cleaned DataFrame to its own table. if_exists='replace' drops
    # and recreates the table on every run; index=True also stores the
    # DataFrame index as a column.
    allnba_cleaned.to_sql('allNBA', con, index=True, if_exists='replace')
    allstar.to_sql('allStar', con, index=True, if_exists='replace')
finally:
    # Close the connection even if a write fails, so the database file handle
    # is not left open in the kernel.
    con.close()