In [3]:
import json
import pandas as pd
import numpy as np
import mysql.connector
import os

In [4]:
# Load the connection settings from database_init.json. The file holds
# credentials, so keep it out of version control (add it to .gitignore).
with open('database_init.json') as file:
    db_config = json.load(file)

# Create NBA_DB if it does not already exist.
db_name = "NBA_DB"
# Bind the handles up front so the cleanup below never looks at a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    # Connect at server level: a 'database' entry in the config (if present --
    # TODO confirm the layout of database_init.json) is dropped, because selecting
    # a schema that has not been created yet would make the connect call fail.
    temp_config = {key: value for key, value in db_config.items() if key != 'database'}
    cnxn = mysql.connector.connect(**temp_config)
    cursor = cnxn.cursor()

    cursor.execute(f"CREATE DATABASE IF NOT EXISTS {db_name} COLLATE utf8mb4_unicode_ci")
    print(f"Database '{db_name}' created or already exists.")
    # Selecting the schema doubles as a check that it is now usable.
    cnxn.database = db_name

except mysql.connector.Error as err:
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Database 'NBA_DB' created or already exists.
Connection Closed.


In [13]:
# Load the MVP voting table and normalise two columns in one chain:
#   * id   -> the text captured between "/<one character>/" and ".html"
#   * rank -> ASCII letters stripped (presumably tie markers -- verify against
#             the CSV), then cast to the nullable Int64 dtype
mvp_players = (
    pd.read_csv(os.path.join('data', 'mvp_players.csv'))
    .assign(
        id=lambda frame: frame['id'].str.extract(r'/./(.*)\.html'),
        rank=lambda frame: (
            frame['rank']
            .str.replace(r'[a-zA-Z]', '', regex=True)
            .astype('Int64')
        ),
    )
)
mvp_players

Unnamed: 0.1,Unnamed: 0,rank,player,age,team_id,votes_first,points_won,points_max,award_share,g,...,ast_per_g,stl_per_g,blk_per_g,fg_pct,fg3_pct,ft_pct,ws,ws_per_48,id,year
0,0,1,Giannis Antetokounmpo,24,MIL,78,941,1010,0.932,72,...,5.9,1.3,1.5,0.578,0.256,0.729,14.4,0.292,antetgi01,2019
1,1,2,James Harden,29,HOU,23,776,1010,0.768,78,...,7.5,2.0,0.7,0.442,0.368,0.879,15.2,0.254,hardeja01,2019
2,2,3,Paul George,28,OKC,0,356,1010,0.352,77,...,4.1,2.2,0.4,0.438,0.386,0.839,11.9,0.201,georgpa01,2019
3,3,4,Nikola Jokić,23,DEN,0,212,1010,0.210,80,...,7.3,1.4,0.7,0.511,0.307,0.821,11.8,0.226,jokicni01,2019
4,4,5,Stephen Curry,30,GSW,0,175,1010,0.173,69,...,5.2,1.3,0.4,0.472,0.437,0.916,9.7,0.199,curryst01,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,80,7,Anthony Edwards,23,MIN,0,12,1000,0.012,79,...,4.5,1.2,0.6,0.447,0.395,0.837,8.4,0.140,edwaran01,2025
81,81,9,Stephen Curry,36,GSW,0,2,1000,0.002,70,...,6.0,1.1,0.4,0.448,0.397,0.933,7.9,0.168,curryst01,2025
82,82,10,Jalen Brunson,28,NYK,0,1,1000,0.001,65,...,7.3,0.9,0.1,0.488,0.383,0.821,8.3,0.172,brunsja01,2025
83,83,10,James Harden,35,LAC,0,1,1000,0.001,79,...,8.7,1.5,0.7,0.410,0.352,0.874,8.3,0.143,hardeja01,2025


In [14]:
# Create the AWARDS table (if missing) and load the MVP voting rows into it.
tbl_name = "AWARDS"
# Bind the handles up front so the cleanup below never inspects a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    cnxn = mysql.connector.connect(**db_config)
    cursor = cnxn.cursor()
    cnxn.database = db_name

    # NOTE: IF NOT EXISTS leaves an already existing table untouched, so a table
    # created earlier with different columns keeps its old schema and the INSERT
    # below can fail against it.
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {tbl_name} ("
                   f"player_id VARCHAR(255) NOT NULL,"
                   f"Ranking INT NOT NULL,"
                   f"SEASON INT NOT NULL);")
    print(f"Table '{tbl_name}' created or already exists.")

    # Build all parameter tuples first; missing values (NaN / pd.NA) become None
    # so the connector sends SQL NULL instead of an unconvertible pandas object.
    query = f"INSERT INTO {tbl_name} (player_id, Ranking, SEASON) VALUES (%s, %s, %s)"
    award_rows = [
        tuple(None if pd.isna(value) else value
              for value in (row['id'], row['rank'], row['year']))
        for _, row in mvp_players.iterrows()
    ]
    # One batched call instead of one round trip per row.
    if award_rows:
        cursor.executemany(query, award_rows)
    cnxn.commit()

except mysql.connector.Error as err:
    # Nothing is committed on this path; the connection is closed below.
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Table 'AWARDS' created or already exists.
Error!!: 1364 (HY000): Field 'POS' doesn't have a default value
Connection Closed.


In [90]:
# Load the per-season top-50 CSVs, tag every row with its season, stack them,
# and derive player_id from the Player_Link column.
season_frames = []
# range() gives a guaranteed ascending season order (2019..2025); a set literal
# has no defined iteration order.
for season in range(2019, 2026):
    path = os.path.join('top 50 player of mjt list', f'nba_top50_players_{season}.csv')
    season_frames.append(pd.read_csv(path).assign(Season=season))
players = pd.concat(season_frames, axis=0, ignore_index=True)
# Keep the text captured between "/<one character>/" and ".html" as the id.
players['player_id'] = players['Player_Link'].str.extract(r'/./(.*)\.html')
players = players.drop(columns='Player_Link')

In [120]:
# Independent copy of the seven columns that the TOP_PLAYERS insert reads.
top_players = players.loc[
    :, ['player_id', 'Rk', 'Age', 'Team', 'Pos', 'PTS', 'Season']
].copy()
top_players

Unnamed: 0,player_id,Rk,Age,Team,Pos,PTS,Season
0,hardeja01,1,29,HOU,PG,2818,2019
1,georgpa01,2,28,OKC,SF,2159,2019
2,walkeke02,3,28,CHO,PG,2102,2019
3,bealbr01,4,25,WAS,SG,2099,2019
4,lillada01,5,28,POR,PG,2067,2019
...,...,...,...,...,...,...,...
379,banede01,46,26,MEM,SG,1327,2025
380,mobleev01,47,23,CLE,PF,1316,2025
381,powelno01,48,31,LAC,SG,1306,2025
382,bridgmi02,49,26,CHO,PF,1300,2025


In [123]:
# Create the TOP_PLAYERS table (if missing) and load the top-50 scorer rows into it.
tbl_name = "TOP_PLAYERS"
# Bind the handles up front so the cleanup below never inspects a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    cnxn = mysql.connector.connect(**db_config)
    cursor = cnxn.cursor()
    cnxn.database = db_name

    # NOTE: IF NOT EXISTS leaves an already existing table untouched, so a table
    # created earlier with different columns keeps its old schema.
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {tbl_name} ("
                   f"player_id VARCHAR(255) NOT NULL,"
                   f"Rk INT NOT NULL,"
                   f"AGE INT NOT NULL,"
                   f"team_id VARCHAR(20) NOT NULL,"
                   f"POS VARCHAR(20) NOT NULL,"
                   f"PTS INT NOT NULL,"
                   f"SEASON INT NOT NULL);")
    print(f"Table '{tbl_name}' created or already exists.")

    # DataFrame columns in the same order as the INSERT column list.
    source_columns = ['player_id', 'Rk', 'Age', 'Team', 'Pos', 'PTS', 'Season']
    query = f"INSERT INTO {tbl_name} (player_id, Rk, AGE, team_id, POS, PTS, SEASON) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    # Missing values (NaN / pd.NA) become None so the connector sends SQL NULL
    # instead of an unconvertible pandas object.
    top_player_rows = [
        tuple(None if pd.isna(row[column]) else row[column] for column in source_columns)
        for _, row in top_players.iterrows()
    ]
    # One batched call instead of one round trip per row.
    if top_player_rows:
        cursor.executemany(query, top_player_rows)
    cnxn.commit()

except mysql.connector.Error as err:
    # Nothing is committed on this path; the connection is closed below.
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Table 'TOP_PLAYERS' created or already exists.
Connection Closed.


In [33]:
def feet_inch_to_cm(x):
    """Convert a height written as '<feet><separator><inches>' to whole centimetres.

    Parameters
    ----------
    x : str or missing value
        Height text such as '6-10'. The first character is read as the feet
        digit and everything from the third character on as the inches, so the
        feet part must be a single digit.

    Returns
    -------
    int or None
        The height rounded to the nearest centimetre, or None when ``x`` is
        missing (None / NaN / pd.NA), so a gap in the data does not abort a
        whole-column ``map``.

    Raises
    ------
    ValueError
        If the feet or inches part is not an integer literal.
    """
    if pd.isna(x):
        return None
    feet = int(x[0])
    inch = int(x[2:])
    # 1 ft = 30.48 cm, 1 in = 2.54 cm
    return round(feet * 30.48 + inch * 2.54)


# Load the full player directory and normalise it for the PLAYERS_DETAIL insert.
player_id_regex = r'/./(.*)\.html'
players_list = pd.read_csv(os.path.join('data', 'all_players.csv'))
# Remove the literal '*' marker from names. regex=False is spelled out because
# the default of `regex` differs between pandas versions and '*' alone is not a
# valid regular expression.
players_list['player'] = players_list['player'].str.replace('*', '', regex=False)
# Keep the text captured between "/<one character>/" and ".html" as the id.
players_list['id'] = players_list['id'].str.extract(player_id_regex)
players_list['height'] = players_list['height'].map(feet_inch_to_cm)
# Only the birth year is kept; the column keeps its original name 'birth_date'
# and uses the nullable Int64 dtype so missing dates stay missing.
players_list['birth_date'] = (
    pd.to_datetime(players_list['birth_date']).dt.year.astype('Int64')
)
players_list

Unnamed: 0.1,Unnamed: 0,player,year_min,year_max,pos,height,weight,birth_date,colleges,id,is_active
0,0,Alaa Abdelnaby,1991,1995,F-C,208,240.0,1968,Duke,abdelal01,False
1,1,Zaid Abdul-Aziz,1969,1978,C-F,206,235.0,1946,Iowa State,abdulza01,False
2,2,Kareem Abdul-Jabbar,1970,1989,C,218,225.0,1947,UCLA,abdulka01,False
3,3,Mahmoud Abdul-Rauf,1991,2001,G,185,162.0,1969,LSU,abdulma02,False
4,4,Tariq Abdul-Wahad,1998,2003,F,198,223.0,1974,"Michigan,San Jose State",abdulta01,False
...,...,...,...,...,...,...,...,...,...,...,...
5308,5308,Ante Žižić,2018,2020,F-C,208,266.0,1997,,zizican01,False
5309,5309,Jim Zoet,1983,1983,C,216,240.0,1953,Kent State University,zoetji01,False
5310,5310,Bill Zopf,1971,1971,G,185,170.0,1948,Duquesne,zopfbi01,False
5311,5311,Ivica Zubac,2017,2025,C,213,240.0,1997,,zubaciv01,True


In [37]:
# Create the PLAYERS_DETAIL table (if missing) and load the player directory into it.
tbl_name = "PLAYERS_DETAIL"
# Bind the handles up front so the cleanup below never inspects a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    cnxn = mysql.connector.connect(**db_config)
    cursor = cnxn.cursor()
    cnxn.database = db_name

    # NOTE: IF NOT EXISTS leaves an already existing table untouched, so a table
    # created earlier with different columns keeps its old schema and the INSERT
    # below can fail with an "unknown column" error.
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {tbl_name} ("
                   f"player_id VARCHAR(255) NOT NULL PRIMARY KEY,"
                   f"FULL_NAME VARCHAR(255) NOT NULL,"
                   f"POS VARCHAR(31) NOT NULL,"
                   f"YEAR_MIN INT NOT NULL,"
                   f"YEAR_MAX INT NOT NULL,"
                   f"HEIGHT INT,"
                   f"WEIGHT INT,"
                   f"BIRTH_YEAR INT,"
                   f"COLLEGE VARCHAR(255),"
                   f"ACTIVE BOOLEAN NOT NULL);")
    print(f"Table '{tbl_name}' created or already exists.")

    # DataFrame columns in the same order as the INSERT column list.
    source_columns = ['id', 'player', 'pos', 'year_min', 'year_max',
                      'height', 'weight', 'birth_date', 'colleges', 'is_active']
    query = f"INSERT INTO {tbl_name} (player_id, FULL_NAME, POS, YEAR_MIN, YEAR_MAX,  HEIGHT, WEIGHT, BIRTH_YEAR, COLLEGE, ACTIVE) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    # Every missing value (NaN / pd.NA) becomes None -- not only weight, birth
    # year and college -- so the connector always sends SQL NULL for gaps.
    player_rows = [
        tuple(None if pd.isna(row[column]) else row[column] for column in source_columns)
        for _, row in players_list.iterrows()
    ]
    # One batched call instead of one round trip per row.
    if player_rows:
        cursor.executemany(query, player_rows)
    cnxn.commit()

except mysql.connector.Error as err:
    # Nothing is committed on this path; the connection is closed below.
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Table 'PLAYERS_DETAIL' created or already exists.
Error!!: 1054 (42S22): Unknown column 'FULL_NAME' in 'field list'
Connection Closed.


In [16]:
# Read every roster CSV found in 'champ team' and stack them into one frame.
# Only regular files ending in .csv are loaded, so helper scripts, hidden files
# and sub-directories in that folder are skipped instead of being handed to
# read_csv. Sorting makes the row order independent of the directory listing order.
champ_dir = 'champ team'
file_names = sorted(os.listdir(champ_dir))
lst = []
for file in file_names:
    path = os.path.join(champ_dir, file)
    if file.lower().endswith('.csv') and os.path.isfile(path):
        lst.append(pd.read_csv(path))

winners = pd.concat(lst, axis=0, ignore_index=True)
winners

Unnamed: 0,No.,Player,Pos,Ht,Wt,Birth Date,Birth,Exp,College,player_link,team_id,year
0,0,Christian Braun,SG,6-6,218,"April 17, 2001",usUS,R,Kansas,https://www.basketball-reference.com/players/b...,DEN,2023
1,11,Bruce Brown,SF,6-4,202,"August 15, 1996",usUS,4,Miami (FL),https://www.basketball-reference.com/players/b...,DEN,2023
2,13,Thomas Bryant,C,6-10,248,"July 31, 1997",usUS,5,Indiana,https://www.basketball-reference.com/players/b...,DEN,2023
3,5,Kentavious Caldwell-Pope,SG,6-5,204,"February 18, 1993",usUS,9,Georgia,https://www.basketball-reference.com/players/c...,DEN,2023
4,31,Vlatko Čančar,PF,6-8,236,"April 10, 1997",siSI,3,,https://www.basketball-reference.com/players/c...,DEN,2023
...,...,...,...,...,...,...,...,...,...,...,...,...
132,22,Malachi Richardson,SG,6-6,205,"January 5, 1996",usUS,2,Syracuse,https://www.basketball-reference.com/players/r...,TOR,2019
133,43,Pascal Siakam,PF,6-8,230,"April 2, 1994",cmCM,2,New Mexico State,https://www.basketball-reference.com/players/s...,TOR,2019
134,17,Jonas Valančiūnas,C,6-11,265,"May 6, 1992",ltLT,6,,https://www.basketball-reference.com/players/v...,TOR,2019
135,23,Fred VanVleet,PG,6-0,197,"February 25, 1994",usUS,2,Wichita State,https://www.basketball-reference.com/players/v...,TOR,2019


In [22]:
# Derive player_id: the text captured between "/<one character>/" and ".html".
winners['player_id'] = winners['player_link'].str.extract(r'/./(.*)\.html')
# Replace the 'R' marker with '0' and cast to nullable Int64. The dtype guard
# makes the cell safe to re-run: once 'Exp' is an integer column the .str
# accessor is no longer available and the conversion is already done.
if not pd.api.types.is_integer_dtype(winners['Exp']):
    winners['Exp'] = winners['Exp'].str.replace('R', '0', regex=False).astype('Int64')

In [27]:
# Create the WINNER_TEAMS table (if missing) and load the championship rosters into it.
tbl_name = "WINNER_TEAMS"
# Bind the handles up front so the cleanup below never inspects a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    cnxn = mysql.connector.connect(**db_config)
    cursor = cnxn.cursor()
    cnxn.database = db_name

    # NOTE: IF NOT EXISTS leaves an already existing table untouched, so a table
    # created earlier with different columns keeps its old schema.
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {tbl_name} ("
                   f"team_id VARCHAR(20) NOT NULL,"
                   f"player_id VARCHAR(255) NOT NULL,"
                   f"POS VARCHAR(20) NOT NULL,"
                   f"EXPERIENCE INT,"
                   f"YEAR INT NOT NULL);")
    print(f"Table '{tbl_name}' created or already exists.")

    # DataFrame columns in the same order as the INSERT column list.
    source_columns = ['team_id', 'player_id', 'Pos', 'Exp', 'year']
    query = f"INSERT INTO {tbl_name} (team_id, player_id, POS, EXPERIENCE, YEAR) VALUES (%s, %s, %s, %s, %s)"
    # 'Exp' is a nullable integer column: pd.NA / NaN become None so the
    # connector sends SQL NULL instead of an unconvertible pandas object.
    winner_rows = [
        tuple(None if pd.isna(row[column]) else row[column] for column in source_columns)
        for _, row in winners.iterrows()
    ]
    # One batched call instead of one round trip per row.
    if winner_rows:
        cursor.executemany(query, winner_rows)
    cnxn.commit()

except mysql.connector.Error as err:
    # Nothing is committed on this path; the connection is closed below.
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Table 'WINNER_TEAMS' created or already exists.
Connection Closed.


In [28]:
# Franchise reference table, read from 'team stat/nba_teams.csv'.
teams_csv_path = os.path.join('team stat', 'nba_teams.csv')
teams_list = pd.read_csv(teams_csv_path)
teams_list

Unnamed: 0,id,Team,Location,Seasons,Record,Playoff Appearances,Championships
0,OKC,Oklahoma City Thunder,"Oklahoma City, Oklahoma",59;\n \n 1967-68 to 2025-26,"2538-2150, .541 W-L%",34,2
1,IND,Indiana Pacers,"Indianapolis, Indiana",59 (50 NBA & 9 ABA);\n \n 1967-68 to 2025-26,"2407-2287, .513 W-L%\n \n (1980-1970 NBA &...",38\n \n (29 NBA & 9 ABA),3\n \n (0 NBA & 3 ABA)
2,PHI,Philadelphia 76ers,"Philadelphia, Pennsylvania",77;\n \n 1949-50 to 2025-26,"3125-2898, .519 W-L%",54,3
3,HOU,Houston Rockets,"Houston, Texas",59;\n \n 1967-68 to 2025-26,"2421-2267, .516 W-L%",35,2
4,POR,Portland Trail Blazers,"Portland, Oregon",56;\n \n 1970-71 to 2025-26,"2328-2116, .524 W-L%",37,1
5,SAS,San Antonio Spurs,"San Antonio, Texas",59 (50 NBA & 9 ABA);\n \n 1967-68 to 2025-26,"2717-1976, .579 W-L%\n \n (2339-1610 NBA &...",47\n \n (39 NBA & 8 ABA),5\n \n (5 NBA & 0 ABA)
6,BOS,Boston Celtics,"Boston, Massachusetts",80;\n \n 1946-47 to 2025-26,"3695-2501, .596 W-L%",62,18
7,NOP,New Orleans Pelicans,"New Orleans, Louisiana",24;\n \n 2002-03 to 2025-26,"852-998, .461 W-L%",9,0
8,MIN,Minnesota Timberwolves,"Minneapolis, Minnesota",37;\n \n 1989-90 to 2025-26,"1196-1680, .416 W-L%",13,0
9,ATL,Atlanta Hawks,"Atlanta, Georgia",77;\n \n 1949-50 to 2025-26,"2967-3052, .493 W-L%",49,1


In [30]:
# Create the TEAMS_DETAILS table (if missing) and load the franchise rows into it.
tbl_name = "TEAMS_DETAILS"
# Bind the handles up front so the cleanup below never inspects a stale
# connection/cursor left in the notebook namespace by an earlier cell run.
cnxn = None
cursor = None
try:
    cnxn = mysql.connector.connect(**db_config)
    cursor = cnxn.cursor()
    cnxn.database = db_name

    # NOTE: IF NOT EXISTS leaves an already existing table untouched, so a table
    # created earlier with different columns keeps its old schema.
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {tbl_name} ("
                   f"id VARCHAR(20) NOT NULL PRIMARY KEY,"
                   f"NAME VARCHAR(255) NOT NULL,"
                   f"LOCATION VARCHAR(255) NOT NULL);")
    print(f"Table '{tbl_name}' created or already exists.")

    # DataFrame columns in the same order as the INSERT column list.
    source_columns = ['id', 'Team', 'Location']
    query = f"INSERT INTO {tbl_name} (id, NAME, LOCATION) VALUES (%s, %s, %s)"
    # Missing values (NaN / pd.NA) become None so the connector sends SQL NULL
    # instead of an unconvertible pandas object.
    team_rows = [
        tuple(None if pd.isna(row[column]) else row[column] for column in source_columns)
        for _, row in teams_list.iterrows()
    ]
    # One batched call instead of one round trip per row.
    if team_rows:
        cursor.executemany(query, team_rows)
    cnxn.commit()

except mysql.connector.Error as err:
    # Nothing is committed on this path; the connection is closed below.
    print(f"Error!!: {err}")
finally:
    if cnxn is not None and cnxn.is_connected():
        if cursor is not None:
            cursor.close()
        cnxn.close()
        print("Connection Closed.")

Table 'TEAMS_DETAILS' created or already exists.
Connection Closed.
