# Getting Player Stats Data

In [1]:
#This notebook uses BeautifulSoup to grab the data from the specified webpages of Basketball Reference
#The function takes the two-digit season-ending year (e.g. 12 for the NBA_2012 page) and the stat type to scrape
#The options available to scrape are: 
                    # totals , per_game , per_minute , per_poss , advanced , play_by_play , shooting, adj_shooting

#Unlike the ContractData notebook, this notebook uses the .to_csv() function in order to save the data into a CSV in the assets folder
#These saves were done manually but could be built as a function to speed up data extraction if multiple stat types are used.

In [2]:
#Imports 

import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
def Get_Player_Stats_HTML(years, stat_type):
    """Scrape the player rows of one season's stat table from Basketball Reference.

    Parameters
    ----------
    years : int
        Two-digit season-ending year used to build the page URL
        (``12`` requests ``NBA_2012_<stat_type>.html``).
    stat_type : str
        Page suffix naming the table to fetch, e.g. ``'advanced'`` or ``'totals'``.

    Returns
    -------
    list[list[str]]
        One list per table row holding the stripped text of every cell, with a
        season label inserted at index 0. The label is the year before the one
        in the URL (``12`` -> ``'2011'``). Returns an empty list when the page
        has no ``<table>`` or the table has no ``<tbody>``.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status (e.g. when rate limited),
        so a failed download is not mistaken for an empty season.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # Work with the full four-digit year so single-digit inputs stay zero-padded
    # (the old f'20{years - 1}' turned 10 into '209').
    season_end_year = 2000 + int(years)
    url = f'https://www.basketball-reference.com/leagues/NBA_{season_end_year}_{stat_type}.html'

    # Without a timeout a stalled connection blocks the notebook cell indefinitely.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table')
    if table is None:
        print('No <table> found.')
        return []

    tbody = table.find('tbody')
    if tbody is None:
        # html.parser does not synthesise a <tbody>; without one there are no data rows to trust.
        print('No <tbody> found.')
        return []

    season_label = str(season_end_year - 1)
    extracted_data = []

    for row in tbody.find_all('tr'):
        # Rows marked with class "thead" repeat the column headings inside the
        # body (presumably inserted by the site for readability); they are not players.
        if 'thead' in (row.get('class') or []):
            continue

        cell_texts = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        extracted_data.append([season_label, *cell_texts])

    return extracted_data

## Player Advanced Stats
#### stat_type = 'advanced'

In [6]:
# Creating the advanced stats csv
#We will start with declaring the years that we want to grab from the URLs
#(two-digit season-ending years: 12 -> NBA_2012 page, ..., 25 -> NBA_2025 page)
years = list(range(12, 26))
all_data = []

# Seconds to wait between page requests so the scrape does not hammer the site.
# NOTE(review): Sports Reference publishes a per-minute request limit — confirm the
# current value and adjust this delay if needed.
request_delay_seconds = 4

for position, year in enumerate(years):
    if position > 0:
        time.sleep(request_delay_seconds)

    stats = Get_Player_Stats_HTML(year, 'advanced')
    if not stats:
        # Stop here so the CSV on disk is not overwritten with an incomplete data set.
        raise RuntimeError(f"No 'advanced' rows were scraped for year {year}; aborting before writing the CSV")
    all_data.extend(stats)

#Declare the names of the columns that we will use in our DF - Columns from the Advanced Stats on Basketball Reference
#'Season' is the label the scraper prepends to every row; the rest follow the page's column order
columns_adv = [
    'Season', 'Rank', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'PER',
    'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
    'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP', 'Honors'
]
df_adv = pd.DataFrame(all_data, columns=columns_adv)


#Here we will just save our DF created as a CSV file unlike in the Contract Data
df_adv.to_csv('Assets/NBA_Player_Advanced_Stats.csv', index=False)

total_rows = len(df_adv)
print(f'Your CSV file has been created and saved to the Assets Folder, it contains {total_rows} rows of player data')

KeyboardInterrupt: 

## Player Total Stats
#### stat_type = 'totals'

In [5]:
# creating the total stats csv
# Two-digit season-ending years to request (12 -> NBA_2012 page, ..., 25 -> NBA_2025 page)
years = list(range(12, 26))
all_data = []

# Seconds to wait between page requests so the scrape does not hammer the site.
# NOTE(review): Sports Reference publishes a per-minute request limit — confirm the
# current value and adjust this delay if needed.
request_delay_seconds = 4

for position, year in enumerate(years):
    if position > 0:
        time.sleep(request_delay_seconds)

    stats = Get_Player_Stats_HTML(year, 'totals')
    if not stats:
        # Stop here so the CSV on disk is not overwritten with an incomplete data set.
        raise RuntimeError(f"No 'totals' rows were scraped for year {year}; aborting before writing the CSV")
    all_data.extend(stats)


#Declare the names of the columns that we will use in our DF - Columns from the Total Season Stats on Basketball Reference
#'Season' is the label the scraper prepends to every row; the rest follow the page's column order
columns_total = [
    'Season', 'Rank', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP',
    'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%',
    'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
    'TOV', 'PF', 'PTS', 'Trp-Dbl', 'Honors'
]
df_total = pd.DataFrame(all_data, columns=columns_total)

#Here we will just save our DF created as a CSV file unlike in the Contract Data
df_total.to_csv('Assets/NBA_Player_Total_Stats.csv', index=False)

total_rows = len(df_total)
print(f'Your CSV file has been created and saved to the Assets Folder, it contains {total_rows} rows of player data')

Your CSV file has been created and saved to the Assets Folder, it contains 9262 rows of player data
