# Web Scraping 

This is the script we used to scrape the data from the official NBA website. It was run locally so that Selenium could be used. We then saved the scraped data as CSV files in our repository and on the JupyterLab server so that we can access and work with the data remotely.

### Required Imports 

In [2]:
import csv
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
import time

### Interacting with the Browser by using ChromeDriverManager

In [101]:
# Start a Chrome session (the matching driver binary is installed on demand)
# and open the player-stats page.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://www.nba.com/stats/players/traditional?Season=1998-99&SeasonType=Regular+Season")

wait = WebDriverWait(driver, 10)

# The consent banner may be added to the page after the initial load, so wait
# (up to 10 s) for its close button instead of looking it up immediately,
# which raises NoSuchElementException when the banner is not there yet.
continue_button = wait.until(
    EC.element_to_be_clickable((By.CLASS_NAME, "onetrust-close-btn-handler"))
)
continue_button.click()

# Block until the button has been removed from the DOM, i.e. the banner is gone.
wait.until(EC.staleness_of(continue_button))

True

### Function to retrieve NBA data for a specific season

In [2]:
# CSV column names, in the order of the table cells at indices 1..29
# (the cell at index 0 is not exported).
_STAT_COLUMNS = (
    'PLAYER_NAME', 'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'MIN', 'PTS',
    'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF',
    'NBA_FANTASY_PTS', 'DD2', 'TD3', 'PLUS_MINUS',
)


def data_nba_season(season):
    """Scrape the stats table currently shown in the browser into a CSV file.

    Uses two module-level globals that the calling cell must have set:
    ``driver`` (the WebDriver session) and ``select_element`` (a previously
    located element used as the starting point for the 'All' lookup).

    Args:
        season: Label used as the file name; rows are appended to
            ``./Data/<season>.csv``.

    Returns:
        A status string: a success message containing the file path, or a
        message saying nothing was written when no usable rows were found.
    """
    try:
        # NOTE: the XPath starts with "//", so the search covers the whole
        # document and not only the descendants of select_element.
        option_all = select_element.find_element(By.XPATH, "//option[text()='All']")
        option_all.click()
    except NoSuchElementException:
        print("The 'All' option could not be found.")

    # Locate the table rows.
    rows = driver.find_elements(By.XPATH, "//table[@class='Crom_table__p1iZz']//tbody//tr")

    # Read the cell texts of every row into one dict per row.
    data = []
    for row in rows:
        columns = row.find_elements(By.TAG_NAME, "td")
        # Skip rows that do not carry all expected cells (index 0 plus the
        # 29 exported ones) instead of failing with an IndexError.
        if len(columns) <= len(_STAT_COLUMNS):
            continue
        data.append(
            {name: cell.text for name, cell in zip(_STAT_COLUMNS, columns[1:])}
        )

    filename = "./Data/" + season + ".csv"

    # Nothing scraped: report it rather than crashing on data[0].
    if not data:
        return "No data rows found for season " + season + "; nothing written to " + filename

    # Make sure the target directory exists before opening the file.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Write the header only when starting a new (or still empty) file;
    # otherwise the rows are appended below the existing content.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    # Explicit UTF-8 so names with non-ASCII characters are written the same
    # way on every platform instead of depending on the locale default.
    with open(filename, 'a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=_STAT_COLUMNS)
        if needs_header:
            writer.writeheader()
        writer.writerows(data)

    return "Data saved " + filename + " successfully"


### Select Data from the 1996 season and create a CSV file

In [118]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "1996"
# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 1996-97 entry in that dropdown.
select.select_by_value("1996-97")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)

'Data saved 1996.csv successfully'

### Select Data from the 1997 season and create a CSV file

In [119]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "1997"
# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 1997-98 entry in that dropdown.
select.select_by_value("1997-98")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)

'Data saved 1997.csv successfully'

### Select Data from the 2005 season and create a CSV file

In [120]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2005"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2005-06 entry in that dropdown.
select.select_by_value("2005-06")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2005.csv successfully'

### Select Data from the 2006 season and create a CSV file

In [121]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2006"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2006-07 entry in that dropdown.
select.select_by_value("2006-07")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2006.csv successfully'

### Select Data from the 2007 season and create a CSV file

In [122]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2007"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2007-08 entry in that dropdown.
select.select_by_value("2007-08")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2007.csv successfully'

### Select Data from the 2011 season and create a CSV file

In [123]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2011"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2011-12 entry in that dropdown.
select.select_by_value("2011-12")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2011.csv successfully'

### Select Data from the 2012 season and create a CSV file

In [124]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2012"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2012-13 entry in that dropdown.
select.select_by_value("2012-13")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2012.csv successfully'

### Select Data from the 2013 season and create a CSV file

In [125]:
# Label for this season; data_nba_season uses it to build the CSV path.
saison = "2013"

# Locate the dropdown by its CSS class. data_nba_season reads
# select_element as a global, so it must be assigned before the call.
select_element = driver.find_element(By.CLASS_NAME, "DropDown_select__4pIg9")
select = Select(select_element)

# Choose the 2013-14 entry in that dropdown.
select.select_by_value("2013-14")

# Fixed one-second pause before scraping — presumably to let the table
# reload after the selection; TODO confirm this is always long enough.
time.sleep(1)
data_nba_season(saison)


'Data saved 2013.csv successfully'

### Close Driver

In [None]:
# End the WebDriver session once all seasons have been scraped.
driver.quit()