# Players Data #
* **imports**

In [101]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.select import Select
import time
import re

* **Selenium** - function to open a browser and retrieve it's page source in order to use it with Beautiful Soup.

In [9]:
def selenium(link):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--remote-debugging-port=9222")
    options.add_argument("--window-size=1920x1080")
    driver = webdriver.Chrome(options=options)
    driver.get(link)
    wait = WebDriverWait(driver, 5)
    element = wait.until(EC.presence_of_element_located((By.XPATH, "//*[@id='__next']/div[2]/div[2]/section/div[4]/section[3]/div/div[2]/div[3]/table/tfoot")))
    element = wait.until(EC.presence_of_element_located((By.XPATH, "//*[@id='__next']/div[2]/div[2]/section/div[4]/div/section[1]/div/div/table/tbody/tr[2]/td[1]")))
    s = driver.page_source
    driver.quit()
    return s

**Initialize Variables**
* Creating a dataframe with all the players names and links we created in previous script (using beautiful soup)
* Creating all lists that will be used to store the data later into a new dataframe

In [8]:
# Read CSV file into a pandas DataFrame
df = pd.read_csv('nba_players_2.csv')
player_data = {
    'ppg': [],
    'apg': [],
    'rpg': [],
    'pie': [],
    'height': [],
    'weight': [],
    'country': [],
    'prev_team': [],
    'age': [],
    'bdate': [],
    'draft': [],
    'exp': [],
    'awards': [],
    'name': [],
    'hScore': [],
    'hReb': [],
    'hAssist': [],
    'hSteal': [],
    'hFieldGoal': [],
    'hFreeThrow': [],
    'hThreePoint': [],
    'hBlock': [],
    'hof': [],
    'totalPts': [],
    'totalGames': []
}

**The main script.**
* Using Selenium to retrieve page source since the page is dynamic.
* Retrieving each NBA player's data one by one with Selenium (total of 4820).
* Saving each data in a unique list.
* Handling Selenium not opening correct page sometimes.
* Handling non-active players not having their age displayed on the page.


In [6]:
### Loop through each row of the DataFrame ###

for index, row in df.iterrows():
    hof_flag = False
    counter = 0 # --> counting opening player's NBA page
    print(index)
    try:
        link = row['Link'] + "?PerMode=Totals"  # --> Get link from row, adding the mode = totals
        
        # Create Beautiful Soup object
        while True:
            if(counter > 5):
                raise Exception("Too many tries")
            temp = selenium(link)
            soup = BeautifulSoup(temp, 'html.parser')
            if soup("div" ,{"class":"Block_blockContent__6iJ_n 0px"}):
                break
            else:
                time.sleep(2)
                count += 1
    except:
        print("Fail to open")
        
    
    ### Fetching First and Last name ###
    
    try:
        firstName = soup("p" ,{"class":"PlayerSummary_playerNameText___MhqC"})[0].get_text()
        lastName = soup("p" ,{"class":"PlayerSummary_playerNameText___MhqC"})[1].get_text()
        player_date['name'].append(f'{firstName} {lastName}')
    except:
        print("Fail to find name")
        
        
    ### Fetching Career Totals ###
    
    try:
        careerSoup = soup("tfoot" ,{"class":"Crom_footer__6iyse"})[0]
        totalGames.append(careerSoup("td")[3].get_text())
        totalPts.append(careerSoup("td")[6].get_text())
    except:
        print("Fail to find career totals")    
        
        
    ### Fetching Total Awards and if is Hall of Fame ###
    
    sum_awards = 0
    try: 
        awardsSoup = soup("div" ,{"class":"Block_blockContent__6iJ_n 0px"})[-1]

        for div in awardsSoup("div",{"class":"PlayerStatsCareer_row___1XFs py-2 px-3"}):
            x = div("span")[0].get_text()
            if 'Hall of Fame' in x:
                hof_flag = True
            sum_awards += int(x('strong').get_text())
    except:
        print("Failed with HOF")
        
    player_data['hof'].append(hof_flag)
    player_data['awards'].append(sum_awards)

     # Get the ppg,aps,rpg,pie
    player_data['ppg'].append(soup("p" ,{"class":"PlayerSummary_playerStatValue___EDg_"})[0].get_text())
    player_data['rpg'].append(soup("p" ,{"class":"PlayerSummary_playerStatValue___EDg_"})[1].get_text())
    player_data['apg'].append(soup("p" ,{"class":"PlayerSummary_playerStatValue___EDg_"})[2].get_text())
    try:
        player_data['pie'].append(soup("p" ,{"class":"PlayerSummary_playerStatValue___EDg_"})[3].get_text())
    except:
        print("failed to find pie")
        
    
    ### Fetching Basic Information About The Player ###
    
    lenP = len(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"}))
    for i in range(7):
        temp = soup("p" ,{"class":"PlayerSummary_playerInfoLabel__hb5fs"})[i].get_text()
        if temp == "HEIGHT":
            player_data['height'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "WEIGHT":
            player_data['weight'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "COUNTRY":
            player_data['country'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "LAST ATTENDED":
            player_data['prev_team'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "AGE":
            player_data['age'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "BIRTHDATE":
            player_data['bdate'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "DRAFT":
            player_data['draft'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())
        elif temp == "EXPERIENCE":
            player_data['exp'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[i].get_text())

    # age data is missing in the page       
    if lenP == 12:
        player_data['age'].append("--")
    else:
        player_data['exp'].append(soup("p" ,{"class":"PlayerSummary_playerInfoValue__JS8_v"})[7].get_text())

        
        ### Career Highs (points, assists, rebounds...) ###
        
    try:
        highSoup = soup("table" ,{"class":"table border-yellow border-b w-full"})[0]
        for tr in highSoup("tr",{"class":"PlayerStatsCareer_row___1XFs"}):
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Points "):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hScore'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hScore'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Rebounds "):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hReb'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hReb'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")

            
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Assists "):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hAssist'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hAssist'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Steals "):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hSteal'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hSteal'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Field Goals Made"):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hFieldGoal'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hFieldGoal'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Free Throws Made"):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hFreeThrow'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hFreeThrow'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Three Pointers Made"):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hThreePoint'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hThreePoint'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")

                    
            if(tr("td" ,{"class":"p-2 text-sm"})[0].get_text() == "Blocks"):
                try:
                    if(len(tr("strong")) == 1):
                        player_data['hBlock'].append(tr("strong")[0].get_text())
                    else:
                        player_data['hBlock'].append(tr("strong")[1].get_text())
                except:
                    print("failed with high parmenters.")
                    
    except:
        print("Failed With Highes")
        
    ### <------  end of fetching Career Highs -------> ###
        
        
    ### Adding missing values ###    
    
    for value in player_data.values():
        if len(value) != index+1:
            value.append('--')            

0
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.
failed with high parmenters.
1
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.
failed with high parmenters.
2
Fail to find name
Fail to find career totals
Failed with HOF
failed to find pie
failed with high parmenters.
3
Fail to find name
Fail to find career totals
Failed with HOF
failed to find pie
4
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.
5


Exception ignored in: <function Service.__del__ at 0x00000214C5845B20>
Traceback (most recent call last):
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 190, in __del__
    self.stop()
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 151, in stop
    self.send_remote_shutdown_command()
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 130, in send_remote_shutdown_command
    request.urlopen(f"{self.service_url}/shutdown")
  File "D:\Program Files\Python\Lib\urllib\request.py", line 216, in urlopen
    return opener.open(url, data, timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\Program Files\Python\Lib\urllib\request.py", line 519, in open
    response = self._open(req, data)
               ^^^^^^^^^^^^^^^^^^^^^
  File "D:\Program Files\Python\Lib\urllib\request.py", line 536, in _open
    result = self._call_chain(self.handle_open, prot

Fail to open
Fail to find name
Fail to find career totals
failed to find pie
Failed With Highes
6
Fail to open


Exception ignored in: <function Service.__del__ at 0x00000214C5845B20>
Traceback (most recent call last):
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 190, in __del__
    self.stop()
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 151, in stop
    self.send_remote_shutdown_command()
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 135, in send_remote_shutdown_command
    if not self.is_connectable():
           ^^^^^^^^^^^^^^^^^^^^^
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\service.py", line 124, in is_connectable
    return utils.is_connectable(self.port)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\Program Files\Python\Lib\site-packages\selenium\webdriver\common\utils.py", line 102, in is_connectable
    socket_ = socket.create_connection((host, port), 1)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Fail to find name
Fail to find career totals
Failed with HOF
failed to find pie
failed with high parmenters.
7
Fail to open
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.
8
Fail to open
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.
9
Fail to open
Fail to find name
Fail to find career totals
failed to find pie
failed with high parmenters.


 **Saving the new data**

After collecting all the data from NBA.com we needed to create a new dataframe with all the new data and add it to our previous data.
* Used pd.concat to merge the 2 different dataframes.

In [5]:
# creating a data frame with all the scraped data

df_new = pd.DataFrame({
  'Name': player_data['name'],
  'Points': player_data['ppg'],
  'Asists': player_data['apg'],
  'Rebound': player_data['rpg'],
  'Pie': player_data['pie'],
  'Height': player_data['height'],
  'Weight': player_data['weight'],
  'Country': player_data['country'],
  'Previous Team': player_data['prev_team'],
  'Age': player_data['age'],
  'Birthdate': player_data['bdate'],
  'Draft': player_data['draft'],
  'Experience': player_data['exp'],
  'Total Awards': player_data['awards'],
  'High Score': player_data['hScore'],
  'High Rebound': player_data['hReb'],
  'High Assist': player_data['hAssist'],
  'High Steal': player_data['hSteal'],
  'High Field Goals': player_data['hFieldGoal'],
  'High Free Throws': player_data['hFreeThrow'],
  'High Three Pointers': player_data['hThreePoint'],
  'High Block': player_data['hBlock'],
  'Total Points': player_data['totalPts'],
  'Total Games': player_data['totalGames']
})

# Export the dataframe to a csv file
df = pd.concat([df, df_new], axis = 1)
df.to_csv('nba_players_scraped.csv', index=False)

# API Request


**Player's Position**
* Fetching player's playing position using an API

**Initialize Variables**

In [7]:
df_old = pd.read_csv('nba_players_scraped.csv')
Position = []

**Main Script**

In [None]:
for index, row in df.iterrows():
    player = row['Name']
    url = "https://balldontlie.io/api/v1/players?search=" + player
    responseJson = requests.get(url)
    response = responseJson.json()
    data = response['data']
    if type(data) == list:
        position = data[0]['position']
    else:
        position = data['position']
    Position.append(position)

**Saving Position to the CSV file**

In [None]:
df_pos = pd.DataFrame({'Position': Position})
df = pd.concat([df_old, df_pos], axis = 1)
df.to_csv('nba_players_scraped.csv', index=False)