# depth_chart_scraper

### Scrapes and organizes NBA depth charts from [*ESPN*](http://www.espn.com/nba/depth/_/type/print).

In [1]:
# Import dependencies
import numpy as np
import pandas as pd
import re

from urllib.request import urlopen
from bs4 import BeautifulSoup

In [2]:
# Scrape ESPN depth chart
# The response is read inside a `with` block so the underlying HTTP connection
# is closed as soon as the page body has been downloaded. `html` therefore holds
# the raw page bytes, which BeautifulSoup parses directly.
with urlopen('http://www.espn.com/nba/depth/_/type/print') as response:
    html = response.read()
soup = BeautifulSoup(html, 'lxml')

In [3]:
# Dictionary of team abbreviations
# Maps each team name to its three-letter abbreviation (one entry per line).
team_abbrvs = {
    'Atlanta': 'ATL',
    'Boston': 'BOS',
    'Brooklyn': 'BRK',
    'Charlotte': 'CHO',
    'Chicago': 'CHI',
    'Cleveland': 'CLE',
    'Dallas': 'DAL',
    'Denver': 'DEN',
    'Detroit': 'DET',
    'Golden State': 'GSW',
    'Houston': 'HOU',
    'Indiana': 'IND',
    'LA Clippers': 'LAC',
    'LA Lakers': 'LAL',
    'Memphis': 'MEM',
    'Miami': 'MIA',
    'Milwaukee': 'MIL',
    'Minnesota': 'MIN',
    'New Orleans': 'NOP',
    'New York': 'NYK',
    'Oklahoma City': 'OKC',
    'Orlando': 'ORL',
    'Philadelphia': 'PHI',
    'Phoenix': 'PHO',
    'Portland': 'POR',
    'Sacramento': 'SAC',
    'San Antonio': 'SAS',
    'Toronto': 'TOR',
    'Utah': 'UTA',
    'Washington': 'WAS',
}

In [4]:
# Organize depth charts
# Every <font face="verdana"> element is treated as one team: the first line of
# its text is used as the team name and each remaining line as one depth-chart
# entry of the form "<label containing the depth number>-<player name>".
# The result maps team name -> {'Player': [...], 'Depth': [...], 'Team': [...]},
# three parallel lists that can be handed straight to pd.DataFrame.
teams = {}

for team_block in soup.find_all('font', {'face': 'verdana'}):
    team_text = team_block.get_text().split('\n')
    # Strip stray whitespace so the name can be used as a lookup key later.
    city = team_text[0].strip()
    teams[city] = {'Player': [], 'Depth': [], 'Team': []}

    for entry in team_text[1:]:
        # Blank lines (e.g. the empty piece left by a trailing newline) carry
        # no player; skipping them replaces blindly dropping the last line.
        if not entry.strip():
            continue

        # Split on the first hyphen only, so hyphenated player names survive.
        label, separator, name = entry.partition('-')
        # Take the whole run of digits, so a depth of 10+ is not cut to "1".
        depth_match = re.search(r'\d+', label)
        if not separator or depth_match is None:
            # Not a "<depth>-<player>" line; skip it instead of raising an
            # opaque IndexError part-way through the scrape.
            continue

        teams[city]['Depth'].append(int(depth_match.group()))
        teams[city]['Player'].append(name.replace(" (IL)", ''))
        teams[city]['Team'].append(city)

In [5]:
# Save depth charts to dataframe
# Build one frame per team and concatenate them in a single call; growing a
# frame with pd.concat inside a loop re-copies all accumulated rows each time.
# Teams with no parsed players are left out so they contribute no rows.
team_frames = [pd.DataFrame(chart) for chart in teams.values() if chart['Player']]
if not team_frames:
    raise ValueError('No depth-chart entries were parsed; cannot build the dataframe.')

depth_chart = pd.concat(team_frames, axis=0)

# Series.map would silently produce NaN for a team missing from team_abbrvs,
# so check up front and fail with the same exception type a dict lookup raises.
unknown_teams = sorted(set(depth_chart['Team']) - set(team_abbrvs))
if unknown_teams:
    raise KeyError('No abbreviation defined for team(s): {}'.format(unknown_teams))

depth_chart['teamAbbr'] = depth_chart['Team'].map(team_abbrvs)
depth_chart = depth_chart.sort_values(['teamAbbr', 'Depth']).reset_index(drop=True)

In [6]:
# Write the depth chart to ../data/espn_depth_chart.csv (without the index).
# Forward slashes are used because they are valid path separators on Windows
# as well as POSIX, whereas a backslash path only resolves on Windows.
depth_chart.to_csv('../data/espn_depth_chart.csv', index=False)