In [157]:
from bs4 import BeautifulSoup
import requests
import requests_cache
from PandasBasketball import pandasbasketball as pb
from PandasBasketball.stats import player_stats, team_stats, player_gamelog, n_days
# from PandasBasketball.errors import StatusCode404, TableNonExistent 

In [32]:
# Install a requests cache named 'request_cache' so that repeated requests to the
# same URL can be answered from the cache instead of hitting the site again.
requests_cache.install_cache(cache_name='request_cache')

In [156]:
# Build one player-index URL per lowercase ASCII letter; players are grouped
# into these directories by the first letter of their last name.
import string

letters = string.ascii_lowercase
players_letter_dirs = []
for letter in letters:
    players_letter_dirs.append(f"http://www.basketball-reference.com/players/{letter}/")

# Preview the first two generated URLs.
players_letter_dirs[:2]

['http://www.basketball-reference.com/players/a/',
 'http://www.basketball-reference.com/players/b/']

In [155]:
# Request every letter index page once and keep the responses.
# Uses `players_letter_dirs` (built in the cell above); the previous name
# `player_url_letters` is not defined anywhere in this notebook, so the cell
# failed with NameError on a fresh kernel.
reqs = [requests.get(url) for url in players_letter_dirs]

# Preview the first four responses.
reqs[:4]

[<Response [200]>, <Response [200]>, <Response [200]>, <Response [200]>]

In [38]:
# Sanity check on caching: request one letter index page and read the response's
# `from_cache` attribute (presumably added by requests_cache once the cache is
# installed — confirm against the installed version).
requests.get('http://www.basketball-reference.com/players/f').from_cache

True

## Scrape the player list pages for every player name and its filename encoding

In [139]:
# Goal: map each player name to the file naming convention used for that player's
# page, as a dictionary keyed by player name, so a user can look up the naming
# convention by name.
#
# This cell previews the raw material for that mapping using the "f" index page.
from bs4 import BeautifulSoup
from PandasBasketball.stats import get_data_master

# Fetch the index page for the letter "f" and parse it.
f_request = requests.get("http://www.basketball-reference.com/players/f")
soup = BeautifulSoup(markup=f_request.text, features="html.parser")

# The player listing is the table whose id is "players".
player_table = soup.find("table", id="players")

# Convert the table with PandasBasketball's helper and show the first row.
df = get_data_master(player_table, "player")
df.head(n=1)

Unnamed: 0,Player,From,To,Pos,Ht,Wt,Birth Date,Colleges
0,Joe Fabel,1947,1947,F-G,6-1,190,"May 15, 1917",Pitt


## Retrieve the full list of all-time players and their filename encodings

In [152]:
# Generate list of player tables, one from each letter directory.
# Each entry is the parsed <table id="players"> element of one letter index page.
players_tables = []
for letter_dir in players_letter_dirs:
    r = requests.get(letter_dir)
    soup = BeautifulSoup(r.text, "html.parser")
    players_table = soup.find("table", id="players")
    if players_table is None:
        # `find` returns None when the page has no such table (e.g. a letter
        # with no players or an error page). Appending None would make the
        # next cell fail on `players_table.find(...)`, so skip the page.
        continue
    players_tables.append(players_table)

In [153]:
# Generate dictionary mapping every player name to their respective name encoding.
#
# Key:   "<first token> <last token>" of the link text in each row header cell.
# Value: the cell's "data-append-csv" attribute, used as the player's file name.
#
# NOTE(review): because keys keep only the first and last token of the name,
# two players whose names reduce to the same key overwrite each other (the later
# row wins). Confirm whether that loss is acceptable for the "all players" list.

player_dict = {}

for players_table in players_tables:
    players_name_rows = players_table.find("tbody").find_all("th")

    for row in players_name_rows:
        encoding = row.get("data-append-csv")
        # Skip header-style cells that carry no encoding or no player link
        # instead of raising KeyError / AttributeError on them.
        if encoding is None or row.a is None:
            continue

        split_name = row.a.text.split()
        if not split_name:
            # Link with no visible text: nothing usable as a key.
            continue

        first = split_name[0]
        last = split_name[-1]
        player_dict[f"{first} {last}"] = encoding

player_dict



{'Alaa Abdelnaby': 'abdelal01',
 'Zaid Abdul-Aziz': 'abdulza01',
 'Kareem Abdul-Jabbar': 'abdulka01',
 'Mahmoud Abdul-Rauf': 'abdulma02',
 'Tariq Abdul-Wahad': 'abdulta01',
 'Shareef Abdur-Rahim': 'abdursh01',
 'Tom Abernethy': 'abernto01',
 'Forest Able': 'ablefo01',
 'John Abramovic': 'abramjo01',
 'Álex Abrines': 'abrinal01',
 'Alex Acker': 'ackeral01',
 'Don Ackerman': 'ackerdo01',
 'Mark Acres': 'acresma01',
 'Bud Acton': 'actonbu01',
 'Quincy Acy': 'acyqu01',
 'Alvan Adams': 'adamsal01',
 'Don Adams': 'adamsdo01',
 'George Adams': 'adamsge01',
 'Hassan Adams': 'adamsha01',
 'Jaylen Adams': 'adamsja01',
 'Jordan Adams': 'adamsjo01',
 'Michael Adams': 'adamsmi01',
 'Steven Adams': 'adamsst01',
 'Rafael Addison': 'addisra01',
 'Bam Adebayo': 'adebaba01',
 'Deng Adel': 'adelde01',
 'Rick Adelman': 'adelmri01',
 'Jeff Adrien': 'adrieje01',
 'Arron Afflalo': 'afflaar01',
 'Maurice Ager': 'agerma01',
 'Mark Aguirre': 'aguirma01',
 'Blake Ahearn': 'ahearbl01',
 'Danny Ainge': 'aingeda01'

In [167]:
# Get list of every player page to be scraped.
# Uses `players_letter_dirs` (the list defined earlier in this notebook); the
# previous name `player_letter_dirs` is undefined and raised NameError on a
# fresh kernel.
full_player_urls = []
for letter_dir in players_letter_dirs:
    # Each directory URL ends with "<letter>/", so index -2 is its letter.
    for encoding in player_dict.values():
        # Pair each encoding with the directory whose letter it starts with.
        if encoding.startswith(letter_dir[-2]):
            full_player_urls.append(letter_dir + encoding + ".html")
full_player_urls

['http://www.basketball-reference.com/players/a/abdelal01.html',
 'http://www.basketball-reference.com/players/a/abdulza01.html',
 'http://www.basketball-reference.com/players/a/abdulka01.html',
 'http://www.basketball-reference.com/players/a/abdulma02.html',
 'http://www.basketball-reference.com/players/a/abdulta01.html',
 'http://www.basketball-reference.com/players/a/abdursh01.html',
 'http://www.basketball-reference.com/players/a/abernto01.html',
 'http://www.basketball-reference.com/players/a/ablefo01.html',
 'http://www.basketball-reference.com/players/a/abramjo01.html',
 'http://www.basketball-reference.com/players/a/abrinal01.html',
 'http://www.basketball-reference.com/players/a/ackeral01.html',
 'http://www.basketball-reference.com/players/a/ackerdo01.html',
 'http://www.basketball-reference.com/players/a/acresma01.html',
 'http://www.basketball-reference.com/players/a/actonbu01.html',
 'http://www.basketball-reference.com/players/a/acyqu01.html',
 'http://www.basketball-refe

In [None]:
# Generate full player dataframe concatenated
# NOTE(review): in the lines visible here, `df` is reassigned on every iteration
# and nothing is appended to `dfs` or concatenated, so `dfs` stays empty —
# confirm whether this (unexecuted) cell is unfinished.

dfs = []
for url in full_player_urls:
    # One request per player page; the response is passed to player_stats with
    # the "per_minute" selector (presumably the per-minute stats table — confirm).
    df = player_stats(requests.get(url), "per_minute")

#     print(url)