# Fantasy Basketball

##### Projecting FG%, FT%, 3PM, Points, Rebounds, Assists, Steals, Blocks and Turnovers based on:
* historical performance in these categories
* team characteristics (pace, W/L, etc.)
* projected minutes for the player (which may depend on subjective factors and on who else is playing on their team)
* injuries (games played)

##### Data that we will need to scrape:
* player data for each season
* team data for each season
* coach data for each season

### Player Data Scraping

In [1]:
import requests
import re
import pandas as pd
import string
from bs4 import BeautifulSoup

ImportError: No module named requests

In [2]:
# Root URL of the site being scraped; the download loops prepend it to
# site-relative paths (index pages and per-player links).
base_url = 'https://www.basketball-reference.com'

In [None]:
def parse_players(page_text):
    """Parse one player-index page into a DataFrame of players.

    Args:
        page_text: HTML text of a page containing a table with
            ``id="players"``.

    Returns:
        A DataFrame with one row per player row in the table. Columns are
        ``link`` (href of the anchor inside the row's ``<th>``), ``name``
        (text of the ``<th>``), ``canonical`` (its ``data-append-csv``
        attribute), followed by one column per ``<td>`` keyed by the cell's
        ``data-stat`` attribute. All values are the raw cell text. The
        result is empty when the table has no player rows.

    Raises:
        ValueError: if the page has no table with ``id="players"``.
    """
    soup = BeautifulSoup(page_text, "lxml")
    table = soup.find('table', id="players")
    if table is None:
        raise ValueError('no table with id="players" found in page')

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for row in table.find_all('tr'):
        th = row.find('th')
        # Header rows have no player id attribute, so they are skipped by
        # content rather than by assuming the header is exactly row 0.
        if th is None or not th.has_attr('data-append-csv'):
            continue
        record = {
            'link': th.find('a')['href'],
            'name': th.text,
            'canonical': th['data-append-csv'],
        }
        for td in row.find_all('td'):
            record[td['data-stat']] = td.text
        records.append(record)
    return pd.DataFrame(records)

In [None]:
def parse_player_seasons(page_text):
    """Parse a player's page into a DataFrame of per-game season rows.

    Args:
        page_text: HTML text of a single player's page.

    Returns:
        A DataFrame with one row per ``<tr>`` whose ``id`` matches the
        pattern ``per_game``. Each row holds ``canonical`` (last path
        segment of the page's canonical link, without ``.html``), ``name``
        (the ``itemprop="name"`` text inside the schema.org Person div),
        then one column per ``<th>``/``<td>`` cell keyed by the cell's
        ``data-stat`` attribute. All values are the raw cell text. The
        result is empty when no matching rows exist.
    """
    soup = BeautifulSoup(page_text, "lxml")
    name = soup.find('div', itemtype=r"https://schema.org/Person").find(itemprop="name").text
    canonical = soup.find('link', rel="canonical")['href'].split('/')[-1].replace(r'.html','')
    per_game_regex = re.compile('per_game')

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for row in soup.find_all('tr', id=per_game_regex):
        record = {'canonical': canonical, 'name': name}
        # The leading <th> cell is stored under its own data-stat key,
        # exactly like the <td> cells that follow it.
        th = row.find('th')
        if th is not None:
            record[th['data-stat']] = th.text
        for td in row.find_all('td'):
            record[td['data-stat']] = td.text
        records.append(record)
    return pd.DataFrame(records)

In [None]:
# Download the a-z player index pages and combine them into players_df.
# URLs that fail (non-200 status or a network error) are kept in player_fails.
players_df = pd.DataFrame()
paths = ['/players/{}/'.format(char) for char in string.ascii_lowercase]
player_fails = []
player_frames = []  # one parsed frame per successfully downloaded page
print('=== DOWNLOADING PLAYERS ===')
for path in paths:
    fullpath = base_url + path
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(fullpath, timeout=30)
    except requests.RequestException:
        response = None
    if response is not None and response.status_code == 200:
        player_frames.append(parse_players(response.text))
        print('Players Downloaded Successfully: {}'.format(fullpath))
    else:
        print('Player Download Failed: {}'.format(fullpath))
        player_fails.append(fullpath)
# Concatenate once at the end (DataFrame.append was removed in pandas 2.0);
# pd.concat rejects an empty list, so keep the empty frame in that case.
if player_frames:
    players_df = pd.concat(player_frames, ignore_index=True)
players_df

In [None]:
# Download every player's page (links taken from players_df) and combine the
# per-game season rows into player_seasons_df. URLs that fail (non-200 status
# or a network error) are kept in player_season_fails.
player_seasons_df = pd.DataFrame()
player_links = players_df['link'].tolist()
player_season_fails = []
season_frames = []  # one parsed frame per successfully downloaded player
print('=== DOWNLOADING PLAYER SEASONS ===')
for player_link in player_links:
    fullpath = base_url + player_link
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(fullpath, timeout=30)
    except requests.RequestException:
        response = None
    if response is not None and response.status_code == 200:
        season_frames.append(parse_player_seasons(response.text))
        print('Player Season Downloaded Successfully: {}'.format(fullpath))
    else:
        print('Player Season Download Failed: {}'.format(fullpath))
        player_season_fails.append(fullpath)
# Concatenate once at the end (DataFrame.append was removed in pandas 2.0);
# pd.concat rejects an empty list, so keep the empty frame in that case.
if season_frames:
    player_seasons_df = pd.concat(season_frames, ignore_index=True)
player_seasons_df

In [None]:
# Persist both scraped tables as CSV files in the current working directory.
for frame, csv_name in ((players_df, 'player_list.csv'),
                        (player_seasons_df, 'player_seasons_list.csv')):
    frame.to_csv(csv_name)