In [1]:
from bs4 import BeautifulSoup
from urllib import request
import pandas as pd

In [2]:
# Candidate position codes.
# NOTE(review): this list is not referenced by any other visible cell; presumably
# it enumerates the valid values for `position_idx` — confirm.
position_codes = [1, 2, 3, 4, 7, 8]
# Position code that is inserted into the projections URL.
# NOTE(review): presumably 1 means quarterbacks, given the 'qb' output file name — confirm.
position_idx = 1
# Path of the CSV file the final, filtered table is written to.
csv_file_name = 'qb_proj_2015.csv'
# CSS class of the <td> cell from which each player's score is read.
row_class_name = 'projected'

In [3]:
## The URL is split into the part before and the part after the position code.
## It still has to be constructed programmatically to fetch data for various
## positions and for different years etc.
url_prefix = 'http://fantasy.nfl.com/research/projections?position='
url_suffix = '&statCategory=projectedStats&statSeason=2015&statType=seasonProjectedStats&statWeek=1'

## Full URL of the first results page for the configured position.
url_link = ''.join((url_prefix, str(position_idx), url_suffix))

In [4]:
## read data from url and extract the table
# response = request.urlopen(url_link)
# html = response.read()

# soup = BeautifulSoup(html, 'lxml')
# table = soup.find('div', {'class': 'tableWrap'})

# if table is None:
#     print('Table not found. url may be misconstructed.')
# else:
#     print('Table Found. Good to go ahead.')

# table_body = table.find('tbody')
# table_rows = table_body.findAll('tr')

In [5]:
def get_table_rows(prefix, index, suffix):
    """Download one results page and return the rows of its stats table.

    The page URL is ``prefix + str(index) + suffix``.

    Args:
        prefix: URL text that precedes the position code.
        index: Position code; converted with ``str`` before concatenation.
        suffix: URL text that follows the position code.

    Returns:
        The ``<tr>`` elements inside the ``<tbody>`` of the first
        ``<div class="tableWrap">`` on the page, or an empty list when the
        page has no such div or the div has no ``<tbody>``.

    Raises:
        Any error raised by ``request.urlopen`` or ``response.read`` is
        propagated unchanged.
    """
    current_url = prefix + str(index) + suffix

    # Use the response as a context manager so the connection is closed even
    # if reading fails, instead of being leaked until garbage collection.
    with request.urlopen(current_url) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('div', {'class': 'tableWrap'})
    if table is None:
        # Table not found; the URL may be misconstructed.
        return []

    table_body = table.find('tbody')
    if table_body is None:
        # A wrapper without a body has no rows to report.
        return []

    return table_body.find_all('tr')


def get_data_frame_from_rows(rows):
    """Build a table of player scores from the rows of a results page.

    For every row, the score is read from the first ``<td>`` whose class is
    the module-level ``row_class_name``, and the player name, position and
    team are read from the first ``<td class="playerNameAndInfo">``.

    Args:
        rows: Iterable of BeautifulSoup ``<tr>`` elements. May be empty.

    Returns:
        A ``pandas.DataFrame`` indexed by player name with the columns
        ``pos``, ``team`` and ``score`` (``score`` converted to float).
        ``team`` is ``''`` when the row lists no team. One output row is
        produced per input row, so players sharing a name are all kept.
        Empty input yields an empty frame that still has the three columns.

    Raises:
        ValueError: if a score string cannot be converted to float.
    """
    names = []
    records = []
    for row in rows:
        ## get projected score
        ## have to change for historic data
        score = row.find_all('td', {'class': row_class_name})[0].string.strip()
        # The attrs filter must be a dict; the former set literal only matched by accident.
        name_card = row.find_all('td', {'class': 'playerNameAndInfo'})[0].find('div')

        player_name = name_card.find('a', {'class': 'playerName'}).string.strip()

        # The <em> text is split on '-' into position and team; when there is
        # no '-' only the position is present and the team is left blank.
        pos_team = name_card.find('em').string.strip().split('-')
        pos = pos_team[0].strip()
        team = pos_team[1].strip() if len(pos_team) > 1 else ''

        # Collect records in a list (not a dict keyed by name) so that two
        # players with the same name do not overwrite each other.
        names.append(player_name)
        records.append({'pos': pos, 'team': team, 'score': score})

    # Explicit columns keep the frame well-formed even when `rows` is empty.
    data_frame = pd.DataFrame(records, index=names, columns=['pos', 'team', 'score'])
    data_frame['score'] = data_frame['score'].astype('float')

    return data_frame

In [6]:
# Create the URLs for the successive result pages and collect one frame per page.
# The first page is requested without an offset; every later page k (k >= 1) is
# requested with '&offset=' followed by k * PAGE_SIZE + 1.
# Assume a full page of results is 25 entries long; a shorter page is the last one.
PAGE_SIZE = 25
data_frames = []
# Named `page` rather than `iter` so the built-in `iter` is not shadowed for
# the rest of the kernel session.
page = 0

while True:
    offset_suffix = ('&offset=' + str(page * PAGE_SIZE + 1)) if page > 0 else ''
    table_rows = get_table_rows(url_prefix, position_idx, url_suffix + offset_suffix)
    if len(table_rows) > 0:
        data_frames.append(get_data_frame_from_rows(table_rows))
        page += 1
    # A page with fewer rows than a full page (including none) ends the walk.
    if len(table_rows) < PAGE_SIZE:
        break

In [7]:
## stack the per-page frames into one table
agg_df = pd.concat(data_frames, axis=0)

## keep only the guys with score > 0, best score first, and label a blank
## team as 'NAN' before writing the result to disk
red_df = (
    agg_df.loc[agg_df['score'] > 0.0]
    .sort_values(by='score', ascending=False)
    .assign(team=lambda frame: frame['team'].mask(frame['team'] == '', 'NAN'))
)
red_df.to_csv(csv_file_name)

In [None]:
## Sample code for one player

# ## get projected score
# ## have to change for historic data
# score = table_rows[0].findAll('td', {'class': 'projected'})[0].string.strip()
# name_card = table_rows[0].findAll('td', {'class', 'playerNameAndInfo'})[0].find('div')

# player_name = name_card.find('a', {'class': 'playerName'}).string.strip()
# pos, team = name_card.find('em').string.strip().split('-')
# pos, team = pos.strip(), team.strip()

# print(player_name, pos, team, score)

In [None]:
# data = {}

# for row in table_rows:
#     ## get projected score
#     ## have to change for historic data
#     score = row.findAll('td', {'class': 'projected'})[0].string.strip()
#     name_card = row.findAll('td', {'class', 'playerNameAndInfo'})[0].find('div')

#     player_name = name_card.find('a', {'class': 'playerName'}).string.strip()
#     pos_team = name_card.find('em').string.strip().split('-')
#     pos, team = pos_team[0].strip(), '' if len(pos_team) == 1 else pos_team[1].strip()
    
#     data[player_name] = {'pos': pos, 'team': team, 'score': score}

In [None]:
# data_frame = pd.DataFrame.from_dict(data, orient='index')
# data_frame['score'] = data_frame['score'].astype('float')
# data_frame.sort_values(by='score', ascending=False)