In [8]:
import datetime
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse

def format_date(date):
    """
    Splits a compact yyyymmdd date into its numeric components.
    :param date:
        the date as yyyymmdd; an int or a string made of exactly 8 digits
    :return:
        list of ints: [yyyy,mm,dd]
    :raises ValueError:
        if the value is not exactly 8 digits
    """
    digits = str(date).strip()

    # Slicing a value of the wrong length would silently produce wrong parts
    # (e.g. 2015127 -> [2015, 12, 7]), so reject anything that is not 8 digits.
    if len(digits) != 8 or not digits.isdigit():
        raise ValueError(
            "expected a date formatted as yyyymmdd, got {!r}".format(date))

    return [int(digits[0:4]), int(digits[4:6]), int(digits[6:8])]

def get_date_range(range_list):
    """
    Expands a [start date, end date] pair into every day between them (inclusive).
    :param range_list:
        [yyyymmdd, yyyymmdd] -- start date first, end date second
    :return:
        a list of datetime.date objects, ordered from the end date back to
        the start date; empty when the end date is before the start date
    """
    first_day = datetime.date(*format_date(range_list[0]))
    last_day = datetime.date(*format_date(range_list[1]))
    span_days = (last_day - first_day).days

    # Walk backwards from the last day, one day per step.
    days = []
    for offset in range(span_days + 1):
        days.append(last_day - datetime.timedelta(days=offset))

    return days

In [12]:
def get_date(date):
    """
    Builds the date part of the query string.
    :param date:
        datetime.date instance
    :return:
        string such as 'mon=3&day=05&year=2016'
    """
    # only the day is zero-padded to two characters; month and year are used as-is
    day_text = str(date.day).zfill(2)
    month_text = str(date.month)
    year_text = str(date.year)

    return 'mon={}&day={}&year={}'.format(month_text, day_text, year_text)

def get_url(date, site):
    """
    Builds the url of the page to scrape.
    :param date:
        the day to scrape; handed to get_date(), which turns it into
        the 'mon=..&day=..&year=..' part of the query string
    :param site:
        site code placed in the 'game' query parameter
        example: fd | dk
    :return:
        formatted URL
    """
    base = 'http://rotoguru1.com/cgi-bin/hyday.pl?game=' + site
    date_query = get_date(date)

    return base + '&' + date_query


def fetch_player_data(date, url, site, timeout=30):
    """
    scrapes the player data from the given web page.
    :param date:
        current date to scrape; stored unchanged in the "Date" column of each row
    :param url:
        url of the page
        example: 'http://rotoguru1.com/cgi-bin/hyday.pl?game=fd&mon=3&day=13&year=2016'
    :param site:
        fd for FanDuel | dk for DraftKings; stored unchanged in the "Site" column
    :param timeout:
        seconds to wait for the server before giving up (default 30)
    :return:
        a list of rows: the first row is the column header, every following
        row holds the text of the player's table cells followed by
        site, player id, date, first name and last name
    :raises requests.exceptions.RequestException:
        if the request fails, times out or the server answers with an
        HTTP error status
    """
    # Without a timeout requests may block forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    data = [["Position", "Name", "Fantasy Points", "Salary", "Away", "Home", "Score", "Minutes", "Stats",
             "Site", "ID", "Date", "First Name", "Last Name"]]

    for player_row in soup.find_all('tr'):
        # Differentiate player rows: no enclosing table in the parsed tree,
        # at least one td without a colspan attribute, and no bold text.
        if player_row.find_parents('table'):
            continue
        if player_row.find('td', {'colspan': False}) is None:
            continue
        if player_row.find('b') is not None:
            continue

        # Get the td tag text and format it.
        cells = [cell.text.replace(u'\xa0', u'').strip() for cell in player_row.find_all('td')]
        link = player_row.find('a', href=True)

        # A row without a link or without a name cell cannot be a player row;
        # skip it rather than crash on the lookups below.
        if link is None or len(cells) < 2:
            continue

        # The query string of the player's link is used as the player id.
        player_id = urlparse(link['href']).query

        # The name cell is formatted as "Last, First"; a "^" marker may be attached.
        name_parts = cells[1].split(",")
        last_name = name_parts[0].strip()
        if len(name_parts) > 1:
            # strip() removes the space that follows the comma
            first_name = name_parts[1].replace("^", "").strip()
        else:
            first_name = ""

        data.append(cells + [site, player_id, date, first_name, last_name])

    return data

In [13]:
def main(date_range, site='fd'):
    """
    Scrapes every day in the date range and prints what was found.
    :param date_range:
        [yyyymmdd, yyyymmdd] -- start date and end date (inclusive)
    :param site:
        site code handed to get_url() and fetch_player_data()
        example: fd | dk (default: fd)
    """
    for date in get_date_range(date_range):
        print(date)

        url = get_url(date, site)

        print(url)

        scraped_data = fetch_player_data(str(date), url, site)

        print(scraped_data)

        # The first row returned by fetch_player_data is the column header,
        # so it must not be counted as a player.
        print(len(scraped_data) - 1, "rows of player data scraped")

In [15]:
#main([20151027, 20151028])