In [10]:
from urllib2 import urlopen
import pandas as pd
from bs4 import BeautifulSoup
import re
import os
from collections import OrderedDict
from datetime import datetime, date
import csv


def get_timestamp():
    timestamp = datetime.now()
    return timestamp.strftime('%m/%d/%Y-%H:%M:%S')

In [11]:
def make_scores_df2(year, url_template, teams_df, scores_f):
    # dateparse = lambda x: datetime.strptime(x, '%m/%d/%Y')
    # scores_df = pd.read_csv(scores_f, na_filter=False, parse_dates=['game_date'], date_parser=dateparse)
    scores_df = pd.read_csv(
        scores_f, na_filter=False, parse_dates=['game_date'])
    scores_df2 = scores_df.loc[
        (scores_df['home_score'] == 'N/A') & (scores_df['game_date'] < date.today())]

    start_all = datetime.now()
    new_scores_count = 0
    for index, row in scores_df2.iterrows():
        start = datetime.now()

        home_team = scores_df2.loc[index, 'home_team']
        team_url_name = teams_df.loc[
            teams_df['team_abbrv'] == home_team, 'team_url_name'].to_string()

        # r = request.get(url_template.formatrow['home_team'].lower(), year, row['team_url_name'])
        # schedule_table = BeautifulSoup(r.text).table
        url = url_template.format(home_team.lower(), year, team_url_name)
        html = urlopen(url)
        soup = BeautifulSoup(html, 'html.parser')

        sched_data = soup.find_all(
            name='tr', attrs={'class': re.compile('row')})

        # schedule_table = soup.table
        for row in sched_data:
            cols = row.find_all('td')

            other_team = (
                cols[1].find_all('a')[1]['href'].split('/')[-2]).upper()

            try:
                date_raw = cols[0].text
                date_nums = datetime.strptime(date_raw, '%a, %b %d')
                date_month = date_nums.month
                date_day = date_nums.day

                if date_month > 7:
                    date_fixed = date(year-1, date_month, date_day)
                else:
                    date_fixed = date(year, date_month, date_day)

            except Exception as error:
                # print error
                # exception for leap year
                date_fixed = date(year, 2, 29)

            game_date_str = date_fixed.strftime('%m/%d/%Y')
            game_date = datetime.strptime(game_date_str, '%m/%d/%Y')

            if other_team == scores_df2.loc[index, 'away_team'] and\
                    game_date == scores_df2.loc[index, 'game_date']:
                try:
                    scores_df2.loc[index, 'game_id'] = cols[
                        2].a['href'].split('recap?id=')[1]
                    win_bool = True if cols[2].span.text == 'W' else False
                    scores_df2.loc[
                        index, 'win_flag'] = '1' if win_bool else '0'

                    wl_record = cols[3].text.split('-')
                    scores_df2.loc[index, 'wins_to_date'] = wl_record[0]
                    scores_df2.loc[index, 'losses_to_date'] = wl_record[1]

                    new_scores_count += 1

                    end = datetime.now()
                    time_diff = end - start
                    print "Finished getting score for {0} at {1} on {2} in {3} s.".format(home_team, other_team, game_date_str, time_diff)

                except Exception as error:
                    # error for games in the past that have been postponed
                    # print error
                    pass

    end_all = datetime.now()
    time_diff_all = end_all - start_all
    print "Finished getting {0} new scores in {1} s".format(new_scores_count, time_diff_all)

    return scores_df2

In [15]:
def save_scores_df2_to_csv(scores_f, scores_df2, year, timestamp):
    scores_f_add = 'csvs\\nba-season-scores-{0}-test-add.csv'.format(year)
    scores_f_temp = 'csvs\\nba-season-scores-{0}-test-temp.csv'.format(year)
    scores_f_removed = 'csvs\\nba-season-scores-{0}-test-backup.csv'.format(year)

    scores_f_add_text = open(scores_f_add, 'w')
    scores_df2.to_csv(scores_f_add, index=False)
    scores_f_add_text.close()
    print "Created new file with game scores to add '{0}' at {1}.".format(scores_f_add, timestamp)

    in_file_bool = False

    if not os.path.isfile(scores_f):
        scores_f_text = open(scores_f, 'w')
        scores_df2.to_csv(scores_f, index=False)
        scores_f_text.close()
        print "Created new file '{0}' at {1}.".format(scores_f, timestamp)
    else:
        # using the 'with' keyword ensures that the file is closed at the end
        # of the execution of the code
        with open(scores_f, 'rb') as f:
            reader = csv.reader(f, delimiter=',')
            for row in reader:
                # matchup date is in second column
                if row[15] == 'N/A' and row[1] < date.today():
                    print "Found game for which to get score."
                    in_file_bool = True
                    break

        answer = ''
        if not in_file_bool:
            scores_df2.to_csv(scores_f, mode='a', header=False, index=False)
            print "Cannot find game for which to get score in existing '{0}'.".format(scores_f)
        else:
            while answer != 'y' or answer != 'n':
                answer = raw_input(
                    "Do you want to get scores for games that have finished but whose scores have not been recorded? [y/n]: ")
                if answer == 'y':
                    with open(scores_f, 'rb') as f_in, open(scores_f_temp, 'wb') as f_out:
                        reader = csv.reader(f_in, delimiter=',')
                        writer = csv.writer(
                            f_out, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                        index = 0
                        for row in reader:
                            if row[15] == 'N/A' and row[1] < date.today:
                                writer.writerow(scores_df2.iloc[index])
                                index += 1
                            else:
                                writer.writerow(row)
                            if index >= len(scores_df2['game_date']):
                                break

                    if os.path.isfile(scores_f_removed):
                        os.remove(scores_f_removed)
                    os.rename(scores_f, scores_f_removed)
                    os.rename(scores_f_temp, scores_f)
                    scores_df2.to_csv(scores_f, mode='a', header=False, index=False)
                    print "Appended new data to existing '{0}' at {1}.".format(scores_f, timestamp)
                    break
                elif answer == 'n':
                    print "Chose not to get new data in existing '{0}' at {1}.".format(scores_f, timestamp)
                    break
                else:
                    print "Please enter either 'y' or 'n'."

    log_f = 'csvs\\nba-betting-lines-log-{0}.txt'.format(year)

    if not os.path.isfile(log_f):
        log_f_text = open(log_f, 'w')
        log_f_text.write("Created new file at {0}.\n".format(timestamp))
        log_f_text.close()
    else: 
        with open(log_f, 'a') as f:
            if not in_file_bool:
                f.write("Did not find data for which to get scores at {0}.\n".format(timestamp))
            else:
                if answer == 'y':
                    f.write("Appended new data at {0}.\n".format(timestamp))
                if answer == 'n':
                    f.write("Chose not to get new data at {0}.\n".format(timestamp))

            print "Logging timestamp to '{0}' at {1}.".format(log_f, timestamp)

In [16]:
def main1():
    year = 2016
    url_template = "http://espn.go.com/nba/team/schedule/_/name/{0}/year/{1}/{2}"
    teams_f = "csvs\\nba-teams.csv"
    teams_df = pd.read_csv(teams_f, index_col=False)
    all_teams = list(teams_df.team_abbrv.values.flatten())

    timestamp = get_timestamp()

    scores_f = 'csvs\\nba-season-scores-{0}-test.csv'.format(year)
    
    print "Updating existing '{0}' with new game scores.".format(scores_f)
    scores_df2 = make_scores_df2(year, url_template, teams_df, scores_f)
    
    save_scores_df2_to_csv(scores_f, scores_df2, year, timestamp)

In [17]:
if __name__ == '__main__':
    main1()

Updating existing 'csvs\nba-season-scores-2016-test.csv' with new game scores.
Finished getting score for ATL at DEN on 03/17/2016 in 0:00:01.216000 s.
Finished getting score for ATL at HOU on 03/19/2016 in 0:00:01.202000 s.
Finished getting score for TOR at BOS on 03/18/2016 in 0:00:04.711000 s.
Finished getting score for CHI at BKN on 03/17/2016 in 0:00:01.065000 s.
Finished getting score for DET at BKN on 03/19/2016 in 0:00:01.051000 s.
Finished getting score for MIA at CHA on 03/17/2016 in 0:00:01.157000 s.
Finished getting score for CHA at DEN on 03/19/2016 in 0:00:01.109000 s.
Finished getting score for CHI at BKN on 03/17/2016 in 0:00:01.033000 s.
Finished getting score for CHI at UTAH on 03/19/2016 in 0:00:01.003000 s.
Finished getting score for ORL at CLE on 03/18/2016 in 0:00:01.329000 s.
Finished getting score for MIA at CLE on 03/19/2016 in 0:00:02.821000 s.
Finished getting score for DAL at GS on 03/18/2016 in 0:00:01.123000 s.
Finished getting score for ATL at DEN on 03/1

IndexError: list index out of range