In [13]:
from bs4 import BeautifulSoup
import glob
import csv
import os
import datetime

In [19]:
path = r"/Users/Milton/Developer/Hockey/data/Odds"

baseBet = 100
header = ['Date', 'HomeTeam', 'AwayTeam', 'HomeOdds', 'AwayOdds']
rows = []

def calculateOdds(moneyLine):
    return moneyLine / baseBet if moneyLine > 0 else baseBet / abs(moneyLine) 

def standardizeName(name):
    if name == 'N.Y. Islanders':
        return 'New York Islanders'
    elif name == 'N.Y. Rangers':
        return 'New York Rangers'
    elif name == 'Arizona':
        return 'Coyotes'
    elif name == 'Montreal':
        return 'Montréal'
    else:
        return name

def parse(soup, date):
    rowData = []
    table = soup.find('div', attrs={'class': ['eventGroup', 'class-nhl-hockey']})
    if table:
        for row in table.find_all('div', attrs={'class': ['event-holder', 'holder-complete']}):
            teams = row.find_all('span', 'team-name')
            awayTeam = teams[0].find('a').get_text() 
            homeTeam = teams[1].find('a').get_text()
            odds = row.find_all('div', 'eventLine-book')
            pinnacle = odds[1].find_all('div', 'eventLine-book-value')
            if pinnacle[0].get_text() and pinnacle[1].get_text():
                awayMoneyLine = int(pinnacle[0].get_text())
                homeMoneyLine = int(pinnacle[1].get_text())
            else: 
                continue
            awayOdds = calculateOdds(awayMoneyLine)
            homeOdds = calculateOdds(homeMoneyLine)
#             print("Away Money Line %d Home Money Line %d" % (awayMoneyLine, homeMoneyLine))
#             print("Away Odds %6.4f HomeOdds %6.4f" % (awayOdds, homeOdds))
            rowData.append([date, standardizeName(homeTeam), standardizeName(awayTeam), homeOdds, awayOdds])
    return rowData
    
def convertDate(date):
    dateObj = datetime.datetime.strptime(date, '%Y%m%d')
    return dateObj.strftime('%Y/%m/%d')
    
for file in glob.glob(path + "/*.html"):
    with open(file, 'rb') as infile: 
        date = os.path.basename(infile.name).split('.')[0]
        soup = BeautifulSoup(infile)
        print(convertDate(date))
        rows.extend(parse(soup, convertDate(date)))
        
with open('Odds 2007-2018 Raw.csv', 'w') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(header)
    writer.writerows(rows)

2012/01/01
2015/02/03
2014/02/03
2010/01/08
2015/11/07
2011/01/08
2013/12/05
2014/11/07
2017/01/23
2010/02/21
2011/02/21
2016/01/23
2010/11/25
2017/12/27
2012/02/28
2016/12/27
2011/11/25
2015/10/15
2017/03/18
2014/10/15
2016/03/18
2015/03/11
2014/03/11
2018/03/20
2009/12/03
2008/12/03
2009/01/07
2008/01/07
2007/11/12
2014/03/07
2015/03/07
2008/12/15
2011/10/21
2009/04/04
2008/04/04
2009/12/15
2010/10/21
2007/11/04
2013/10/28
2008/01/11
2011/03/25
2009/01/11
2010/03/25
2016/11/18
2014/02/15
2017/11/18
2015/02/15
2012/01/17
2014/11/11
2012/04/02
2013/12/13
2015/11/11
2007/10/16
2016/12/31
2018/02/24
2017/12/31
2009/11/10
2017/10/26
2016/10/26
2008/11/10
2017/03/22
2009/02/14
2007/12/01
2008/02/14
2016/03/22
2010/03/09
2011/03/09
2013/10/04
2007/11/28
2009/03/06
2008/03/06
2018/01/21
2009/10/02
2008/10/02
2018/02/08
2012/02/12
2015/01/10
2014/01/10
2015/12/14
2014/04/05
2017/01/19
2013/11/16
2015/04/05
2014/12/14
2016/01/19
2013/12/29
2016/02/26
2011/01/24
2008/03/10
2010/01/24
2009/03/10

2015/01/17
2012/02/15
2014/12/13
2013/11/11
2015/04/02
2014/04/02
2015/12/13
2008/03/01
2009/03/01
2011/12/31
2008/10/05
2018/01/26
2010/12/31
2009/10/05
2013/10/03
2012/03/07
2008/11/17
2016/10/21
2017/10/21
2009/11/17
2014/10/28
2007/12/06
2016/03/25
2008/02/13
2015/10/28
2009/02/13
2017/03/25
2007/10/11
2018/02/23
2009/10/29
2012/01/10
2015/02/12
2008/10/29
2014/02/12
2015/11/16
2010/01/19
2013/12/14
2014/11/16
2012/04/05
2011/01/19
2010/10/26
2009/12/12
2008/04/03
2018/03/31
2009/04/03
2011/10/26
2008/12/12
2010/03/22
2009/01/16
2007/11/03
2011/03/22
2008/01/16
2017/03/09
2016/03/09
2008/12/04
2011/10/30
2018/03/27
2009/12/04
2010/10/30
2007/11/15
2014/10/12
2008/02/29
2015/10/12
2014/03/16
2015/03/16
2007/10/07
2014/12/29
2016/01/24
2011/02/26
2015/12/29
2010/02/26
2017/01/24
2011/11/22
2016/12/20
2017/12/20
2010/11/22
2014/02/04
2016/11/09
2015/02/04
2007/09/29
2012/01/06
2017/11/09
2013/12/02
2008/02/09
2009/02/09
2008/01/20
2011/03/14
2013/10/19
2009/01/20
2010/03/14
2008/12/24

2010/10/23
2009/12/17
2011/10/23
2008/12/17
2009/04/06
2015/03/05
2017/10/08
2014/03/05
2008/01/05
2011/03/31
2007/11/10
2009/01/05
2010/03/31
2009/04/10
2008/12/01
2018/03/22
2009/12/01
2008/11/28
2014/03/13
2009/11/28
2015/03/13
2014/10/17
2015/10/17
2016/12/25
2011/11/27
2014/01/28
2010/11/27
2017/12/25
2015/01/28
2007/10/02
2011/02/23
2016/01/21
2017/01/21
2010/02/23
2016/02/08
2013/12/07
2014/11/05
2017/02/08
2015/11/05
2014/02/01
2018/01/19
2012/01/03
2015/02/01
2007/12/19
2008/11/08
2009/11/08
2008/12/21
2011/10/15
2012/03/18
2018/03/02
2009/12/21
2010/10/15
2008/01/25
2011/03/11
2007/11/30
2009/01/25
2010/03/11
2014/02/21
2012/01/23
2015/02/21
2016/02/28
2013/12/27
2014/11/25
2017/02/28
2015/11/25
2007/10/22
2011/02/03
2016/01/01
2017/01/01
2010/02/03
2016/12/05
2011/11/07
2014/01/08
2010/11/07
2017/12/05
2018/02/10
2015/01/08
2009/03/08
2013/12/31
2008/03/08
2010/02/15
2017/01/17
2016/01/17
2011/02/15
2013/11/18
2018/02/06
2017/12/13
2016/04/02
2010/11/11
2011/11/11
2017/04/02

In [20]:
import pandas as pd
frame = pd.read_csv(r'/Users/Milton/Developer/Hockey/Article Testing Sandbox/Odds 2007-2018 Raw.csv')
frame

Unnamed: 0,Date,HomeTeam,AwayTeam,HomeOdds,AwayOdds
0,2012/01/01,Nashville,Calgary,0.819672,1.130000
1,2015/02/03,New Jersey,Ottawa,0.793651,1.140000
2,2015/02/03,Columbus,Coyotes,0.598802,1.510000
3,2015/02/03,Washington,Los Angeles,0.869565,1.040000
4,2015/02/03,New York Islanders,Florida,0.396825,2.240000
5,2015/02/03,Montréal,Buffalo,0.243902,3.540000
6,2015/02/03,St. Louis,Tampa Bay,0.675676,1.340000
7,2015/02/03,Nashville,Toronto,0.558659,1.610000
8,2015/02/03,Minnesota,Chicago,1.220000,0.740741
9,2015/02/03,Dallas,Colorado,0.625000,1.440000
