In [1]:
import requests
import selenium
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# HLTV odds-comparison page; the Selenium cells below load the same URL.
HLTV_URL = 'https://www.hltv.org/betting/money'
# Static (non-JavaScript) fetch of the page. The timeout stops the cell from
# blocking forever if the server never answers.
soup = BeautifulSoup(requests.get(HLTV_URL, timeout=30).content, 'html.parser')

In [13]:
from selenium import webdriver
# Start a Chrome session through the chromedriver binary at the given path and
# load the HLTV odds page in it (adjust executable_path to your own driver).
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object - confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver') #executable_path=yourdriver
driver.get(HLTV_URL)
# html = driver.page_source

In [55]:
# Collect every element carrying the 'horizontalScroll' class.
# 'class name' is the locator-strategy string behind By.CLASS_NAME; the
# find_elements_by_* helpers no longer exist in Selenium 4, while
# find_elements(by, value) is available in old and new releases alike.
table = driver.find_elements('class name', 'horizontalScroll')
# table = table[0].text

In [57]:
# Display the visible text of the first element collected in `table`.
table[0].text

'Aorus League 2019 #3 Southern Cone\nBO3\n9z\n1.52 1.53 1.52 1.52 1.53\nAgressive\n2.46 2.44 2.46 2.46 2.44\nLOOT.BET Season 4 Closed Qualifier\nBO3\nIzako Boars\n1.90 1.95 1.92 1.83 1.96 2.00 1.92 2.15 1.83 1.95 1.96 1.92\nLDLC\n1.80 1.81 1.82 1.83 1.81 1.75 1.82 1.65 1.83 1.81 1.81 1.82\nBO3\nCopenhagen Flames\n1.90 2.04 1.92 1.83 2.02 1.87 2.05 2.00 1.83 2.04 2.02 2.05\nSJ\n1.80 1.74 1.82 1.83 1.76 1.86 1.72 1.75 1.83 1.74 1.76 1.72\nBrasil Game Show 2019\nBO3\npaiN\n1.79 1.79\nKeyd\n1.98 1.98\nBO3\nDETONA\n1.25 1.26 1.25 1.25 1.26 1.24 1.24\nFalkol\n3.77 3.72 3.77 3.77 3.72 3.65 3.70\nBO3\nW7M\n1.43 1.43 1.44 1.43 1.43 1.42 1.42\nBulldozer\n2.74 2.76 2.61 2.74 2.76 2.70 2.70'

In [280]:
def convert_to_number(s):
    """Convert ``s`` to a float, returning -1 when it cannot be converted.

    Args:
        s: Value to convert, e.g. the text of an odds cell. Anything accepted
            by ``float()`` converts successfully.

    Returns:
        ``float(s)`` on success, otherwise the sentinel ``-1``.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (including the empty string).
        # TypeError: values float() cannot handle at all, such as None.
        # Both are "failure" as far as callers are concerned.
        return -1

def get_bookie_name(html):
    """Return the bookmaker suffix of the first odds-provider child of ``html``.

    Walks ``html.contents`` and looks at each child's ``class`` attribute
    (joined with spaces). The first child whose class string contains
    ``'betting-list-odds-provider'`` wins, and everything after the last
    ``'-'`` of that class string is returned. Any class listed after the
    provider class therefore stays in the result (e.g. ``'ggbetru hidden'``).

    Args:
        html: Parsed node exposing ``contents``; children may be tags or
            plain text nodes.

    Returns:
        The extracted name, or ``'Not Found'`` when no child matches.
    """
    for child in html.contents:
        try:
            class_string = ' '.join(child['class'])
        except (TypeError, KeyError):
            # TypeError: text nodes cannot be indexed by attribute name.
            # KeyError: the child is a tag that has no class attribute.
            continue
        if 'betting-list-odds-provider' in class_string:
            return class_string.split('-')[-1]
    return 'Not Found'

def get_book_makers(driver):
    """Return the names of the bookmakers whose column is not hidden.

    Every element with the ``provider-cell`` class is inspected. Cells whose
    class attribute contains ``'hidden'`` are skipped. For the rest, the name
    is the part of the ``betting-list-<name>`` class after the prefix, without
    surrounding whitespace.

    Args:
        driver: Selenium WebDriver (or any object offering
            ``find_elements(by, value)`` whose results offer
            ``get_attribute('class')``).

    Returns:
        list[str]: bookmaker names in page order. A cell without a
        ``betting-list-`` class contributes its whole stripped class string,
        so the length still equals the number of non-hidden cells.
    """
    prefix = 'betting-list-'
    names = []
    # 'class name' is the locator-strategy string behind By.CLASS_NAME; the
    # find_elements_by_* helpers no longer exist in Selenium 4.
    for cell in driver.find_elements('class name', 'provider-cell'):
        class_attr = cell.get_attribute('class')
        # Substring test on purpose: is_valid_bookie filters odds cells the
        # same way, and both filters must agree on which columns are dropped.
        if 'hidden' in class_attr:
            continue
        provider = next(
            (token[len(prefix):] for token in class_attr.split() if token.startswith(prefix)),
            None,
        )
        names.append(provider if provider is not None else class_attr.strip())
    return names

def get_team_names(raw_html):
    """Return the text of every ``<div class="team-name">`` in ``raw_html``.

    The parser is named explicitly so the result does not depend on which
    optional parser libraries are installed, and so BeautifulSoup does not
    warn about guessing one. ``'html.parser'`` matches ``get_odds_rows``.

    Args:
        raw_html: HTML markup as a string.

    Returns:
        list[str]: team names in document order (empty if none are found).
    """
    soup = BeautifulSoup(raw_html, 'html.parser')
    return [team.text for team in soup.find_all("div", class_="team-name")]

def get_bet_types(raw_html):
    """Return the text of every ``<div class="bet-best-of">`` in ``raw_html``.

    The parser is named explicitly so the result does not depend on which
    optional parser libraries are installed, and so BeautifulSoup does not
    warn about guessing one. ``'html.parser'`` matches ``get_odds_rows``.

    Args:
        raw_html: HTML markup as a string.

    Returns:
        list[str]: one label per matching div, in document order
        (e.g. ``'BO3'``); empty if none are found.
    """
    soup = BeautifulSoup(raw_html, 'html.parser')
    return [bet.text for bet in soup.find_all("div", class_="bet-best-of")]

def get_tournament_name(raw_html):
    """Return the text of the first child of the first node in ``raw_html``.

    The lookup is purely positional (``contents[0].contents[0]``), so the
    parser is pinned to ``'html.parser'``. Parsers that wrap fragments in
    ``<html>``/``<body>`` would shift those positions and return different
    text.

    Args:
        raw_html: HTML fragment as a string. The caller in this notebook
            passes one tournament section that starts at its event-header
            div.

    Returns:
        str: the ``.text`` of that first nested node.

    Raises:
        IndexError: if the fragment, or its first node, has no children.
    """
    soup = BeautifulSoup(raw_html, 'html.parser')
    return soup.contents[0].contents[0].text

def get_odds_rows(raw_html):
    """Parse every odds cell found in ``raw_html`` into its own soup object.

    The markup is cut at each ``/td>``. Only pieces mentioning
    ``'odds betting-list-odds'`` are kept; the closing text removed by the
    split is appended again, and each piece is parsed with ``'html.parser'``.

    Args:
        raw_html: HTML markup as a string.

    Returns:
        list: one parsed object per odds cell, in document order.
    """
    closing = '/td>'
    parsed_cells = []
    for piece in raw_html.split(closing):
        if 'odds betting-list-odds' not in piece:
            continue
        parsed_cells.append(BeautifulSoup(piece + closing, 'html.parser'))
    return parsed_cells

def is_valid_bookie(row):
    """Tell whether ``row`` belongs to a bookmaker that is not hidden.

    Args:
        row: Parsed odds cell, passed straight to ``get_bookie_name``.

    Returns:
        bool: ``False`` when the extracted name contains ``'hidden'``,
        ``True`` otherwise.
    """
    return 'hidden' not in get_bookie_name(row)

def decode_row(row):
    """Split a parsed odds cell into its bookmaker name and numeric odds.

    Args:
        row: Parsed odds cell exposing ``text``; also passed to
            ``get_bookie_name``.

    Returns:
        tuple: ``(bookie_name, odds)`` where ``odds`` is whatever
        ``convert_to_number`` yields for the cell text.
    """
    return get_bookie_name(row), convert_to_number(row.text)

In [281]:
# Snapshot the bookmaker columns and the rendered page from the live browser.
bookmakers = get_book_makers(driver)
num_bookmakers = len(bookmakers)
html = driver.page_source

# Cut the page at every "event-header" div. The first piece (everything before
# the first header) is dropped, and the marker is glued back onto each
# remaining piece so that it can be parsed on its own.
tournaments = [
    '<div class="event-header' + s
    for s in html.split('<div class="event-header')[1:]
]

table_data = []
for tournament in tournaments:
    tournament_name = get_tournament_name(tournament)
    team_names = get_team_names(tournament)
    bet_types = get_bet_types(tournament)
    html_rows = get_odds_rows(tournament)
    num_matches = len(team_names) // 2

    # Pass 1: one record per non-hidden odds cell. The running count of kept
    # cells selects the team (one team per num_bookmakers cells) and the
    # best-of label (one label per two teams).
    # Assumes every team has exactly num_bookmakers non-hidden cells - TODO confirm.
    rows_by_team = []
    row_idx = 0
    for row in html_rows:
        if is_valid_bookie(row):
            contestant_idx = row_idx // num_bookmakers
            bet_type_idx = contestant_idx // 2
            row_idx += 1
            bookie_name, odds = decode_row(row)
            row_data = dict(
                tournament_name=tournament_name,
                team_name=team_names[contestant_idx],
                bookie_name=bookie_name,
                odds=odds,
                bet_type=bet_types[bet_type_idx],
            )
            rows_by_team.append(row_data)

    # Pass 2: records in even-numbered groups of num_bookmakers are paired
    # with the record num_bookmakers positions later, giving one tuple per
    # (pair of teams, bookmaker). Odd-numbered groups are only ever used as
    # the second half of a pair.
    for match_idx, match in enumerate(rows_by_team):
        if (match_idx // num_bookmakers) % 2 == 0:
            opponent = rows_by_team[match_idx + num_bookmakers]
            row = (
                match['tournament_name'],
                match['team_name'],
                opponent['team_name'],
                match['odds'],
                opponent['odds'],
                match['bet_type'],
                match['bookie_name'],
            )
            table_data.append(row)

In [282]:
# Display the collected tuples:
# (tournament, first team, second team, first odds, second odds, bet type, bookmaker).
table_data

[('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'betway'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  1.52,
  2.46,
  'BO3',
  'ggbet'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'thunderpick'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'lootbet'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'bet365'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  1.53,
  2.44,
  'BO3',
  '1xbet'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'pinnacle'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'buff88'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'unibet'),
 ('Aorus League 2019 #3 Southern Cone',
  '9z',
  'Agressive',
  -1,
  -1,
  'BO3',
  'parimatch'),
 ('Aorus Leag

In [249]:
# Spot check: text of the first "bet-best-of" div in the current `tournament`
# fragment. The parser is named explicitly so that BeautifulSoup does not
# guess one (and warn) based on what happens to be installed.
BeautifulSoup(tournament, 'html.parser').find_all("div", class_="bet-best-of")[0].text

'BO3'

In [215]:
# Display the second child node of `s`.
# NOTE(review): `s` is not assigned in any visible cell - presumably a parsed
# odds cell left over in the kernel; confirm or define it before this line.
s.contents[1]

<td class="odds betting-list-odds betting-list-odds-provider-glhf"></td>

In [54]:
# List the bookmakers read from the page. The helper defined in this notebook
# is spelled get_book_makers; get_bookmakers is not defined anywhere in it.
bookmakers = get_book_makers(driver)
print(len(bookmakers))
print(bookmakers)

16
['betway ', 'ggbet ', 'thunderpick ', 'lootbet ', 'bet365 ', '1xbet ', 'pinnacle ', 'buff88 ', 'unibet ', 'parimatch ', 'unikrn ', 'vulkanbet ', 'betwinner ', 'xbet ', 'cyberbet ', 'glhf ']


In [51]:
# Print each element's class attribute with its first 27 characters removed.
# NOTE(review): the recorded output looks like 'provider-cell betting-list-*'
# classes, whereas the visible cell assigns `table` from 'horizontalScroll'
# elements - presumably `table` held other elements when this ran; confirm.
for element in table:
    print(element.get_attribute('class')[27:])
#     print(element.text)
#     print(element.tag_name)
#     print(element.parent)
#     print(element.location)
#     print(element.size)

betway 
ggbet 
ggbetru hidden
thunderpick 
lootbet 
bet365 
1xbet 
1xstavka hidden
pinnacle 
buff88 
unibet 
thunderfire hidden
parimatch 
skrilla hidden
unikrn 
vulkanbet 
raybet hidden
xingwang hidden
betwinner 
yabo hidden
xbet 
shark hidden
cyberbet 
glhf 


In [48]:
# Display the full class attribute of `element`.
# NOTE(review): `element` is presumably the variable left bound by the last
# iteration of a `for element in table` loop - verify.
element.get_attribute('class')

'provider-cell betting-list-glhf '

In [59]:
# Display the contents of `html` in full (the output is very large).
html

'<html lang="en"><head><style></style>\n    <meta charset="utf-8">\n    <meta name="viewport" content="width=device-width, initial-scale=1" id="metaViewport">\n    <meta property="fb:admins" content="1004164229">\n    <meta property="fb:pages" content="249997999009">\n    <meta property="fb:app_id" content="1460388157605817">\n    <meta name="google-site-verification" content="DcypRFLQvgYQL5Acx7feoGWbblSsmKv6HpPI7mM_1uw">\n    <link rel="apple-touch-icon" sizes="180x180" href="/img/static/favicon/apple-touch-icon.png">\n    <link rel="icon" type="image/png" sizes="32x32" href="/img/static/favicon/favicon-32x32.png">\n    <link rel="icon" type="image/png" sizes="16x16" href="/img/static/favicon/favicon-16x16.png">\n    <link rel="manifest" href="/img/static/favicon/manifest.json">\n    <link rel="mask-icon" href="/img/static/favicon/safari-pinned-tab.svg" color="#5bbad5">\n    <meta name="theme-color" content="#ffffff">\n    <link href="https://fonts.googleapis.com/css?family=Open+Sans: