In [2]:
import pandas as pd
import numpy as np
import os 
import time
import re
from datetime import timedelta

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec

In [3]:
%load_ext autoreload
%autoreload 2

import parse_oddsPortal 

In [4]:
# Folder that contains the ChromeDriver executable.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# will not start on another machine until it is edited. ChromeDriverManager is
# imported at the top of the notebook but is not used here.
CHROMEDRIVER_PATH = 'C:/Users/rafae/Downloads/chromedriver_win32/'

# Start the Chrome session that every later cell drives through `driver`.
service_driver = Service(CHROMEDRIVER_PATH + "chromedriver.exe")
driver = webdriver.Chrome(service=service_driver)

In [5]:
driver.get('https://www.oddsportal.com/soccer/brazil/serie-a-2016/vitoria-palmeiras-jPZuMtJ6/')

In [6]:
def get_game_info(driver):
    """
        Scrape the match header of the page currently open in `driver`.

        Only the first three text lines of the 'col-content' element are used:
        the "<home> - <away>" title, the kick-off date and the result line.

        Returns a dict with the page URL, both team names, the kick-off
        timestamp (parsed date minus 3 hours), the full-time / 1st-half /
        2nd-half results as 'H:A' strings and the same six scores as ints.

        Raises IndexError / ValueError when the header does not have the
        expected shape (for instance when a score is missing).
    """
    header_lines = driver.find_element(By.ID, 'col-content').text.split('\n')[:3]

    # Title line looks like "<home> - <away>"
    home_team, away_team = header_lines[0].split(' - ')

    info = {
        'Game_URL': driver.current_url,
        'Home_Team': home_team,
        'Away_Team': away_team,
        # NOTE(review): fixed 3-hour shift, presumably to local Brazilian time - confirm
        'Datetime': pd.to_datetime(header_lines[1]) - timedelta(hours=3),
    }

    # Whitespace tokens of the result line: token 2 is the final score,
    # token 3 the 1st-half score wrapped as "(x:y," and token 4 the
    # 2nd-half score followed by one trailing character.
    result_tokens = header_lines[2].split()
    info['Final_Result'] = result_tokens[2]
    info['1st_Half_Result'] = result_tokens[3][1:-1]
    info['2nd_Half_Result'] = result_tokens[4][0:-1]

    # Split every 'H:A' string into its home and away part
    periods = (
        ('FullTime', 'Final_Result'),
        ('1st_Half', '1st_Half_Result'),
        ('2nd_Half', '2nd_Half_Result'),
    )
    home_goals, away_goals = {}, {}
    for suffix, result_key in periods:
        home_goals[suffix], away_goals[suffix] = info[result_key].split(':')

    # Home scores first, then away scores, each in period order
    for side, goals in (('Home', home_goals), ('Away', away_goals)):
        for suffix, _ in periods:
            info[f'{side}_Score_{suffix}'] = int(goals[suffix])

    return info

In [7]:
get_game_info(driver)

{'Game_URL': 'https://www.oddsportal.com/soccer/brazil/serie-a-2016/vitoria-palmeiras-jPZuMtJ6/',
 'Home_Team': 'Vitoria',
 'Away_Team': 'Palmeiras',
 'Datetime': Timestamp('2016-12-11 17:00:00'),
 'Final_Result': '1:2',
 '1st_Half_Result': '1:2',
 '2nd_Half_Result': '0:0',
 'Home_Score_FullTime': 1,
 'Home_Score_1st_Half': 1,
 'Home_Score_2nd_Half': 0,
 'Away_Score_FullTime': 2,
 'Away_Score_1st_Half': 2,
 'Away_Score_2nd_Half': 0}

In [35]:
[x.get_attribute('href') for x in driver.find_element(By.ID, 'breadcrumb').find_elements(By.TAG_NAME, 'a')]

['https://www.oddsportal.com/',
 'https://www.oddsportal.com/soccer/',
 'https://www.oddsportal.com/soccer/brazil/',
 'https://www.oddsportal.com/soccer/brazil/serie-a-2016/results/']

In [44]:
def get_game_links_page(driver):
    """
        Collect the match links listed in the results table of the current page.

        Every <a> inside the 'tournamentTable' element whose text contains a
        '-' is treated as a match link. Returns a list of dicts with the link
        text ('Game'), the href ('Link') and the 'Sport', 'Country' and
        'League' path segments taken from positions 3, 4 and 5 of the href
        split on '/'.
    """
    anchors = driver.find_element(By.ID, 'tournamentTable').find_elements(By.TAG_NAME, 'a')

    games = []
    for anchor in anchors:
        label = anchor.text
        # Anchors without a dash are skipped
        if '-' not in label:
            continue

        href = anchor.get_attribute('href')
        segments = href.split('/')

        games.append({
            'Game': label,
            'Link': href,
            'Sport': segments[3],
            'Country': segments[4],
            'League': segments[5],
        })
    return games

In [50]:
# Crawl the paginated results listing, collecting match links page by page
# until the site reports that there is nothing left to show.
page_i = 1

# Accumulates one dict per match link (see get_game_links_page)
resp = []

while(True):
    print(f"Acessing page {page_i}...", end="\r")
    
    # NOTE(review): only the URL fragment changes between iterations; confirm
    # that the page really re-renders before the table is read.
    driver.get(f"https://www.oddsportal.com/soccer/brazil/serie-a-2016/results/#/page/{page_i}/")
    
    # Block (up to 20 s) until the results table is visible
    wait = WebDriverWait(driver, 20)
    wait.until(ec.visibility_of_element_located((By.ID, 'tournamentTable')))
    
    text = driver.find_element(By.ID, 'tournamentTable').text
    
    # Past the last page the table only carries this notice: stop crawling
    if 'Unfortunately, no matches can be displayed' in text:
        break
    
    resp.extend(
        get_game_links_page(driver)
    )
    
    page_i += 1

In [52]:
pd.DataFrame(resp).to_excel('Soccer__Brazil__Serie-a-2016.xlsx')

### Downloading links

In [None]:
# Target league path used in the next cell: england/premier-league-2020-2021

In [69]:
resp = parse_oddsPortal.get_game_links(driver, sport='soccer', country='england', league='premier-league-2020-2021')
pd.DataFrame(resp)

Acessing page 9...

Unnamed: 0,Game,Link,Sport,Country,League
0,Arsenal - Brighton,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
1,Aston Villa - Chelsea,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
2,Fulham - Newcastle,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
3,Leeds - West Brom,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
4,Leicester - Tottenham,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
...,...,...,...,...,...
375,West Brom - Leicester,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
376,West Ham - Newcastle,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
377,Liverpool - Leeds,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021
378,Crystal Palace - Southampton,https://www.oddsportal.com/soccer/england/prem...,soccer,england,premier-league-2020-2021


In [70]:
pd.DataFrame(resp).to_excel('tournament_links/Soccer__England__Premier-League-2020-2021.xlsx')

In [74]:
base_ref = pd.read_excel("tournament_links/Soccer__Brazil__Serie-a-2016.xlsx", index_col=0)

In [96]:
base_ref.Link[336]

'https://www.oddsportal.com/soccer/brazil/serie-a-2016/corinthians-santos-Wx0YEbJ7/'

In [None]:
# Scrape every match listed in `base_ref`, appending one result per match to `resp`.
# NOTE(review): `resp` is not initialised here (the reset below is commented out),
# so this cell relies on a list left in the kernel by an earlier cell - presumably
# to keep results across interrupted runs; confirm before a fresh Run All.
# resp = []

# NOTE(review): hard-coded start index, presumably the resume point of a previous run
i = 336
while i < len(base_ref.Link):
    print(f"{i}/{len(base_ref)}...", end='\r')
    
    link = base_ref.Link[i]
    
    driver.get(link)
    # Fixed pause after navigation before the page is parsed
    time.sleep(2)
    
    r = parse_oddsPortal.get_all_dfs(driver, verbose=False, pass_exception=True)
    resp.append(r)
    
    i += 1

356/380...

In [90]:
pd.DataFrame(resp)

Unnamed: 0,Game_URL,Home_Team,Away_Team,Datetime,Final_Result,1st_Half_Result,2nd_Half_Result,Home_Score_FullTime,Home_Score_1st_Half,Home_Score_2nd_Half,...,eh__(Unibet)-> 88.8%__1st Half,eh__(Unibet)-> 5.40__1st Half,eh__(Unibet)-> 89.9%__1st Half,eh__(Unibet)-> 19.00__1st Half,ah__(Marathonbet)-> 1.16__2nd Half,ah__(Marathonbet)-> 5.30__2nd Half,European handicap +1-> Payout__1st Half,European handicap +1-> Away__1st Half,European handicap +1-> Draw__1st Half,European handicap +1-> Home__1st Half
0,https://www.oddsportal.com/soccer/brazil/serie...,Athletico-PR,Flamengo RJ,2016-12-11 17:00:00,0:0,0:0,0:0,0,0,0,...,,,,,,,,,,
1,https://www.oddsportal.com/soccer/brazil/serie...,Cruzeiro,Corinthians,2016-12-11 17:00:00,3:2,1:1,2:1,3,1,2,...,,,,,,,,,,
2,https://www.oddsportal.com/soccer/brazil/serie...,Fluminense,Internacional,2016-12-11 17:00:00,1:1,0:0,1:1,1,0,1,...,,,,,,,,,,
3,https://www.oddsportal.com/soccer/brazil/serie...,Gremio,Botafogo RJ,2016-12-11 17:00:00,0:1,0:1,0:0,0,0,0,...,,,,,,,,,,
4,https://www.oddsportal.com/soccer/brazil/serie...,Ponte Preta,Coritiba,2016-12-11 17:00:00,2:0,0:0,2:0,2,0,2,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,https://www.oddsportal.com/soccer/brazil/serie...,Botafogo RJ,Internacional,2016-10-12 21:45:00,1:0,0:0,1:0,1,0,1,...,,,,,,,,,,
88,https://www.oddsportal.com/soccer/brazil/serie...,Santa Cruz,Corinthians,2016-10-12 21:45:00,2:4,1:1,1:3,2,1,1,...,5.40,88.8%,,,,,,,,
89,https://www.oddsportal.com/soccer/brazil/serie...,Coritiba,Figueirense,2016-10-12 19:30:00,0:0,0:0,0:0,0,0,0,...,,,7.00,,,,,,,
90,https://www.oddsportal.com/soccer/brazil/serie...,Chapecoense-SC,Sport Recife,2016-10-12 11:00:00,3:0,1:0,2:0,3,1,2,...,,,,,,,,,,


In [47]:
driver.get('https://www.oddsportal.com/soccer/brazil/serie-a-2016/results/#/page/10/')

In [49]:
driver.find_element(By.ID, 'tournamentTable').text

'Unfortunately, no matches can be displayed because there are no odds available from your selected bookmakers.\n|««5678»»|'

In [46]:
pd.DataFrame(get_game_links_page(driver))

Unnamed: 0,Game,Link,Sport,Country,League
0,Athletico-PR - Flamengo RJ,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
1,Chapecoense-SC - Atletico-MG,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
2,Cruzeiro - Corinthians,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
3,Fluminense - Internacional,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
4,Gremio - Botafogo RJ,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
5,Ponte Preta - Coritiba,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
6,Santos - America MG,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
7,Sao Paulo - Santa Cruz,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
8,Sport Recife - Figueirense,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016
9,Vitoria - Palmeiras,https://www.oddsportal.com/soccer/brazil/serie...,soccer,brazil,serie-a-2016


In [8]:
{x.text: x.get_attribute('onclick') for x in driver.find_element(By.CLASS_NAME, 'ul-nav').find_elements(By.TAG_NAME, 'li')}

{'': None,
 '1X2': None,
 'AH': None,
 'O/U': None,
 'DNB': None,
 'EH': None,
 'DC': None,
 'CS': None,
 'More bets': None}

In [18]:
# Iterate through types of bets:
# click every tab of the bet-type navigation bar, then every game-period
# sub-tab, and merge the flattened odds of each combination into `resp`.

tabs_bet_types = driver.find_element(By.CLASS_NAME, 'ul-nav').find_elements(By.TAG_NAME, 'li')
resp = {}

for tab in tabs_bet_types:
    try:
        tab.click()
    except Exception as e:
        # Tabs with no text are skipped when they cannot be clicked;
        # any other click failure is re-raised.
        if tab.text == '':
            continue
        else:
            raise e
    
    # Takes the type of the bet
    bet_type = parse_oddsPortal.get_bet_type(driver)
    
    # NOTE(review): in the recorded output the 'More bets' tab still reports
    # bet type 'cs', so the previous tab's data appears to be scraped again.
    print(f"Current page: {tab.text} <-> {bet_type}")

    li_halfs = parse_oddsPortal.get_elements_tempo(driver)
#     print(f"\t{li_halfs[0].text}")
#     time.sleep(2)

    # One pass per game-period sub-tab
    for i, el in enumerate(li_halfs):
        el.click()
        # Fixed pause after the click before the table is read
        time.sleep(2)
            
        current_game_period = el.text
        
        print(f"\t{current_game_period}")
        
        # Keys embed bet type and game period, so each pass adds its own entries
        resp.update(
            parse_oddsPortal.get_info_as_jsons(
                parse_oddsPortal.get_df(driver),
                bet_type, 
                current_game_period
            )
        )

Current page: 1X2 <-> 1X2
	Full Time
	1st Half
	2nd Half
Current page: Asian Handicap <-> ah
	Full Time
	1st Half
	2nd Half
Current page: Over/Under <-> over-under
	Full Time
	1st Half
	2nd Half
Current page: Draw No Bet <-> dnb
	Full Time
	1st Half
	2nd Half
Current page: European Handicap <-> eh
	Full Time
	1st Half
	2nd Half
Current page: Double Chance <-> double
	Full Time
	1st Half
	2nd Half
Current page: Correct Score <-> cs
	Full Time
	1st Half
	2nd Half
Current page: Half Time / Full Time
Odd or Even
Both Teams to Score
More bets <-> cs
	Full Time
	1st Half
	2nd Half


In [20]:
resp

{'1X2__(1xBet)-> Home__Full Time': '1.92',
 '1X2__(1xBet)-> Draw__Full Time': '3.56',
 '1X2__(1xBet)-> Away__Full Time': '4.20',
 '1X2__(1xBet)-> Payout__Full Time': 0.9620000000000001,
 '1X2__(bet-at-home)-> Home__Full Time': '1.86',
 '1X2__(bet-at-home)-> Draw__Full Time': '3.42',
 '1X2__(bet-at-home)-> Away__Full Time': '3.86',
 '1X2__(bet-at-home)-> Payout__Full Time': 0.9179999999999999,
 '1X2__(bet365)-> Home__Full Time': '1.85',
 '1X2__(bet365)-> Draw__Full Time': '3.40',
 '1X2__(bet365)-> Away__Full Time': '4.33',
 '1X2__(bet365)-> Payout__Full Time': 0.938,
 '1X2__(bwin)-> Home__Full Time': '1.90',
 '1X2__(bwin)-> Draw__Full Time': '3.10',
 '1X2__(bwin)-> Away__Full Time': '4.00',
 '1X2__(bwin)-> Payout__Full Time': 0.91,
 '1X2__(Marathonbet)-> Home__Full Time': '1.90',
 '1X2__(Marathonbet)-> Draw__Full Time': '3.56',
 '1X2__(Marathonbet)-> Away__Full Time': '4.25',
 '1X2__(Marathonbet)-> Payout__Full Time': 0.9590000000000001,
 '1X2__(Pinnacle)-> Home__Full Time': '1.92',
 '1

In [None]:
wait = WebDriverWait(driver, 10)

In [17]:
resp = parse_oddsPortal.get_all_dfs(driver)
resp

Current page: 1X2 <-> 1X2
	Full Time
	1st Half
	2nd Half
Current page: Asian Handicap <-> ah
	Full Time
	1st Half
	2nd Half
Current page: Over/Under <-> over-under
	Full Time
	1st Half
	2nd Half
Current page: Draw No Bet <-> dnb
	Full Time
	1st Half
	2nd Half
Current page: European Handicap <-> eh
	Full Time
	1st Half
	2nd Half
Current page: Double Chance <-> double
	Full Time
	1st Half
	2nd Half
Current page: Correct Score <-> cs
	Full Time
	1st Half
	2nd Half
Current page: Half Time / Full Time
Odd or Even
Both Teams to Score
More bets <-> cs
	Full Time
	1st Half
	2nd Half


{'Game_URL': 'https://www.oddsportal.com/soccer/brazil/serie-a-2016/vitoria-palmeiras-jPZuMtJ6/#ah;2',
 'Home_Team': 'Vitoria',
 'Away_Team': 'Palmeiras',
 'Datetime': Timestamp('2016-12-11 17:00:00'),
 'Final_Result': '1:2',
 '1st_Half_Result': '1:2',
 '2nd_Half_Result': '0:0',
 'Home_Score_FullTime': 1,
 'Home_Score_1st_Half': 1,
 'Home_Score_2nd_Half': 0,
 'Away_Score_FullTime': 2,
 'Away_Score_1st_Half': 2,
 'Away_Score_2nd_Half': 0,
 '1X2__(1xBet)-> Home__Full Time': '1.92',
 '1X2__(1xBet)-> Draw__Full Time': '3.56',
 '1X2__(1xBet)-> Away__Full Time': '4.20',
 '1X2__(1xBet)-> Payout__Full Time': 0.9620000000000001,
 '1X2__(bet-at-home)-> Home__Full Time': '1.86',
 '1X2__(bet-at-home)-> Draw__Full Time': '3.42',
 '1X2__(bet-at-home)-> Away__Full Time': '3.86',
 '1X2__(bet-at-home)-> Payout__Full Time': 0.9179999999999999,
 '1X2__(bet365)-> Home__Full Time': '1.85',
 '1X2__(bet365)-> Draw__Full Time': '3.40',
 '1X2__(bet365)-> Away__Full Time': '4.33',
 '1X2__(bet365)-> Payout__Full

In [20]:
pd.DataFrame.from_dict([resp])

Unnamed: 0,Game_URL,Home_Team,Away_Team,Datetime,Final_Result,1st_Half_Result,2nd_Half_Result,Home_Score_FullTime,Home_Score_1st_Half,Home_Score_2nd_Half,...,Correct_Score__1:1-> Odds__2nd Half,Correct_Score__2:2-> Odds__2nd Half,Correct_Score__0:1-> Odds__2nd Half,Correct_Score__0:2-> Odds__2nd Half,Correct_Score__1:2-> Odds__2nd Half,Correct_Score__0:3-> Odds__2nd Half,Correct_Score__1:3-> Odds__2nd Half,Correct_Score__2:3-> Odds__2nd Half,Correct_Score__0:4-> Odds__2nd Half,Correct_Score__1:4-> Odds__2nd Half
0,https://www.oddsportal.com/soccer/brazil/serie...,Vitoria,Palmeiras,2016-12-11 17:00:00,1:2,1:2,0:0,1,1,0,...,7.0,35.0,6.75,18.0,19.0,61.0,67.0,126.0,251.0,276.0


In [436]:
# Iterates through other types of bets
# (the links inside the 'othersList' element).
# NOTE(review): `resp` is updated but never created here, and this cell calls the
# notebook-level get_bet_type / get_elements_tempo / get_df / get_info_as_jsons
# rather than the parse_oddsPortal versions; those cells must have been run first.

extra_bets = driver.find_element(By.CLASS_NAME, 'othersList').find_elements(By.TAG_NAME, 'a')
# Link label (innerHTML) -> JavaScript stored in the link's onclick attribute
extra_navigation = {
    x.get_attribute('innerHTML') : x.get_attribute('onclick') 
    for x in extra_bets
}

for key in extra_navigation:
    # Run the link's onclick script directly instead of clicking the element
    driver.execute_script(extra_navigation[key])
    
    # Takes the type of the bet
    bet_type = get_bet_type(driver)
    
    print(f"Current page: {key} <-> {bet_type}")

    li_halfs = get_elements_tempo(driver)
    # One pass per game-period sub-tab
    for i, el in enumerate(li_halfs):
        el.click()
        # Fixed pause after the click before the table is read
        time.sleep(2)
            
        current_game_period = el.text
        
        print(f"\t{current_game_period}")
        
        resp.update(
            get_info_as_jsons(
                get_df(driver),
                bet_type, 
                current_game_period
            )
        )

Current page: Half Time / Full Time <-> ht-ft
	Full Time
Current page: Odd or Even <-> odd-even
	Full Time
	1st Half
Current page: Both Teams to Score <-> bts
	Full Time
	1st Half
	2nd Half


In [437]:
resp

{'1X2__(1xBet)-> Home__Full Time': '1.92',
 '1X2__(1xBet)-> Draw__Full Time': '3.56',
 '1X2__(1xBet)-> Away__Full Time': '4.20',
 '1X2__(1xBet)-> Payout__Full Time': 0.9620000000000001,
 '1X2__(bet-at-home)-> Home__Full Time': '1.86',
 '1X2__(bet-at-home)-> Draw__Full Time': '3.42',
 '1X2__(bet-at-home)-> Away__Full Time': '3.86',
 '1X2__(bet-at-home)-> Payout__Full Time': 0.9179999999999999,
 '1X2__(bet365)-> Home__Full Time': '1.85',
 '1X2__(bet365)-> Draw__Full Time': '3.40',
 '1X2__(bet365)-> Away__Full Time': '4.33',
 '1X2__(bet365)-> Payout__Full Time': 0.938,
 '1X2__(bwin)-> Home__Full Time': '1.90',
 '1X2__(bwin)-> Draw__Full Time': '3.10',
 '1X2__(bwin)-> Away__Full Time': '4.00',
 '1X2__(bwin)-> Payout__Full Time': 0.91,
 '1X2__(Marathonbet)-> Home__Full Time': '1.90',
 '1X2__(Marathonbet)-> Draw__Full Time': '3.56',
 '1X2__(Marathonbet)-> Away__Full Time': '4.25',
 '1X2__(Marathonbet)-> Payout__Full Time': 0.9590000000000001,
 '1X2__(Pinnacle)-> Home__Full Time': '1.92',
 '1

In [89]:
# Switch between bet types: 1x2, Home/Away, AH, O/U, DNB, EH

# Reference map of tab position -> bet type; it is not read by the code below.
# NOTE(review): the tab labels listed earlier in this notebook
# ('', '1X2', 'AH', 'O/U', 'DNB', 'EH', 'DC', 'CS', 'More bets') do not match
# this map - confirm which tab index 6 actually selects.
navegar = {
    1: '1x2',
    2: 'Home/Away',
    3: 'AH',
    4: 'O/U',
    5: 'DNB',
    6: 'EH'
}
# Click the <li> at index 6 of the bet-type navigation bar
driver.find_element(By.CLASS_NAME, 'ul-nav').find_elements(By.TAG_NAME, 'li')[6].click()

In [90]:
def get_elements_tempo(driver):
    """
        Return the <li> sub-tabs of the game-period selector
        ('Full Time', '1st Half', ...) of the current page.

        The selector is the first element with class 'subactive' whose text
        contains '1st Half' or 'Full Time'. When none of the 'subactive'
        elements matches, the last one is used (same fallback as before).

        Returns an empty list when the page has no 'subactive' element at all,
        instead of failing on an unbound loop variable.
    """
    subactives = driver.find_elements(By.CLASS_NAME, 'subactive')

    # Nothing to choose from: no period tabs on this page
    if not subactives:
        return []

    period_bar = subactives[-1]  # fallback when no element mentions a game period
    for candidate in subactives:
        candidate_text = candidate.text
        if '1st Half' in candidate_text or 'Full Time' in candidate_text:
            period_bar = candidate
            break

    return period_bar.find_elements(By.TAG_NAME, 'li')

In [91]:
get_elements_tempo(driver)

[<selenium.webdriver.remote.webelement.WebElement (session="03369ca0f8ab34e272e2294a5194c4db", element="0ae71f5b-5232-4b7b-9b14-46f4eef2dfff")>,
 <selenium.webdriver.remote.webelement.WebElement (session="03369ca0f8ab34e272e2294a5194c4db", element="464631d0-2a9e-45c2-8e8d-3a361837a59f")>,
 <selenium.webdriver.remote.webelement.WebElement (session="03369ca0f8ab34e272e2294a5194c4db", element="fe1c0dd7-76e8-42d0-872c-ea8cc7cc425c")>]

In [203]:
get_elements_tempo(driver)[1].text

'2nd Half'

In [56]:
# Switch between Full Time, 1st Half, 2nd Half
# Reference map of sub-tab position -> game period; it is not read by the code below.
navegar_tempo = {
    0: 'Full Time',
    1: '1st Half',
    2: '2nd Half'
}

# Click the first game-period sub-tab
get_elements_tempo(driver)[0].click()

In [13]:
# List of other bet types

# Reference map of link position -> bet type; it is not read by the code below.
navegar_extra = {
    0: 'Double Chance',
    1: 'HalfTime/FullTime',
    2: 'Odd or Even',
    3: 'Both Teams to Score',
    
}
# Inspect the JavaScript attached to the first link of the 'othersList' element
driver.find_element(By.CLASS_NAME, 'othersList').find_elements(By.TAG_NAME, 'a')[0].get_attribute('onClick')#[5].click()

'uid(0).hideMore();uid(8)._onClick();return false;'

In [435]:
def parse_handicaps(table_text, col_names):
    """
        Parse the plain-text rendering of an odds table into a DataFrame.

        The text is cut at the first 'Click to show', 'Average' or 'Hide odds'
        marker, split into rows on the 'Compare odds' string, and each row is
        split into cells on newlines.

        Parameters
        ----------
        table_text : str
            Text of the odds table element.
        col_names : list of str
            Column names for the parsed table.

        Returns
        -------
        pandas.DataFrame
            One row per parsed line, rows with missing cells removed. When a
            'Payout' column is present it is converted from 'NN.N%' strings
            to float fractions.

        Raises
        ------
        ValueError
            When the number of parsed columns does not match `col_names`.
    """
    # Strip the "(0)" / "(1)" / "(2)" markers so they do not become cells
    for marker in ('(1)', '(0)', '(2)'):
        table_text = table_text.replace(marker, '')

    # Keep only the text that precedes the first footer marker.
    # Raw strings: '\s' inside a normal string literal is an invalid escape sequence.
    clean_str = table_text
    for footer_pattern in (r'Click\sto\sshow', r'Average', r'Hide\sodds'):
        clean_str = re.split(footer_pattern, clean_str)[0]

    # Split each line of the table related to the 'Compare odds' string
    rows = [chunk.strip().split('\n') for chunk in clean_str.split('Compare odds')]
    df = pd.DataFrame(rows)

    # Remove last line and last column
    if len(df) > 1:
        df = df.iloc[:-1]
    if len(df.columns) - 1 == len(col_names):
        df = df.iloc[:, :-1]

    df.columns = col_names
    # Remove empty lines; the explicit copy keeps the column assignment below
    # from writing into a slice of the intermediate frame.
    df = df.dropna(axis=0).copy()

    # cast payout column to float
    if 'Payout' in df.columns:
        df['Payout'] = df['Payout'].str.replace('%', '', regex=False).astype(float) / 100

    return df

In [429]:
parse_handicaps(
    driver.find_element(By.ID, 'odds-data-table').text, 
    ['Score', 'Odds']
)

Unnamed: 0,Score,Odds
0,1:0,4.40
1,2:0,8.50
2,2:1,13.00
3,3:0,21.00
4,3:1,33.00
5,3:2,91.00
6,4:0,61.00
7,4:1,101.00
8,4:2,276.00
9,5:0,226.00


In [414]:
driver.find_element(By.CLASS_NAME, 'table-chunk-header-dark').text.split()

[]

In [415]:
driver.find_element(By.CLASS_NAME, 'table-chunk-header-dark').text.split()

[]

In [249]:
def parse_odds(table_text):
    """
        Parse the text of a bookmaker odds table into a DataFrame.

        Works for 1x2, Home/Away bet-types.

        The text is cut at the first 'Click to show' or 'Average' marker and
        split into chunks on newline-plus-space. The first chunk is the header
        (split on whitespace); every other chunk is one row (split on
        newlines). '-' cells become NaN, the first row is promoted to column
        labels and the last column is discarded.

        Returns a DataFrame indexed from 1. When a 'Payout' column is present
        it is converted from 'NN.N%' strings to float fractions.
    """
    # Cleans string only until 'Click to show' or 'Average' message.
    # Raw strings: '\s' inside a normal string literal is an invalid escape sequence.
    clean_str = re.split(r'Click\sto\sshow', table_text)[0]
    chunks = re.split(r'Average', clean_str)[0].split('\n ')

    # Header cells are whitespace separated, row cells are newline separated
    table_rows = [chunks[0].split()] + [chunk.split('\n') for chunk in chunks[1:]]

    df = pd.DataFrame(table_rows).replace('-', np.nan)
    df.columns = df.iloc[0]  # first line as header
    # Remove first line and last column; the explicit copy keeps the column
    # assignment below from writing into a chained slice.
    df = df[1:][df.columns[:-1]].copy()

    # cast payout column to float
    if 'Payout' in df.columns:
        df['Payout'] = df['Payout'].str.replace('%', '', regex=False).astype(float) / 100

    return df

In [159]:
def get_bet_type(driver):
    """
        Return the bet type encoded in the fragment of the current URL.

        The bet type is the text between the first '#' and the first ';' that
        follows it (e.g. '...#over-under;2' -> 'over-under'). When the URL has
        no fragment, or the fragment is empty, '1X2' is returned because that
        is the default tab.

        Errors raised while reading `driver.current_url` are no longer
        swallowed; they propagate to the caller.
    """
    url_parts = driver.current_url.split('#')

    # In case it is not in the URL, 1X2 is the default
    if len(url_parts) < 2:
        return '1X2'

    return url_parts[1].split(';')[0] or '1X2'

In [160]:
def get_df(driver):
    """
        Build a DataFrame with the odds shown on the page currently open in `driver`.

        The parser is chosen from the text of the 'table-main' element: when it
        is empty the 'odds-data-table' text goes through `parse_handicaps`,
        otherwise through `parse_odds`. Column labels '1', 'X' and '2' are
        renamed to 'Home', 'Draw' and 'Away' before returning.
    """
    driver_text = driver.find_element(By.CLASS_NAME, 'table-main').text
    
    bet_type = get_bet_type(driver)
    
    # Indicates that the data is in text format, not HTML table
    # use the method 'parse_handicaps'
    if driver_text == '':
        # Special header for these types of bets:
        if bet_type == 'ht-ft':
            cols = ['Result', 'Odds']
        elif bet_type == 'cs':
            cols = ['Score', 'Odds']
        else:
            # default header from HTML
            # NOTE(review): if this element's text is empty, `cols` is [] and
            # parse_handicaps fails with a length-mismatch ValueError - confirm
            cols = driver.find_element(By.CLASS_NAME, 'table-chunk-header-dark').text.split()
            
        resp =  parse_handicaps(
            driver.find_element(By.ID, 'odds-data-table').text, 
            cols
        )
    else:
        # Data comes in table, using the method 'parse_odds'
        resp = parse_odds(driver.find_element(By.ID, 'odds-data-table').text)
        
        # Post processing for the Odd/Even bet type
        if bet_type == 'odd-even':
            resp = resp.dropna(axis=1, thresh=1) # remove empty columns
            resp = resp.loc[:, ~resp.columns.duplicated()] # remove duplicated columns
            # The payout percentages are read from the column labelled 'Goals'
            # NOTE(review): presumably the parsed header is shifted relative to
            # the data for this bet type - confirm against the page
            resp['Payout'] = resp['Goals'].str.replace('%', '').astype(float)/100
            
            # remove unnecessary columns
            del resp['Goals']
            del resp['Bookmakers']
            
            # renaming columns
            # (positional: assumes exactly four columns are left at this point)
            resp.columns = ['Bookmakers', 'Odd', 'Even', 'Payout']
            
    # Human-readable names for the 1 / X / 2 outcome columns
    map_replaces = {
        '1': 'Home',
        '2': 'Away',
        'X': 'Draw'
    }

    resp.columns = [map_replaces[x] if x in map_replaces else x for x in resp.columns]

    return resp

In [350]:
print(driver.find_element(By.ID, 'odds-data-table').text)

European handicap -1
88.0%
1.42
3.45
7.00
(1)
Hide odds
Bookmakers
Hand.
1 X 2 Payout
 Unibet  
-1
7.00
3.45
1.42
88.0%
Click to show 1 more bookmakers!
Average 7.00 3.45 1.42 88.0%
Highest 7.00 3.45 1.42 88.0%
My Coupon
      User Predictions
Log in to show!
Options: OddsAlert
European handicap +1
88.4%
12.00
5.40
1.16
(1)
Hide odds
Bookmakers
Hand.
1 X 2 Payout
 Unibet  
+1
1.16
5.40
12.00
88.4%
Click to show 1 more bookmakers!
Average 1.16 5.40 12.00 88.4%
Highest 1.16 5.40 12.00 88.4%
My Coupon
      User Predictions
Log in to show!
Options: OddsAlert


In [348]:
print(driver_text)




In [346]:
get_df(driver)

ValueError: Length mismatch: Expected axis has 45 elements, new values have 5 elements

In [162]:
get_bet_type(driver)

'over-under'

In [279]:
def get_info_as_jsons(df, bet_type, game_period):
    """
        Flatten an odds DataFrame into a single-level dict.

        The first column of `df` identifies each row (bookmaker, score or
        bet sub-type); every other cell becomes one entry whose key is
        '<prefix><column>__<game_period>'. The prefix depends on the table:

        - table with a 'Bookmakers' column: '<bet_type>__(<bookmaker>)-> '
        - correct score (bet_type == 'cs'): 'Correct_Score__<score>-> '
        - anything else:                    '<row label>-> '

        Row labels are stripped of surrounding whitespace.
    """
    # Get dataframe as dictionary: {row label: {column: value}}
    rows = df.set_index(df.columns[0]).to_dict('index')
    has_bookmakers = 'Bookmakers' in df.columns

    flat = {}
    for label, cells in rows.items():
        name = label.strip()

        if has_bookmakers:
            # For example: 1X2__(bet365)-> Home__Full Time
            prefix = bet_type + "__(" + name + ")-> "
        elif bet_type == 'cs':
            # For example: Correct_Score__1:0-> Odds__1st Half
            prefix = 'Correct_Score__' + name + "-> "
        else:
            # For example: European handicap +1-> Away__1st Half
            prefix = name + "-> "

        for column, value in cells.items():
            flat[prefix + column + "__" + game_period] = value

    return flat

In [280]:
get_info_as_jsons(
    get_df(driver),
    get_bet_type(driver), 
    get_elements_tempo(driver)[0].text
)

{'1X2__(1xBet)-> Home__Full Time': '1.54',
 '1X2__(1xBet)-> Draw__Full Time': '4.50',
 '1X2__(1xBet)-> Away__Full Time': '5.70',
 '1X2__(1xBet)-> Payout__Full Time': 0.955,
 '1X2__(Asianodds)-> Home__Full Time': '1.51',
 '1X2__(Asianodds)-> Draw__Full Time': '4.66',
 '1X2__(Asianodds)-> Away__Full Time': '6.64',
 '1X2__(Asianodds)-> Payout__Full Time': 0.973,
 '1X2__(bet-at-home)-> Home__Full Time': '1.47',
 '1X2__(bet-at-home)-> Draw__Full Time': '4.30',
 '1X2__(bet-at-home)-> Away__Full Time': '5.80',
 '1X2__(bet-at-home)-> Payout__Full Time': 0.9209999999999999,
 '1X2__(bet365)-> Home__Full Time': '1.50',
 '1X2__(bet365)-> Draw__Full Time': '4.00',
 '1X2__(bet365)-> Away__Full Time': '7.00',
 '1X2__(bet365)-> Payout__Full Time': 0.9440000000000001,
 '1X2__(bwin)-> Home__Full Time': '1.50',
 '1X2__(bwin)-> Draw__Full Time': '4.40',
 '1X2__(bwin)-> Away__Full Time': '6.00',
 '1X2__(bwin)-> Payout__Full Time': 0.943,
 '1X2__(Coolbet)-> Home__Full Time': '1.54',
 '1X2__(Coolbet)-> Draw_