# Scraping Interactive Match Links From Main Page

In [183]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import re
from itertools import islice
from selenium.webdriver.chrome.service import Service
import warnings

# Silence every Python warning for the remainder of the session
warnings.filterwarnings("ignore")

# Start a Chrome session driven by the locally installed chromedriver binary
driver_path = 'C:/Users/99451/AppData/Local/Programs/Python/Python312/Lib/site-packages/selenium/webdriver/chromedriver-win64/chromedriver.exe'
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(service=service)

# Navigate to the Eredivisie results listing on OddsPortal
results_url = 'https://www.oddsportal.com/football/netherlands/eredivisie/results/'
driver.get(results_url)

# Function to scroll the page and load more matches
def scroll_to_load_matches():
    """Scroll the current page to the bottom until its height stops growing.

    Operates on the module-level ``driver``. After every scroll it sleeps for
    two seconds and re-reads ``document.body.scrollHeight``; the function
    returns as soon as two consecutive readings are equal.
    """
    read_height = "return document.body.scrollHeight"
    jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    previous_height = driver.execute_script(read_height)
    while True:
        driver.execute_script(jump_to_bottom)
        time.sleep(2)  # fixed pause before measuring the page again

        current_height = driver.execute_script(read_height)
        if current_height == previous_height:
            return  # height unchanged: nothing more was appended
        previous_height = current_height

# Scroll until the page stops growing, then collect every link target.
# The try/finally guarantees the browser is closed even if scraping fails.
try:
    scroll_to_load_matches()

    # Find all links on the page
    links = driver.find_elements(By.TAG_NAME, 'a')

    # Read each href exactly once (every get_attribute call is a separate
    # request to the browser) and drop links without a usable target.
    hrefs = [link.get_attribute('href') for link in links]
    urls = [href for href in hrefs if href]
finally:
    driver.quit()

# Longest URL seen so far for each match, keyed by the match identifier
longest_urls = {}

# Regex pattern to identify match URLs (this may need to be adjusted based on the actual URL structure)
match_pattern = re.compile(r'/football/.+/.+/[^/]+-[^/]+-[\w-]+/')

for url in urls:
    if not match_pattern.search(url):
        continue

    # For a URL ending in "/", the second-to-last split piece is the final
    # path segment, which is used here as the match identifier.
    match_identifier = url.split('/')[-2]

    # Keep only the longest URL recorded for each identifier
    current = longest_urls.get(match_identifier)
    if current is None or len(url) > len(current):
        longest_urls[match_identifier] = url

# Optionally keep only the first N matches (handy for a quick trial run)
#longest_urls = dict(islice(longest_urls.items(), 11))

# (identifier, url) pairs in insertion order, so they can be sliced
longest_url_items = list(longest_urls.items())

# Print the first 5 matches and links
print("First 5 Matches:")
for match, long_url in longest_url_items[:5]:
    print(f"Match Identifier: {match}, Longest URL: {long_url}")

# Print a separator
print("\n...\n")  # Optional separator between sections

# Print the last 5 matches and links
print("Last 5 Matches:")
for match, long_url in longest_url_items[-5:]:
    print(f"Match Identifier: {match}, Longest URL: {long_url}")

First 5 Matches:
Match Identifier: az-alkmaar-g-a-eagles-Uoq4OXmC, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/az-alkmaar-g-a-eagles-Uoq4OXmC/
Match Identifier: ajax-willem-ii-40ZEtF3t, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/ajax-willem-ii-40ZEtF3t/
Match Identifier: twente-heracles-2asCMBIO, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/twente-heracles-2asCMBIO/
Match Identifier: utrecht-feyenoord-YJkeQgJa, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/utrecht-feyenoord-YJkeQgJa/
Match Identifier: heerenveen-sparta-rotterdam-QgnmSFlm, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/heerenveen-sparta-rotterdam-QgnmSFlm/

...

Last 5 Matches:
Match Identifier: ajax-sittard-IkYj6aFa, Longest URL: https://www.oddsportal.com/football/netherlands/eredivisie/ajax-sittard-IkYj6aFa/
Match Identifier: twente-heerenveen-pOKq8Lpm, Longest URL: https://www.oddspo

# Opening Each Match Link and Scraping Full Time Odds

In [184]:
from bs4 import BeautifulSoup
import pandas as pd

# URL fragments appended to a match link to open one market tab each, in the
# order: Full Time, Double Chance, Half Time, HT Double Chance, BTTS.
extensions = ['#1X2;2', '#double;2', '#1X2;3', '#double;3', '#bts;2']
full_time_links = [link + extensions[0] for link in longest_urls.values()]
double_chance_links = [link + extensions[1] for link in longest_urls.values()]
half_time_links = [link + extensions[2] for link in longest_urls.values()]
ht_double_chance_links = [link + extensions[3] for link in longest_urls.values()]
btts_links = [link + extensions[4] for link in longest_urls.values()]

# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\[" is an invalid escape sequence.
odds_selector = r'.text-\[\#2F2F2F\] .height-content , .max-sm\:gap-1'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(10)

# One entry per match: team names and the three full-time odds
home, away, ft1, ftx, ft2 = [], [], [], [], []

try:
    for link in full_time_links:
        # Load the page; if the snapshot has no bet365 entry, load it once more.
        for _attempt in range(2):
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            elements = soup.select(odds_selector)
            teams = soup.select('.leading-4 p')

            # The first matched paragraph is split on " - " into home and away
            match_text = teams[0].get_text()
            home_team, away_team = [team.strip() for team in match_text.split(' - ')]

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The three elements after the bookmaker entry are read as 1 / X / 2;
        # convert them all first so the lists never end up with unequal lengths.
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]
        home.append(home_team)
        away.append(away_team)
        ft1.append(odds[0])
        ftx.append(odds[1])
        ft2.append(odds[2])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

# Create a DataFrame from the lists
ft_df = pd.DataFrame({
    'Home': home,
    'Away': away,
    'FT1': ft1,
    'FTX': ftx,
    'FT2': ft2
})

ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0
1,Ajax,Willem II,1.38,5.0,7.5
2,Twente,Heracles,1.22,7.0,11.0
3,Utrecht,Feyenoord,4.0,3.8,1.85
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9


# Taking Full Time Double Chance Odds

In [185]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(10)

# One entry per match: the three full-time double chance odds
dc1x, dc12, dcx2 = [], [], []

try:
    for link in double_chance_links:
        # Load the page; if the snapshot has no bet365 entry, load it once more.
        for _attempt in range(2):
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The three elements after the bookmaker entry are read as 1X / 12 / X2
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]
        dc1x.append(odds[0])
        dc12.append(odds[1])
        dcx2.append(odds[2])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

ft_df['DC1X'], ft_df['DC12'], ft_df['DCX2'] = dc1x, dc12, dcx2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0,1.17,1.25,2.1
1,Ajax,Willem II,1.38,5.0,7.5,1.1,1.17,3.0
2,Twente,Heracles,1.22,7.0,11.0,1.05,1.1,4.0
3,Utrecht,Feyenoord,4.0,3.8,1.85,1.91,1.25,1.22
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9,1.4,1.29,1.57


# Taking Half Time Odds

In [186]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(10)

# One entry per match: the three half-time odds
ht1, htx, ht2 = [], [], []

try:
    for link in half_time_links:
        # Load the page; if the snapshot has no bet365 entry, load it once more.
        for _attempt in range(2):
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The three elements after the bookmaker entry are read as 1 / X / 2
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]
        ht1.append(odds[0])
        htx.append(odds[1])
        ht2.append(odds[2])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

ft_df['HT1'], ft_df['HTX'], ft_df['HT2'] = ht1, htx, ht2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0,1.17,1.25,2.1,2.3,2.3,4.75
1,Ajax,Willem II,1.38,5.0,7.5,1.1,1.17,3.0,1.83,2.5,7.0
2,Twente,Heracles,1.22,7.0,11.0,1.05,1.1,4.0,1.62,2.88,8.5
3,Utrecht,Feyenoord,4.0,3.8,1.85,1.91,1.25,1.22,4.33,2.38,2.4
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9,1.4,1.29,1.57,2.88,2.38,3.25


# Taking Half Time Double Chance Odds

In [187]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(10)

# One entry per match: the three half-time double chance odds
ht1x, ht12, htx2 = [], [], []

try:
    for link in ht_double_chance_links:
        # Load the page; if the snapshot has no bet365 entry, load it once more.
        for _attempt in range(2):
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The three elements after the bookmaker entry are read as 1X / 12 / X2
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]
        ht1x.append(odds[0])
        ht12.append(odds[1])
        htx2.append(odds[2])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

ft_df['HT1X'], ft_df['HT12'], ft_df['HTX2'] = ht1x, ht12, htx2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0,1.17,1.25,2.1,2.3,2.3,4.75,1.17,1.57,1.57
1,Ajax,Willem II,1.38,5.0,7.5,1.1,1.17,3.0,1.83,2.5,7.0,1.08,1.5,1.83
2,Twente,Heracles,1.22,7.0,11.0,1.05,1.1,4.0,1.62,2.88,8.5,1.05,1.4,2.2
3,Utrecht,Feyenoord,4.0,3.8,1.85,1.91,1.25,1.22,4.33,2.38,2.4,1.53,1.53,1.2
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9,1.4,1.29,1.57,2.88,2.38,3.25,1.33,1.53,1.4


# Taking BTTS Odds

In [189]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(10)

# One entry per match: the two prices of the BTTS market
btts, otts = [], []

try:
    for link in btts_links:
        # Load the page; if the snapshot has no bet365 entry, load it once more.
        for _attempt in range(2):
            driver.get(link)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are stored as BTTS / OTTS
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        btts.append(odds[0])
        otts.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

ft_df['BTTS'], ft_df['OTTS'] = btts, otts
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0,1.17,1.25,2.1,2.3,2.3,4.75,1.17,1.57,1.57,1.75,2.0
1,Ajax,Willem II,1.38,5.0,7.5,1.1,1.17,3.0,1.83,2.5,7.0,1.08,1.5,1.83,2.0,1.75
2,Twente,Heracles,1.22,7.0,11.0,1.05,1.1,4.0,1.62,2.88,8.5,1.05,1.4,2.2,1.91,1.91
3,Utrecht,Feyenoord,4.0,3.8,1.85,1.91,1.25,1.22,4.33,2.38,2.4,1.53,1.53,1.2,1.62,2.2
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9,1.4,1.29,1.57,2.88,2.38,3.25,1.33,1.53,1.4,1.5,2.5


# Taking Over/Under Odds Full Time

In [190]:
# URL fragments appended to a match link to open one full-time market each:
# Over/Under 1.5, Over/Under 2.5, Over/Under 3.5, Over/Under 4.5.
over_under = ['#over-under;2;1.50;0', '#over-under;2;2.50;0', '#over-under;2;3.50;0', '#over-under;2;4.50;0']
over15_links = [link + over_under[0] for link in longest_urls.values()]
over25_links = [link + over_under[1] for link in longest_urls.values()]
over35_links = [link + over_under[2] for link in longest_urls.values()]
over45_links = [link + over_under[3] for link in longest_urls.values()]

# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: team names and the Over/Under 1.5 odds
home, away, over15, under15 = [], [], [], []

try:
    for link in over15_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            teams = soup.select('.leading-4 p')
            elements = soup.select(odds_selector)

            # The first matched paragraph is split on " - " into home and away
            match_text = teams[0].get_text()
            home_team, away_team = [team.strip() for team in match_text.split(' - ')]

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under;
        # convert them first so the lists never end up with unequal lengths.
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        home.append(home_team)
        away.append(away_team)
        over15.append(odds[0])
        under15.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

# Create a DataFrame from the lists
over_df = pd.DataFrame({
    'Home': home,
    'Away': away,
    '1.5O': over15,
    '1.5U': under15,
})

over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0
1,Ajax,Willem II,1.22,4.0
2,Twente,Heracles,1.13,6.0
3,Utrecht,Feyenoord,1.2,4.33
4,Heerenveen,Sparta Rotterdam,1.17,5.0


# Taking Over/Under 2.5 Odds

In [191]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: the Over/Under 2.5 odds
over25, under25 = [], []

try:
    for link in over25_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        over25.append(odds[0])
        under25.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

over_df['2.5O'], over_df['2.5U'] = over25, under25
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0,1.75,2.05
1,Ajax,Willem II,1.22,4.0,1.7,2.1
2,Twente,Heracles,1.13,6.0,1.44,2.7
3,Utrecht,Feyenoord,1.2,4.33,1.67,2.15
4,Heerenveen,Sparta Rotterdam,1.17,5.0,1.57,2.35


# Taking Over/Under 3.5 Odds

In [192]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: the Over/Under 3.5 odds
over35, under35 = [], []

try:
    for link in over35_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        over35.append(odds[0])
        under35.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

over_df['3.5O'], over_df['3.5U'] = over35, under35
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0,1.75,2.05,2.75,1.4
1,Ajax,Willem II,1.22,4.0,1.7,2.1,2.63,1.44
2,Twente,Heracles,1.13,6.0,1.44,2.7,2.0,1.73
3,Utrecht,Feyenoord,1.2,4.33,1.67,2.15,2.63,1.44
4,Heerenveen,Sparta Rotterdam,1.17,5.0,1.57,2.35,2.38,1.53


# Taking Over/Under 4.5 Odds

In [193]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: the Over/Under 4.5 odds
over45, under45 = [], []

try:
    for link in over45_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        over45.append(odds[0])
        under45.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

over_df['4.5O'], over_df['4.5U'] = over45, under45
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14
1,Ajax,Willem II,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17
2,Twente,Heracles,1.13,6.0,1.44,2.7,2.0,1.73,3.5,1.29
3,Utrecht,Feyenoord,1.2,4.33,1.67,2.15,2.63,1.44,5.0,1.17
4,Heerenveen,Sparta Rotterdam,1.17,5.0,1.57,2.35,2.38,1.53,4.0,1.22


# Taking Half Time Over/Under Odds

In [194]:
# URL fragments appended to a match link to open one half-time market each:
# Over/Under 0.5 and Over/Under 1.5.
ht_over_under = ['#over-under;3;0.50;0', '#over-under;3;1.50;0']
ht_over05_links = [link + ht_over_under[0] for link in longest_urls.values()]
ht_over15_links = [link + ht_over_under[1] for link in longest_urls.values()]

# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: the half-time Over/Under 0.5 odds
ht_over05, ht_under05 = [], []

try:
    for link in ht_over05_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        ht_over05.append(odds[0])
        ht_under05.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

over_df['HT0.5O'], over_df['HT0.5U'] = ht_over05, ht_under05
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.33,3.25
1,Ajax,Willem II,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25
2,Twente,Heracles,1.13,6.0,1.44,2.7,2.0,1.73,3.5,1.29,1.22,4.0
3,Utrecht,Feyenoord,1.2,4.33,1.67,2.15,2.63,1.44,5.0,1.17,1.3,3.4
4,Heerenveen,Sparta Rotterdam,1.17,5.0,1.57,2.35,2.38,1.53,4.0,1.22,1.29,3.5


In [195]:
# Raw string: the backslashes are CSS escapes that must reach the selector
# engine unchanged; in a normal literal "\:" is an invalid escape sequence.
odds_selector = r'.max-mm\:hidden , .gap-\[3px\] .height-content'

driver = webdriver.Chrome(service=service)
# implicitly_wait only sets how long element lookups may block for this
# session; it does not pause the script, so it is configured once here.
driver.implicitly_wait(3)

# One entry per match: the half-time Over/Under 1.5 odds
ht_over15, ht_under15 = [], []

try:
    for link in ht_over15_links:
        # First attempt loads the link; if the snapshot has no bet365 entry,
        # the page is refreshed once and parsed again.
        for attempt in range(2):
            if attempt == 0:
                driver.get(link)
            else:
                driver.refresh()

            # Scroll until the page height stops changing before taking the snapshot
            scroll_to_load_matches()

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(odds_selector)

            # Index of the first selected element whose markup mentions bet365
            bet365_index = next(
                (index for index, element in enumerate(elements) if 'bet365' in str(element)),
                None,
            )
            if bet365_index is not None:
                break
        else:
            raise RuntimeError(f"bet365 odds not found for {link}")

        # The two elements after the bookmaker entry are read as over / under
        odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]
        ht_over15.append(odds[0])
        ht_under15.append(odds[1])
finally:
    # Always release the browser, even when a page fails to parse
    driver.quit()

over_df['HT1.5O'], over_df['HT1.5U'] = ht_over15, ht_under15
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
0,AZ Alkmaar,G.A. Eagles,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.33,3.25,2.63,1.44
1,Ajax,Willem II,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5
2,Twente,Heracles,1.13,6.0,1.44,2.7,2.0,1.73,3.5,1.29,1.22,4.0,2.0,1.73
3,Utrecht,Feyenoord,1.2,4.33,1.67,2.15,2.63,1.44,5.0,1.17,1.3,3.4,2.5,1.5
4,Heerenveen,Sparta Rotterdam,1.17,5.0,1.57,2.35,2.38,1.53,4.0,1.22,1.29,3.5,2.38,1.53


# Merging all Dataframes Together and Saving

In [196]:
# Combine the two odds tables, keeping only Home/Away pairs present in both
merged_df = pd.merge(ft_df, over_df, how='inner', on=['Home', 'Away'])

# Write the combined table to Excel without the index column, then preview it
merged_df.to_excel('bet365_odds.xlsx', index=False)
merged_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,...,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
0,AZ Alkmaar,G.A. Eagles,1.7,3.8,5.0,1.17,1.25,2.1,2.3,2.3,...,1.75,2.05,2.75,1.4,5.5,1.14,1.33,3.25,2.63,1.44
1,Ajax,Willem II,1.38,5.0,7.5,1.1,1.17,3.0,1.83,2.5,...,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5
2,Twente,Heracles,1.22,7.0,11.0,1.05,1.1,4.0,1.62,2.88,...,1.44,2.7,2.0,1.73,3.5,1.29,1.22,4.0,2.0,1.73
3,Utrecht,Feyenoord,4.0,3.8,1.85,1.91,1.25,1.22,4.33,2.38,...,1.67,2.15,2.63,1.44,5.0,1.17,1.3,3.4,2.5,1.5
4,Heerenveen,Sparta Rotterdam,2.3,3.6,2.9,1.4,1.29,1.57,2.88,2.38,...,1.57,2.35,2.38,1.53,4.0,1.22,1.29,3.5,2.38,1.53
