# Scraping Interactive Match Links From Main Page

In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from itertools import islice
from selenium.webdriver.chrome.service import Service
import warnings
from webdriver_manager.chrome import ChromeDriverManager
warnings.filterwarnings("ignore")

# Path of a manually installed chromedriver. It is kept for reference only:
# the Service below is built from the executable that webdriver_manager
# returns, not from this path.
driver_path = 'C:/Users/99451/AppData/Local/Programs/Python/Python312/Lib/site-packages/selenium/webdriver/chromedriver-win64/chromedriver.exe'

# Build the Chrome service from the webdriver_manager result and start Chrome
chromedriver_binary = ChromeDriverManager().install()
service = Service(chromedriver_binary)
driver = webdriver.Chrome(service=service)

# Open the OddsPortal page
matches_page_url = 'https://www.oddsportal.com/matches/football/20241212/'
driver.get(matches_page_url)

def scroll_to_load_matches(pause=3, max_rounds=None, browser=None):
    """Scroll to the bottom of the page until its height stops growing.

    Args:
        pause: Seconds to sleep after each scroll so new content can load.
        max_rounds: Upper bound on the number of height-increasing scrolls;
            ``None`` (the default) keeps scrolling until the height is stable.
        browser: Object exposing ``execute_script``; defaults to the
            module-level ``driver``.

    Returns:
        None. The page is scrolled as a side effect.
    """
    # Resolve the global at call time: other cells rebind ``driver`` to a new
    # browser session before calling this function.
    browser = driver if browser is None else browser
    last_height = browser.execute_script("return document.body.scrollHeight")

    rounds = 0
    while max_rounds is None or rounds < max_rounds:
        # Scroll down to the bottom
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # Wait for the new content to load

        # Calculate new scroll height and compare with last scroll height
        new_height = browser.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break  # Exit the loop if no more matches are loaded
        last_height = new_height
        rounds += 1

# Scroll to load all matches and collect every link target. The try/finally
# guarantees the browser is closed even if scrolling or a lookup raises.
try:
    scroll_to_load_matches()

    # Find all links on the page
    links = driver.find_elements(By.TAG_NAME, 'a')

    # Read each href exactly once (every get_attribute call goes back to the
    # browser) and keep only the non-empty values
    hrefs = (link.get_attribute('href') for link in links)
    urls = [href for href in hrefs if href]
finally:
    # Close the driver
    driver.quit()

# Dictionary to store longest URLs for each match
longest_urls = {}

# Regex pattern to identify match URLs (this may need to be adjusted based on the actual URL structure)
match_pattern = re.compile(r'/football/.+/.+/[^/]+-[^/]+-[\w-]+/')

for url in urls:
    # Skip anything that does not look like a match URL
    if not match_pattern.search(url):
        continue

    # The second-to-last '/'-separated piece is used as the match identifier
    parts = url.split('/')
    match_identifier = parts[-2]

    # Store the longest URL for each match
    current = longest_urls.get(match_identifier)
    if current is None or len(url) > len(current):
        longest_urls[match_identifier] = url

# Split every kept URL once; piece 4 is the country and piece 5 the league,
# e.g. https://www.oddsportal.com/football/europe/conference-league/<match>/
url_pieces = {key: url.split('/') for key, url in longest_urls.items()}
countries = [pieces[4] for pieces in url_pieces.values()]
leagues = [pieces[5] for pieces in url_pieces.values()]

# List of Leagues and Countries To Choose From
my_list_countries = ['argentina', 'austria', 'belgium', 'brazil', 'denmark', 
                  'england', 'france', 'germany', 'greece', 'italy', 'europe',
                   'mexico', 'netherlands', 'norway','poland', 'portugal',
                  'scotland', 'spain', 'sweden', 'switzerland', 'turkey', 'usa']

my_list_leagues = ['torneo-betano', 'bundesliga', 'jupiler-pro-league', 'serie-a-betano',
                   'superliga', 'premier-league', 'championship', 'league-one', 'league-two',
                   'national-league', 'ligue-1', 'ligue-2', '2-bundesliga', 'super-league',
                   'serie-a', 'serie-b', 'liga-mx', 'eredivisie', 'eliteserien', 'ekstraklasa',
                   'liga-portugal', 'liga-portugal-2', 'premiership', 'laliga', 'laliga2',
                   'allsvenskan', 'super-lig', 'uefa-nations-league',
                   'champions-league', 'europa-league', 'conference-league']

# Keep only matches whose country AND league are both in the lists above
filtered_dict = {
    key: url for key, url in longest_urls.items()
    if url_pieces[key][4] in my_list_countries and url_pieces[key][5] in my_list_leagues
}

# Get the first 26 key-value pairs
#filtered_dict = dict(islice(filtered_dict.items(), 26))

# Skip the first 2 items and keep the rest
#filtered_dict = dict(islice(filtered_dict.items(), 2, None))

# Convert the filtered dictionary to a list of (identifier, url) tuples
longest_url_items = list(filtered_dict.items())

print("Number of Games Found: ", len(filtered_dict))
# Print the first 5 matches and links
print("First 5 Matches:")
for match, long_url in longest_url_items[:5]:
    print(f"Match Identifier: {match}, Longest URL: {long_url}")

# Print a separator
print("\n...\n")  # Optional separator between sections

# Print the last 5 matches and links
print("Last 5 Matches:")
for match, long_url in longest_url_items[-5:]:
    print(f"Match Identifier: {match}, Longest URL: {long_url}")

Number of Games Found:  35
First 5 Matches:
Match Identifier: vikingur-reykjavik-djurgarden-bFnBiVPP, Longest URL: https://www.oddsportal.com/football/europe/conference-league/vikingur-reykjavik-djurgarden-bFnBiVPP/
Match Identifier: fc-astana-chelsea-vHdpzlx2, Longest URL: https://www.oddsportal.com/football/europe/conference-league/fc-astana-chelsea-vHdpzlx2/
Match Identifier: fiorentina-lask-linz-QmmKnRL7, Longest URL: https://www.oddsportal.com/football/europe/conference-league/fiorentina-lask-linz-QmmKnRL7/
Match Identifier: fc-copenhagen-hearts-pSdvhMcm, Longest URL: https://www.oddsportal.com/football/europe/conference-league/fc-copenhagen-hearts-pSdvhMcm/
Match Identifier: petrocub-hincesti-betis-K2zwUFA4, Longest URL: https://www.oddsportal.com/football/europe/conference-league/petrocub-hincesti-betis-K2zwUFA4/

...

Last 5 Matches:
Match Identifier: ajax-lazio-QTlyIMUF, Longest URL: https://www.oddsportal.com/football/europe/europa-league/ajax-lazio-QTlyIMUF/
Match Identifier

In [None]:
# Remove the key-value pair
#del filtered_dict["haugesund-moss-nuB3V6Xr"]

# Visiting Each Match Link and Scraping Full-Time Odds

In [16]:
# links = ['Full Time', 'Double Chance', 'Half Time', 'HT Double Chance', 'BTTS']
extensions = ['#1X2;2', '#double;2', '#1X2;3', '#double;3', '#bts;2']
full_time_links = [link + extensions[0] for link in filtered_dict.values()]
double_chance_links = [link + extensions[1] for link in filtered_dict.values()]
half_time_links = [link + extensions[2] for link in filtered_dict.values()]
ht_double_chance_links = [link + extensions[3] for link in filtered_dict.values()]
btts_links = [link + extensions[4] for link in filtered_dict.values()]

driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
home, away, ft1, ftx, ft2 = [], [], [], [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in full_time_links:
        driver.get(link)

        # Pause explicitly for the dynamic content: page_source is a one-off
        # snapshot, and implicitly_wait() only affects element lookups.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        # Team names come from the first '.leading-4 p' element of the most
        # recently parsed page, formatted as "Home - Away"
        match_text = soup.select('.leading-4 p')[0].get_text()
        home_team, away_team = [team.strip() for team in match_text.split(' - ')]

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 3
        else:
            # Convert all three cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]

        home.append(home_team)
        away.append(away_team)
        ft1.append(odds[0])
        ftx.append(odds[1])
        ft2.append(odds[2])
finally:
    driver.quit()

# Create a DataFrame from the lists
ft_df = pd.DataFrame({
    'Home': home,
    'Away': away,
    'FT1': ft1,
    'FTX': ftx,
    'FT2': ft2
})

print('Matches Found: ', len(ft_df))
ft_df.head()

Matches Found:  35


Unnamed: 0,Home,Away,FT1,FTX,FT2
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15
1,FC Astana,Chelsea,9.0,5.0,1.33
2,Fiorentina,LASK,1.36,5.0,8.5
3,FC Copenhagen,Hearts,1.38,4.75,7.5
4,Petrocub,Betis,12.0,7.5,1.18


# Taking Full Time Double Chance Odds

In [17]:
driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
dc1x, dc12, dcx2 = [], [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in double_chance_links:
        driver.get(link)

        # Pause explicitly for the dynamic content: page_source is a one-off
        # snapshot, and implicitly_wait() only affects element lookups.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 3
        else:
            # Convert all three cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]

        dc1x.append(odds[0])
        dc12.append(odds[1])
        dcx2.append(odds[2])
finally:
    driver.quit()

ft_df['DC1X'], ft_df['DC12'], ft_df['DCX2'] = dc1x, dc12, dcx2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15,1.67,1.29,1.36
1,FC Astana,Chelsea,9.0,5.0,1.33,3.25,1.17,1.07
2,Fiorentina,LASK,1.36,5.0,8.5,1.08,1.17,3.0
3,FC Copenhagen,Hearts,1.38,4.75,7.5,1.1,1.18,3.0
4,Petrocub,Betis,12.0,7.5,1.18,4.5,1.1,1.05


# Taking Half Time Odds

In [18]:
driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
ht1, htx, ht2 = [], [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in half_time_links:
        driver.get(link)

        # Pause explicitly for the dynamic content: page_source is a one-off
        # snapshot, and implicitly_wait() only affects element lookups.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 3
        else:
            # Convert all three cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]

        ht1.append(odds[0])
        htx.append(odds[1])
        ht2.append(odds[2])
finally:
    driver.quit()

ft_df['HT1'], ft_df['HTX'], ft_df['HT2'] = ht1, htx, ht2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15,1.67,1.29,1.36,3.6,2.2,2.88
1,FC Astana,Chelsea,9.0,5.0,1.33,3.25,1.17,1.07,7.0,2.63,1.8
2,Fiorentina,LASK,1.36,5.0,8.5,1.08,1.17,3.0,1.83,2.5,7.5
3,FC Copenhagen,Hearts,1.38,4.75,7.5,1.1,1.18,3.0,1.91,2.4,7.0
4,Petrocub,Betis,12.0,7.5,1.18,4.5,1.1,1.05,12.0,2.88,1.57


# Taking Half Time Double Chance Odds

In [19]:
driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
ht1x, ht12, htx2 = [], [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in ht_double_chance_links:
        driver.get(link)

        # Pause explicitly for the dynamic content: page_source is a one-off
        # snapshot, and implicitly_wait() only affects element lookups.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 3
        else:
            # Convert all three cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2, 3)]

        ht1x.append(odds[0])
        ht12.append(odds[1])
        htx2.append(odds[2])
finally:
    driver.quit()

ft_df['HT1X'], ft_df['HT12'], ft_df['HTX2'] = ht1x, ht12, htx2
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15,1.67,1.29,1.36,3.6,2.2,2.88,1.4,1.62,1.29
1,FC Astana,Chelsea,9.0,5.0,1.33,3.25,1.17,1.07,7.0,2.63,1.8,1.91,1.44,1.08
2,Fiorentina,LASK,1.36,5.0,8.5,1.08,1.17,3.0,1.83,2.5,7.5,1.07,1.5,1.83
3,FC Copenhagen,Hearts,1.38,4.75,7.5,1.1,1.18,3.0,1.91,2.4,7.0,1.08,1.53,1.8
4,Petrocub,Betis,12.0,7.5,1.18,4.5,1.1,1.05,12.0,2.88,1.57,2.25,1.4,1.05


# Taking BTTS Odds

In [20]:
driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
btts, otts = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in btts_links:
        driver.get(link)

        # Pause explicitly for the dynamic content: page_source is a one-off
        # snapshot, and implicitly_wait() only affects element lookups.
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        btts.append(odds[0])
        otts.append(odds[1])
finally:
    driver.quit()

ft_df['BTTS'], ft_df['OTTS'] = btts, otts
ft_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15,1.67,1.29,1.36,3.6,2.2,2.88,1.4,1.62,1.29,1.67,2.1
1,FC Astana,Chelsea,9.0,5.0,1.33,3.25,1.17,1.07,7.0,2.63,1.8,1.91,1.44,1.08,1.8,1.91
2,Fiorentina,LASK,1.36,5.0,8.5,1.08,1.17,3.0,1.83,2.5,7.5,1.07,1.5,1.83,1.91,1.8
3,FC Copenhagen,Hearts,1.38,4.75,7.5,1.1,1.18,3.0,1.91,2.4,7.0,1.08,1.53,1.8,1.91,1.8
4,Petrocub,Betis,12.0,7.5,1.18,4.5,1.1,1.05,12.0,2.88,1.57,2.25,1.4,1.05,2.25,1.57


# Taking Over/Under Odds Full Time

In [21]:
# links = ['Over/Under 1.5', 'Over/Under 2.5', 'Over/Under 3.5', 'Over/Under 4.5']
over_under = ['#over-under;2;1.50;0', '#over-under;2;2.50;0', '#over-under;2;3.50;0', '#over-under;2;4.50;0']
over15_links = [link + over_under[0] for link in filtered_dict.values()]
over25_links = [link + over_under[1] for link in filtered_dict.values()]
over35_links = [link + over_under[2] for link in filtered_dict.values()]
over45_links = [link + over_under[3] for link in filtered_dict.values()]

driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
home, away, over15, under15 = [], [], [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in over15_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        # Team names come from the first '.leading-4 p' element of the most
        # recently parsed page, formatted as "Home - Away"
        match_text = soup.select('.leading-4 p')[0].get_text()
        home_team, away_team = [team.strip() for team in match_text.split(' - ')]

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        home.append(home_team)
        away.append(away_team)
        over15.append(odds[0])
        under15.append(odds[1])
finally:
    driver.quit()

# Create a DataFrame from the lists
over_df = pd.DataFrame({
    'Home': home,
    'Away': away,
    '1.5O': over15,
    '1.5U': under15,
})

over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0
1,FC Astana,Chelsea,1.14,5.5
2,Fiorentina,LASK,1.18,4.5
3,FC Copenhagen,Hearts,1.22,4.0
4,Petrocub,Betis,1.14,5.5


# Taking Over/Under 2.5 Odds

In [22]:
driver = webdriver.Chrome(service=service)

over25, under25 = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in over25_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        over25.append(odds[0])
        under25.append(odds[1])
finally:
    driver.quit()

over_df['2.5O'], over_df['2.5U'] = over25, under25
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0,1.8,2.0
1,FC Astana,Chelsea,1.14,5.5,1.5,2.5
2,Fiorentina,LASK,1.18,4.5,1.62,2.25
3,FC Copenhagen,Hearts,1.22,4.0,1.7,2.1
4,Petrocub,Betis,1.14,5.5,1.5,2.5


# Taking Over/Under 3.5 Odds

In [23]:
driver = webdriver.Chrome(service=service)

over35, under35 = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in over35_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        over35.append(odds[0])
        under35.append(odds[1])
finally:
    driver.quit()

over_df['3.5O'], over_df['3.5U'] = over35, under35
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0,1.8,2.0,3.0,1.36
1,FC Astana,Chelsea,1.14,5.5,1.5,2.5,2.2,1.62
2,Fiorentina,LASK,1.18,4.5,1.62,2.25,2.5,1.5
3,FC Copenhagen,Hearts,1.22,4.0,1.7,2.1,2.63,1.44
4,Petrocub,Betis,1.14,5.5,1.5,2.5,2.2,1.62


# Taking Over/Under 4.5 Odds

In [24]:
driver = webdriver.Chrome(service=service)

over45, under45 = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in over45_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        over45.append(odds[0])
        under45.append(odds[1])
finally:
    driver.quit()

over_df['4.5O'], over_df['4.5U'] = over45, under45
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0,1.8,2.0,3.0,1.36,5.5,1.14
1,FC Astana,Chelsea,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25
2,Fiorentina,LASK,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18
3,FC Copenhagen,Hearts,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17
4,Petrocub,Betis,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25


# Taking Half Time Over/Under Odds

In [25]:
# links = ['Over/Under 0.5', 'Over/Under 1.5']
ht_over_under = ['#over-under;3;0.50;0', '#over-under;3;1.50;0']
ht_over05_links = [link + ht_over_under[0] for link in filtered_dict.values()]
ht_over15_links = [link + ht_over_under[1] for link in filtered_dict.values()]

driver = webdriver.Chrome(service=service)

# Extracting float values from the corresponding elements
ht_over05, ht_under05 = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in ht_over05_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        ht_over05.append(odds[0])
        ht_under05.append(odds[1])
finally:
    driver.quit()

over_df['HT0.5O'], over_df['HT0.5U'] = ht_over05, ht_under05
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0,1.8,2.0,3.0,1.36,5.5,1.14,1.36,3.0
1,FC Astana,Chelsea,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75
2,Fiorentina,LASK,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4
3,FC Copenhagen,Hearts,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25
4,Petrocub,Betis,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75


In [26]:
driver = webdriver.Chrome(service=service)

ht_over15, ht_under15 = [], []

# try/finally so the browser is closed even when a page fails to parse
try:
    for link in ht_over15_links:
        driver.get(link)

        # Scroll to load the whole page. scroll_to_load_matches() sleeps after
        # every scroll, which is what gives the dynamic content time to load
        # (implicitly_wait() would only affect element lookups).
        scroll_to_load_matches()
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Postponed matches contribute no row (raw strings keep the CSS
        # backslash escapes without invalid Python escape sequences)
        if soup.select(r'.max-sm\:\!mb-5 p')[0].text == 'Postponed':
            continue

        elements = soup.select(r'.max-mm\:hidden , .hidden.underline')

        # Find the index of the element containing the text "bet365"
        bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)
        if bet365_index is None:
            # Refresh the page once and look again
            driver.refresh()
            scroll_to_load_matches()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            elements = soup.select(r'.max-mm\:hidden , .hidden.underline')
            bet365_index = next((index for index, element in enumerate(elements) if 'bet365' in str(element)), None)

        if bet365_index is None:
            # Still missing after the retry: store NaN so the row stays
            # aligned with the other columns instead of raising on None + 1
            print(f"bet365 odds not found, storing NaN: {link}")
            odds = [float('nan')] * 2
        else:
            # Convert both cells before appending so a failed conversion
            # cannot leave the lists with different lengths
            odds = [float(elements[bet365_index + offset].text) for offset in (1, 2)]

        ht_over15.append(odds[0])
        ht_under15.append(odds[1])
finally:
    driver.quit()

over_df['HT1.5O'], over_df['HT1.5U'] = ht_over15, ht_under15
over_df.head()

Unnamed: 0,Home,Away,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
0,Vikingur Reykjavik,Djurgarden,1.22,4.0,1.8,2.0,3.0,1.36,5.5,1.14,1.36,3.0,2.63,1.44
1,FC Astana,Chelsea,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75,2.2,1.62
2,Fiorentina,LASK,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.38,1.53
3,FC Copenhagen,Hearts,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.63,1.44
4,Petrocub,Betis,1.14,5.5,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75,2.2,1.62


# Merging all Dataframes Together and Saving

In [27]:
# Join the two odds tables on the fixture; with an inner join only the
# Home/Away pairs present in both frames are kept
merged_df = pd.merge(ft_df, over_df, how='inner', on=['Home', 'Away'])

# Write the combined table to Excel without the index column, then preview it
merged_df.to_excel('bet365_odds.xlsx', index=False)
merged_df.head()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,...,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
0,Vikingur Reykjavik,Djurgarden,3.1,3.6,2.15,1.67,1.29,1.36,3.6,2.2,...,1.8,2.0,3.0,1.36,5.5,1.14,1.36,3.0,2.63,1.44
1,FC Astana,Chelsea,9.0,5.0,1.33,3.25,1.17,1.07,7.0,2.63,...,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75,2.2,1.62
2,Fiorentina,LASK,1.36,5.0,8.5,1.08,1.17,3.0,1.83,2.5,...,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.38,1.53
3,FC Copenhagen,Hearts,1.38,4.75,7.5,1.1,1.18,3.0,1.91,2.4,...,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.63,1.44
4,Petrocub,Betis,12.0,7.5,1.18,4.5,1.1,1.05,12.0,2.88,...,1.5,2.5,2.2,1.62,3.75,1.25,1.25,3.75,2.2,1.62
