### Crawler for Esports Arbitrage

This notebook crawls the bookmakers and exchanges Orbit, BuffBet, 1xBit, Pinnacle/PS3838, SBObet, BetinAsia and Fairlay for odds on upcoming esports and soccer games, and finds arbitrage opportunities in two-way markets: handicap bets, or matches that can only end in a win or a loss, never a draw.

Tech used: this notebook relies heavily on Selenium for manipulating browser behaviour and BeautifulSoup/requests for scraping. 

In [2]:
#Package import
from selenium import webdriver
import webbrowser
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
import os
from datetime import datetime
from selenium.webdriver import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from IPython.display import display, HTML
from IPython.display import clear_output
import json

#for MacOS
#from webdriver_manager.firefox import GeckoDriverManager 

In [3]:
# Defining the browser webdriver

# Machine-specific locations of the Firefox profile and geckodriver. The
# defaults are the original Windows paths; set FIREFOX_PROFILE_DIR and/or
# GECKODRIVER_PATH in the environment to run on another machine without
# editing this cell.
FIREFOX_PROFILE_DIR = os.environ.get(
    "FIREFOX_PROFILE_DIR",
    "C:\\Users\\ULTRA\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\ffprofile.selenium",
)
GECKODRIVER_PATH = os.environ.get(
    "GECKODRIVER_PATH",
    "C:\\Users\\ULTRA\\Downloads\\tools\\geckodriver.exe",
)

profile = webdriver.FirefoxProfile(FIREFOX_PROFILE_DIR)

# Presumably set so the bookmaker sites are less likely to detect the
# automated browser - TODO confirm these preferences still have an effect.
profile.set_preference("dom.webdriver.enabled", False)
profile.set_preference('useAutomationExtension', False)
profile.update_preferences()
desired = DesiredCapabilities.FIREFOX

#Windows:
driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH, firefox_profile=profile, desired_capabilities=desired)

#MacOS:
#driver = webdriver.Firefox(executable_path=GeckoDriverManager().install()) #, firefox_profile=profile, desired_capabilities=desired)

#Checks setting box to open links in new tab
driver.get("about:preferences")
checked = driver.find_element_by_id("linkTargeting")
checked.click()

In [4]:
# Define websites to visit
# One landing-page URL per bookmaker/exchange. PB and LB are defined here but
# are not in the list of sites that the tab-opening cell below opens.
PB = 'https://en.pari-match.com/en/e-sports'
BB = 'https://buff.bet/en/esports'
LB = 'https://loot.bet/'
one_X = 'https://1xbit.com/line/Esports/'
orbit_X = 'https://www.orbitxch.com/customer/sport/27454571'
PS = 'https://www.ps3838.com/en/euro/sports/e-sports'
PS_soccer = 'https://www.ps3838.com/en/euro/sports/soccer/germany'
SBO_soccer = 'https://www.sbobet.com/euro/football/germany'
# Login page that redirects to /trade afterwards (see the `next` query parameter)
BL = 'https://black.betinasia.com/login?next=/trade'

In [None]:
# Open one browser tab per site.
# The order matters: the scraper functions below pick their tab by position in
# driver.window_handles (e.g. 1 and 2 for the two Orbit tabs, 3 for BuffBet),
# with handle 0 being the window the driver started in.

for site in (orbit_X, orbit_X, BB, one_X, PS, PS_soccer, SBO_soccer, BL):
    script = f'window.open("{site}");'
    print(script)
    driver.execute_script(script)

# If necessary you can log in now

In [6]:
# Functions to grab the data from websites

def get_data_1x():
    """Scrape two-way odds from the 1xBit tab.

    Switches the shared ``driver`` to window handle 4 and parses the page
    source that is currently loaded there.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time and Bet_Type (always "1/2"). Odds are NaN for every event that
        does not expose exactly two parsable prices (e.g. 1x2 markets).
    """
    driver.switch_to.window(driver.window_handles[4])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    time_list = []

    for container in soup.find_all("div", {"class": "c-events__item_col"}):
        teams = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "c-events__team"}
        )]
        # Only containers that hold exactly two team names are events.
        if len(teams) != 2:
            continue
        team_list.append(teams)

        odds = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "c-bets__bet"}
        )]
        odds = [odd for odd in odds if odd != "-"]  # removes dashes

        # Convert eagerly: a generator would only be evaluated inside
        # pd.DataFrame(), where a bad price string could no longer be handled.
        # Anything that is not a clean two-way price pair (1x2 markets with
        # three or more prices, suspended sides, junk text) becomes NaN/NaN.
        try:
            odds_row = [float(odd) for odd in odds] if len(odds) == 2 else [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        times = [x.get_text().strip() for x in container.find_all(
            "div", {"class": "c-events__time"}
        )]
        time_list.append(times)

    # Create DF
    df_1X = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_1X = df_1X.join(df_odds).join(df_times)
    df_1X["Bet_Type"] = "1/2"

    return df_1X

In [7]:
def get_data_lb():
    """Scrape match odds from the LootBet tab.

    NOTE(review): the tab index is read from a module-level ``number`` that is
    not defined in the visible part of this notebook - confirm it is set
    before this function is called.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2
        and Time ("<date> <time>"), limited to the first 70 rows. Odds are NaN
        when a row does not yield two parsable prices.
    """
    driver.switch_to.window(driver.window_handles[number])

    # Scroll down in 35px steps (presumably so lazily rendered rows get loaded).
    for step in range(20):
        driver.execute_script("window.scrollTo(0, " + str(step * 35) + ")")
        time.sleep(0.01)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []

    for container in soup.find_all("div", {"class": "itemNew hover-market"}):
        teams = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "name"}
        )]
        if len(teams) == 3:
            # The middle "name" entry is dropped; only the two outer ones are kept.
            del teams[1]
        team_list.append(teams)

        odds = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "cof"}
        )]
        if len(odds) > 3:
            odds = odds[0:2]
        # Exactly three prices is a 1x2 market; that and any other non-pair is
        # invalid. Conversion is eager so bad strings are handled right here.
        try:
            odds_row = [float(odd) for odd in odds] if len(odds) == 2 else [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        date_list.append([x.get_text().strip() for x in container.find_all(
            "span", {"class": "date"}
        )])
        time_list.append([x.get_text().strip() for x in container.find_all(
            "span", {"class": "time"}
        )])

    # Create DF
    df_LB = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_LB = df_LB.join(df_odds).join(df_dates).join(df_times)

    df_LB["Time"] = df_LB["Date"] + " " + df_LB["Time"]
    df_LB = df_LB.drop("Date", axis=1)

    df_LB = df_LB[:70]  # Limit to 70 matches

    return df_LB

In [8]:
def get_data_pb():
    """Scrape match odds from the Parimatch tab.

    NOTE(review): the tab index is read from a module-level ``number`` that is
    not defined in the visible part of this notebook - confirm it is set
    before this function is called.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2
        and Time ("<date> 2021 <time>"). Odds are NaN when a row does not
        yield two parsable prices; date and time are the string "NaN" when the
        date cell is missing or not of the form "<date> / <time>".
    """
    driver.switch_to.window(driver.window_handles[number])

    # Scroll down in 35px steps (presumably so lazily rendered rows get loaded).
    for step in range(50):
        driver.execute_script("window.scrollTo(0, " + str(step * 35) + ")")
        time.sleep(0.01)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []

    for container in soup.find_all("div", {"class": "_3i6j5pH655bYkz5944HSdq"}):
        teams = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "_2-4kPKVpNrNoq_0Ylv6TFX"}
        )]
        team_list.append(teams)

        odds = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "_1TBLfOVfJx5AZbnqsME6Y5 _2QYjpCvQKAOO3cGwQDvfax"}
        )]
        if len(odds) == 4:
            odds = odds[0:2]
        # Empty rows, 1x2 markets (3 or 5 prices), '--' placeholders and any
        # other non-pair all end up as NaN/NaN. The conversion is eager: with
        # a generator the except clause could never fire.
        try:
            odds_row = [float(odd) for odd in odds] if len(odds) == 2 else [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        stamps = [x.get_text().strip() for x in container.find_all(
            "div", {"class": "sn74bYiEiedAa1IAXv20h"}
        )]
        parts = stamps[0].split("/") if stamps else []
        if len(parts) >= 2:
            # NOTE(review): the year is hard-coded; left unchanged because the
            # code consuming this string is not visible here.
            date = parts[0].strip() + " 2021"
            times = parts[1].strip()
        else:
            date = "NaN"
            times = "NaN"

        date_list.append(date)
        time_list.append(times)

    # Create DF
    df_PB = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_PB = df_PB.join(df_odds).join(df_dates).join(df_times)

    df_PB["Time"] = df_PB["Date"] + " " + df_PB["Time"]
    df_PB = df_PB.drop("Date", axis=1)

    return df_PB

In [9]:
def get_data_bb():
    """Scrape match odds from the BuffBet tab (window handle 3).

    Alternative when the site is blocked by hcaptcha: copy the page's HTML
    into a local text file "txt.txt" and build ``soup`` from that file instead
    of from ``driver.page_source`` (see the commented lines below).

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time and Bet_Type (always "1/2"). Odds are NaN when a row does not
        yield two parsable prices.
    """
    #f = open("txt.txt", encoding="utf8")
    #data = f.read()
    #soup = BeautifulSoup(data, 'html.parser')

    driver.switch_to.window(driver.window_handles[3])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []

    for container in soup.find_all("div", {"class": "_38bJi"}):
        teams = [x.get_text().strip() for x in container.find_all(
            "a", {"class": "_3bf_k _1WIu4"}
        )]
        if len(teams) == 3:
            del teams[1]
        team_list.append(teams)

        odds = [x.get_text().strip() for x in container.find_all(
            "div", {"class": "_1sT8o _2ALVH"}
        )]
        if len(odds) > 3:
            odds = odds[0:2]
        # Missing prices, 1x2 markets (three prices), '--' and '' placeholders
        # all become NaN/NaN. Prices may use a decimal comma. The conversion
        # is eager: with a generator the except clause could never fire.
        try:
            if len(odds) == 2:
                odds_row = [float(odd.replace(",", ".")) for odd in odds]
            else:
                odds_row = [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        stamps = [x.get_text().strip() for x in container.find_all(
            "div", {"class": "_1jkHU"}
        )]
        if len(stamps) == 0:
            # No date element: the row is marked as live.
            stamps = ["LIVE LIVE LIVE"]
        parts = stamps[0].split(" ")
        if len(parts) >= 3:
            # NOTE(review): the year is hard-coded; left unchanged because the
            # code consuming this string is not visible here.
            date = str(parts[0]) + " " + str(parts[1]) + " 2021"
            times = parts[2]
        else:
            # Unexpected layout: keep the raw text instead of raising IndexError.
            date = stamps[0]
            times = "NaN"
        date_list.append(date)
        time_list.append(times)

    # Create DF
    df_BB = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_BB = df_BB.join(df_odds).join(df_dates).join(df_times)

    df_BB["Time"] = df_BB["Date"] + " " + df_BB["Time"]
    df_BB["Bet_Type"] = "1/2"
    df_BB = df_BB.drop("Date", axis=1)

    return df_BB

In [10]:
def get_data_ox_page(window):
    """Scrape back and lay odds from one Orbit/Betfair tab.

    Args:
        window: index into ``driver.window_handles`` of the tab to parse.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2
        (back prices), Lay_1, Lay_2, Time and Bet_Type (always "1/2"). Back
        odds are NaN and lay odds are 0 when a market does not expose two
        parsable back/lay price pairs.
    """
    driver.switch_to.window(driver.window_handles[window])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []
    lays = []

    # The date heading is looked up on the whole page, not per market, so it
    # is the same for every row and only needs to be computed once.
    headings = [x.get_text().strip() for x in soup.find_all(
        "div", {"class": "biab_inplay-sport-item-title"}
    )]
    # The first four characters of the heading are dropped.
    # NOTE(review): the year is hard-coded; left unchanged because the code
    # consuming this string is not visible here.
    date = headings[0][4:] + " 2021" if headings else '---'

    for container in soup.find_all("div", {"class": "biab_table-wrapper"}):
        # Team names are recovered from the raw markup: the wrapper div and
        # list brackets are stripped and the <br/> between the two names is
        # turned into a comma.
        markup = str(list(container.find_all(
            "div", {"class": "biab_market-title-team-names js-teams"})))
        for word in ['<div class="biab_market-title-team-names js-teams">', '</div>', '[', ']']:
            markup = markup.replace(word, '')
            markup = markup.replace('<br/>', ', ')
        names = markup.split(",")
        if len(names) < 2:
            # No usable team markup: skip the market before anything is
            # appended, so the parallel lists stay aligned.
            continue
        team_list.append([names[0], names[1].strip()])

        odds = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "js-odds biab_odds"})]

        lay = [0, 0]
        if len(odds) == 4:
            # Prices alternate back, lay, back, lay.
            try:
                lay = [float(odds[1].replace(",", ".")), float(odds[3].replace(",", "."))]
            except ValueError:
                lay = [0, 0]  # e.g. '--' placeholders
            odds = [odds[0], odds[2]]
        lays.append(lay)

        # Anything that is not a clean pair of back prices becomes NaN/NaN.
        # The conversion is eager: with a generator the except clause could
        # never fire.
        try:
            if len(odds) == 2:
                odds_row = [float(odd.replace(",", ".")) for odd in odds]
            else:
                odds_row = [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        date_list.append(date)

        times = [x.get_text().strip() for x in container.find_all(
            "span", {"class": "biab_market-time"}
        )]
        if times == ['In-Play'] or times == ['Starting soon']:
            times = ['LIVE']
        time_list.append(times)

    # Create DF
    df_OX = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_lays = pd.DataFrame(lays, columns=["Lay_1", "Lay_2"])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_OX = df_OX.join(df_odds).join(df_lays).join(df_dates).join(df_times)

    df_OX["Time"] = df_OX["Date"] + " " + df_OX["Time"]
    df_OX["Bet_Type"] = "1/2"
    df_OX = df_OX.drop("Date", axis=1)

    return df_OX

def get_data_ox():
    """Scrape both Orbit tabs (window handles 1 and 2) and stack the results.

    Returns:
        pandas.DataFrame: the rows of the first tab followed by the rows of
        the second tab, each keeping its own original index.
    """
    pages = [get_data_ox_page(handle) for handle in (1, 2)]
    return pd.concat(pages)

In [11]:
def get_data_ps_page():
    """Scrape the currently shown page of the PS3838/Pinnacle esports tab.

    Works on window handle 5 and parses every ``tr.status_O`` row that
    contains "team_name onextwo" spans.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time and Bet_Type (always "1/2"). Odds are float NaN (not the string
        "Nan") when a row does not yield two parsable prices.
    """
    driver.switch_to.window(driver.window_handles[5])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []

    for container in soup.find_all("tr", {"class": "status_O"}):
        # Get teams (the left-to-right mark U+200E is stripped from the names)
        teams = [x.get_text().strip().replace(u'\u200e', "") for x in container.find_all(
            "span", {"class": "team_name onextwo"})]
        if not teams:
            # Rows without team names contribute nothing to any list.
            continue
        team_list.append(teams)

        # Get odds. Conversion is eager so that bad strings are caught here;
        # empty cells and non-pairs become NaN/NaN.
        odds = [x.get_text() for x in container.find_all("span", {"class": "o_right"})]
        try:
            odds_row = [float(odd) for odd in odds] if len(odds) == 2 else [nan, nan]
        except ValueError:
            odds_row = [nan, nan]
        odds_list.append(odds_row)

        # Get date & time
        stamp = [x.get_text() for x in container.find_all("span", {"class": "DateTime"})]
        if not stamp:
            date = ["NaN"]
            times = ["NaN"]
        elif stamp[0].count(' ') > 1:
            # More than one space in the text is treated as a live event.
            date = [stamp[0]]
            times = ["LIVE"]
        else:
            # First six characters are kept as the date, last five as the time.
            times = [stamp[0][-5:]]
            date = [stamp[0][:6]]
        date_list.append(date)
        time_list.append(times)

    # Create DF
    df_PS = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_PS = df_PS.join(df_odds).join(df_dates).join(df_times)

    df_PS["Time"] = df_PS["Date"] + " " + df_PS["Time"]
    df_PS["Bet_Type"] = "1/2"
    df_PS = df_PS.drop("Date", axis=1)

    return df_PS

def get_data_ps():
    """Scrape two date tabs of the PS3838 esports page and stack the results.

    Parses the page as shown, clicks the second cell of the date menu, parses
    again, then clicks the first cell to restore the initial view.

    Returns:
        pandas.DataFrame: rows of the first scrape followed by the second.
    """
    first_date_cell = '.dateMenutb > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(1)'
    second_date_cell = '.dateMenutb > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2)'

    frames = [get_data_ps_page()]

    time.sleep(1)
    driver.find_element_by_css_selector(second_date_cell).click()
    time.sleep(1)  # give the page a moment to re-render before parsing
    frames.append(get_data_ps_page())

    df_PS = pd.concat(frames)
    driver.find_element_by_css_selector(first_date_cell).click()

    return df_PS

In [12]:
def get_data_fl():
    """Fetch esports match markets from the Fairlay free-markets endpoint.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time ("dd-mm-YYYY HH:MM", or "NaN NaN" if the closing date cannot be
        parsed) and Bet_Type (always "1/2").
        Odds_1 is the best bid; Odds_2 is derived from the best ask as
        ``1 + (1 - commission) / (ask - 1)``. Both are 0 when the market has
        no usable order book.

    Raises:
        requests.exceptions.RequestException: if the request fails or times out.
    """
    # A timeout keeps a dead endpoint from blocking the notebook forever.
    r = requests.get('http://83.171.236.114:8080/free/markets/{"Cat":32,"Descr":"Match", "_Period":[1],"_Type":[0]}', timeout=30)
    res = json.loads(r.content.decode('utf-8'))

    team_list = []
    odds_list = []
    date_list = []
    time_list = []
    commission = 0

    for game in res:
        if game["Descr"] != "Match":
            continue

        team_list.append([game["Ru"][0]["Name"], game["Ru"][1]["Name"]])

        odds = game["OrdBStr"]
        odds_row = [0, 0]
        if len(odds) != 0 and odds != "~":
            try:
                # The last character of the order-book string is dropped
                # before it is parsed as JSON.
                book = json.loads(odds[:-1])
                odd_back = float(book["Bids"][0][0])
                odd_lay = float(book["Asks"][0][0])
                odd_lay_eff = round(1 + ((1 - commission) / (odd_lay - 1)), 3)
                odds_row = [odd_back, odd_lay_eff]
            except (ValueError, TypeError, KeyError, IndexError, ZeroDivisionError):
                # Malformed book, an empty side, or a lay price of exactly 1:
                # treat like a market without odds.
                odds_row = [0, 0]
        odds_list.append(odds_row)

        closing = game["ClosD"]
        try:
            # The trailing character is stripped before ISO parsing.
            closing = datetime.fromisoformat(closing[:-1])
            game_time = closing.strftime('%H:%M')
            game_day = closing.strftime('%d-%m-%Y')
        except (ValueError, TypeError):
            game_time = "NaN"
            game_day = "NaN"
        date_list.append([game_day])
        time_list.append([game_time])

    # Create DF
    df_FL = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_FL = df_FL.join(df_odds).join(df_dates).join(df_times)

    df_FL["Time"] = df_FL["Date"] + " " + df_FL["Time"]
    df_FL["Bet_Type"] = "1/2"
    df_FL = df_FL.drop("Date", axis=1)

    return df_FL

In [13]:
def get_data_fl_soccer():
    """Fetch soccer spread markets from the Fairlay free-markets endpoint.

    Only markets whose description contains "Spread" are kept. The market
    description becomes the Bet_Type, is shortened to " AHM" (match spread) or
    "AH1" (first-half spread), and is appended in lower case to both player
    names.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time and Bet_Type. Odds_1 is the best bid; Odds_2 is derived from the
        best ask as ``1 + (1 - commission) / (ask - 1)``. Both are 0 when the
        market has no usable order book.

    Raises:
        requests.exceptions.RequestException: if the request fails or times out.
    """
    # A timeout keeps a dead endpoint from blocking the notebook forever.
    r = requests.get('http://83.171.236.114:8080/free/markets/%7B%22Cat%22:1,%22Descr%22:%22Spread%22,%20%22Comp%22:%20%22Germany%20-%20Bundesliga%202%22%7D', timeout=30)
    res = json.loads(r.content.decode('utf-8'))

    team_list = []
    odds_list = []
    date_list = []
    time_list = []
    bet_type_list = []
    commission = 0

    for game in res:
        if "Spread" not in game["Descr"]:
            continue

        team_list.append([game["Ru"][0]["Name"], game["Ru"][1]["Name"]])

        odds = game["OrdBStr"]
        odds_row = [0, 0]
        if len(odds) != 0 and odds != "~":
            try:
                # The last character of the order-book string is dropped
                # before it is parsed as JSON.
                book = json.loads(odds[:-1])
                odd_back = float(book["Bids"][0][0])
                odd_lay = float(book["Asks"][0][0])
                odd_lay_eff = round(1 + ((1 - commission) / (odd_lay - 1)), 3)
                odds_row = [odd_back, odd_lay_eff]
            except (ValueError, TypeError, KeyError, IndexError, ZeroDivisionError):
                # Malformed book, an empty side, or a lay price of exactly 1:
                # treat like a market without odds.
                odds_row = [0, 0]
        odds_list.append(odds_row)

        closing = game["ClosD"]
        try:
            # The trailing character is stripped before ISO parsing.
            closing = datetime.fromisoformat(closing[:-1])
            game_time = closing.strftime('%H:%M')
            game_day = closing.strftime('%d-%m-%Y')
        except (ValueError, TypeError):
            game_time = "NaN"
            game_day = "NaN"
        date_list.append([game_day])
        time_list.append([game_time])

        bet_type_list.append(game["Descr"])

    # Create DF
    df_FL = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_bet_types = pd.DataFrame(bet_type_list, columns=['Bet_Type'])
    df_FL = df_FL.join(df_odds).join(df_dates).join(df_times).join(df_bet_types)
    df_FL["Time"] = df_FL["Date"] + " " + df_FL["Time"]

    # Collapse every known spread description into a short label. One replace
    # call per label instead of one call per handicap value.
    handicaps = ["0.0", "0.25", "0.5", "0.75", "1.0", "1.25", "1.5", "1.75", "2.0", "2.5",
                 "-0.0", "-0.25", "-0.5", "-0.75", "-1.0", "-1.25", "-1.5", "-1.75", "-2.0", "-2.5"]
    df_FL = df_FL.replace(
        to_replace=["Match Spread for HomeTeam " + nr for nr in handicaps],
        value=" AHM")
    # NOTE(review): unlike " AHM" this label has no leading space, so it is
    # glued directly onto the team name below. Left unchanged because it
    # affects the merge keys - confirm whether " AH1" was intended.
    df_FL = df_FL.replace(
        to_replace=["1st Half Spread for HomeTeam " + nr for nr in handicaps],
        value="AH1")

    df_FL["Player_1"] = df_FL["Player_1"] + df_FL["Bet_Type"].str.lower()
    df_FL["Player_2"] = df_FL["Player_2"] + df_FL["Bet_Type"].str.lower()
    df_FL = df_FL.drop("Date", axis=1)

    return df_FL

In [14]:
def get_soccer_data_ps_page():
    """Scrape Asian-handicap soccer odds from the PS3838/Pinnacle soccer tab.

    Works on window handle 6 and reads the page sections with the ids "HDP_0"
    and "HDP_1", which are labelled " AHM" and " AH1" respectively. Each
    player name is built as "<team> <handicap><label>".

    A row is either added to every column or skipped entirely, so the columns
    always stay aligned; a missing section is skipped.

    Returns:
        pandas.DataFrame with the columns Player_1, Player_2, Odds_1, Odds_2,
        Time and Bet_Type. Odds are float NaN when a row does not yield two
        parsable prices.
    """
    driver.switch_to.window(driver.window_handles[6])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")
    team_list = []
    odds_list = []
    date_list = []
    time_list = []
    handicap_list = []

    for handicap_type in ["HDP_0", "HDP_1"]:
        section = soup.find("div", {"id": handicap_type})
        if section is None:
            continue

        for container in section.find_all("tr", {"class": "status_O"}):
            # Get teams and their handicap lines (U+200E marks are stripped)
            names = [x.get_text().strip().replace(u'\u200e', "")
                     for x in container.find_all("span", {"class": "team_name handicap"})]
            lines = [x.get_text().strip().replace(u'\u200e', "")
                     for x in container.find_all("span", {"class": "o_middle"})]
            if len(names) < 2 or len(lines) < 2:
                # Incomplete row: skip it as a whole instead of aborting the
                # section half-way through.
                continue
            teams = [names[0] + " " + lines[0], names[1] + " " + lines[1]]

            # Get odds. Conversion is eager so that bad strings are caught
            # here; empty cells and non-pairs become NaN/NaN.
            odds = [x.get_text() for x in container.find_all("span", {"class": "o_right"})]
            try:
                odds_row = [float(odd) for odd in odds] if len(odds) == 2 else [nan, nan]
            except ValueError:
                odds_row = [nan, nan]

            # Get date & time
            stamp = [x.get_text() for x in container.find_all("span", {"class": "DateTime"})]
            if not stamp:
                date = ["NaN"]
                times = ["NaN"]
            elif stamp[0].count(' ') > 1:
                # More than one space in the text is treated as a live event.
                date = [stamp[0]]
                times = ["LIVE"]
            else:
                # First six characters are kept as the date, last five as the time.
                times = [stamp[0][-5:]]
                date = [stamp[0][:6]]

            # Append to all lists together so they cannot drift apart.
            team_list.append(teams)
            odds_list.append(odds_row)
            date_list.append(date)
            time_list.append(times)
            handicap_list.append(handicap_type)

    # Create DF
    df_PS = pd.DataFrame(team_list, columns=['Player_1', 'Player_2'])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', 'Odds_2'])
    df_dates = pd.DataFrame(date_list, columns=["Date"])
    df_times = pd.DataFrame(time_list, columns=['Time'])
    df_PS = df_PS.join(df_odds).join(df_dates).join(df_times)
    df_handicap = pd.DataFrame(handicap_list, columns=['Bet_Type'])

    df_PS["Time"] = df_PS["Date"] + " " + df_PS["Time"]
    df_PS["Bet_Type"] = df_handicap["Bet_Type"]
    df_PS = df_PS.replace(to_replace="HDP_0", value=" AHM")
    df_PS = df_PS.replace(to_replace="HDP_1", value=" AH1")

    df_PS["Player_1"] = df_PS["Player_1"] + df_PS["Bet_Type"]
    df_PS["Player_2"] = df_PS["Player_2"] + df_PS["Bet_Type"]

    df_PS = df_PS.drop(["Date"], axis=1)

    return df_PS

def get_soccer_data_ps():
    """Scrape two date tabs of the PS3838 soccer page and stack the results.

    Parses the page as shown, clicks the second cell of the date menu, parses
    again, then clicks the first cell to restore the initial view.

    Returns:
        pandas.DataFrame: rows of the first scrape followed by the second.
    """
    first_date_cell = '.dateMenutb > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(1)'
    second_date_cell = '.dateMenutb > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2)'

    frames = [get_soccer_data_ps_page()]

    time.sleep(2)
    driver.find_element_by_css_selector(second_date_cell).click()
    time.sleep(2)  # give the page a moment to re-render before parsing
    frames.append(get_soccer_data_ps_page())

    df_PS_soccer = pd.concat(frames)
    driver.find_element_by_css_selector(first_date_cell).click()

    return df_PS_soccer

In [15]:
def get_data_sb_page():
    """Scrape handicap odds from the currently shown page of the SBObet tab.

    Works on window handle 7. Every ``div.MarketT`` block is one market; its
    first ``span`` holds the market name, which is shortened to "AHM"
    ("Asian Handicap") or "AH1" ("First Half Asian Handicap").

    Rows that lack two team names, two handicap values or two parsable prices
    are skipped. A row without its own date/time block reuses the most recent
    one seen.

    Returns:
        pandas.DataFrame with the columns Bet_Type, Player_1, Player_2,
        Odds_1, Odds_2 and Time. Player names have the form
        "<team> <handicap> <bet type>"; Time is "<last 5 chars> <rest>" of the
        date/time block text.
    """
    driver.switch_to.window(driver.window_handles[7])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    rows = []
    # Date/time carry over from row to row (and market to market) until a row
    # provides a new block; "NaN" is used until the first block is seen.
    time_part = "NaN"
    date_part = "NaN"

    for market in soup.find_all("div", {"class": "MarketT"}):
        title = market.span
        if title is None:
            continue
        handicap_name = title.text

        for row in market.find_all("tr"):
            teams = [team.text for team in row.find_all("span", {"class": "OddsL"})]
            if not teams:
                continue

            for time_block in row.find_all("div", {"class": "DateTimeDiv"}):
                stamp = time_block.get_text()
                date_part = stamp[:-5]
                time_part = stamp[-5:]  # presumably "HH:MM" - TODO confirm

            handicaps = [h.get_text() for h in row.find_all("span", {"class": "OddsM"})]
            try:
                odds = [float(o.get_text()) for o in row.find_all("span", {"class": "OddsR"})]
            except ValueError:
                continue

            if len(teams) < 2 or len(handicaps) < 2 or len(odds) < 2:
                continue

            rows.append([handicap_name, teams[0], teams[1], handicaps[0], handicaps[1],
                         odds[0], odds[1], time_part, date_part])

    # Create DF
    df_SB = pd.DataFrame(rows, columns=["Bet_Type", "Player_1", "Player_2", "H_1", "H_2", "Odds_1", "Odds_2", "Time", "Date"])
    df_SB = df_SB.replace(to_replace="Asian Handicap", value="AHM")
    df_SB = df_SB.replace(to_replace="First Half Asian Handicap", value="AH1")
    df_SB["Player_1"] = df_SB["Player_1"] + " " + df_SB["H_1"] + " " + df_SB["Bet_Type"]
    df_SB["Player_2"] = df_SB["Player_2"] + " " + df_SB["H_2"] + " " + df_SB["Bet_Type"]
    df_SB["Time"] = df_SB["Time"] + " " + df_SB["Date"]
    df_SB = df_SB.drop(["H_1", "H_2", "Date"], axis=1)

    return df_SB

def get_data_sb():
    """Scrape two views of the SBObet soccer page and stack the results.

    Parses the page as shown, clicks the "dt:3" and "mt:2" controls, parses
    again, then clicks "dt:2" and "mt:2" to switch the page back.
    NOTE(review): what the individual controls select is not visible from
    this code - presumably a date tab and a market type.

    Returns:
        pandas.DataFrame: rows of the first scrape followed by the second.
    """
    # Raw strings: the selectors must contain literal backslashes to escape
    # the colons in the element ids, and "\:" is not a valid Python escape.
    data_1 = get_data_sb_page()
    time.sleep(2)
    driver.find_element_by_css_selector(r'#bu\:od\:go\:dt\:3').click()
    time.sleep(5)
    driver.find_element_by_css_selector(r'#bu\:od\:go\:mt\:2').click()
    time.sleep(3)
    data_2 = get_data_sb_page()
    L = [data_1, data_2]
    df_SB = pd.concat(L)
    driver.find_element_by_css_selector(r'#bu\:od\:go\:dt\:2').click()
    time.sleep(1)
    driver.find_element_by_css_selector(r'#bu\:od\:go\:mt\:2').click()

    return df_SB

In [16]:
def get_data_BL():
    """Scrape two-way odds from the BetInAsia tab (window handle 8).

    Times, team pairs and odds are collected into three parallel lists in
    page order and joined by position.

    Returns:
        pandas.DataFrame with the columns Time, Player_1, Player_2, Odds_1,
        Odds_2 and Bet_Type (always "1/2"). Odds are NaN when a price is
        missing, shown as the "⇆" placeholder, or not numeric; a team name is
        NaN when its element is missing.
    """
    driver.switch_to.window(driver.window_handles[8])
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    nan = float("nan")

    def _price(tag):
        # Convert one offer-price element to a float; NaN when not a number.
        text = tag.get_text()
        if text == "⇆":
            return nan
        try:
            return float(text)
        except ValueError:
            return nan

    def _team(match, side):
        # Name of the last div with class `side`: the part after the first
        # " - " separator, or the whole text when there is no separator.
        found = match.find_all("div", {"class": side})
        if not found:
            return nan
        parts = found[-1].text.split(" - ")
        return parts[1] if len(parts) > 1 else parts[0]

    team_list = []
    odds_list = []
    time_list = []

    for container in soup.find_all("div", {"class": "market"}):
        for time_ in container.find_all("div", {"class": "time"}):
            time_list.append(time_.text)

        for match in container.find_all("span", {"class": "info"}):
            team_list.append([_team(match, "home"), _team(match, "away")])

        for odd_panel in container.find_all("div", {"class": "offer-group timeWin_tp_all_ml default"}):
            prices = odd_panel.find_all("span", {"class": "offer-price"})
            if len(odd_panel.text) > 0 and len(prices) >= 2:
                # First price belongs to player 1, the last one to player 2.
                odds_list.append([_price(prices[0]), _price(prices[-1])])
            else:
                odds_list.append([nan, nan])

    df_BL = pd.DataFrame(time_list, columns=['Time'])
    df_players = pd.DataFrame(team_list, columns=['Player_1', "Player_2"])
    df_odds = pd.DataFrame(odds_list, columns=['Odds_1', "Odds_2"])
    df_BL = df_BL.join(df_players).join(df_odds)
    df_BL["Bet_Type"] = "1/2"
    df_BL[["Odds_1", "Odds_2"]] = df_BL[["Odds_1", "Odds_2"]].apply(pd.to_numeric, errors='coerce')

    return df_BL

In [17]:
def save_to_csv():
    """Write each scraped DataFrame to its own CSV file in the working directory.

    Reads the module-level frames df_1X, df_BB, df_OX, df_PS, df_FL,
    df_FL_soccer, df_PS_soccer, df_SB_soccer and df_BL; each file is named
    after its frame.
    """
    exports = (
        (df_1X, "df_1X.csv"),
        (df_BB, "df_BB.csv"),
        (df_OX, "df_OX.csv"),
        (df_PS, "df_PS.csv"),
        (df_FL, "df_FL.csv"),
        (df_FL_soccer, "df_FL_soccer.csv"),
        (df_PS_soccer, "df_PS_soccer.csv"),
        (df_SB_soccer, "df_SB_soccer.csv"),
        (df_BL, "df_BL.csv"),
    )
    for frame, filename in exports:
        frame.to_csv(filename)

In [18]:
def cleanup(df):
    """Normalise the player/team names so frames from different sites can be matched.

    For the columns Player_1 and Player_2 the names are lower-cased, a few
    known aliases are rewritten, and a list of filler words is deleted.
    Surrounding whitespace left behind by a deleted word is not trimmed.

    All replacements are literal (``regex=False``). Without that, pandas
    versions that default to regex matching would treat "05 (n)" as a pattern
    with a capture group and never remove the literal text.

    Args:
        df: DataFrame with string columns 'Player_1' and 'Player_2'. It is
            modified in place.

    Returns:
        The same DataFrame object, for convenience.
    """
    aliases = [
        ("imperial pro gaming", 'ipg'),
        ("imperialpro", 'ipg'),
        ("five", '5'),
        ("sv ata spor", 'turkgucu'),
        ("munich", 'munchen'),
    ]
    # Order matters: e.g. "e-sports" must be tried after "esports", and the
    # space-padded club prefixes/suffixes are matched exactly as written.
    filler_words = ["team", "esports", "gaming", "focusme", "e-sports", "esport", "clan", "academy", "challenger", "!", "spvgg", "05 (n)", "kfc", "fsv ", "tus rw ", "rot-weiss", " sc", "vfb ", " ksc", "sc ", "tsv ", "fc ", " fc"]

    for column in ['Player_1', 'Player_2']:
        names = df[column].str.lower()
        for old, new in aliases:
            names = names.str.replace(old, new, regex=False)
        for word in filler_words:
            names = names.str.replace(word, '', regex=False)
        df[column] = names
    return df

In [21]:
def merging():
    """Pair identical matches across bookmakers and compute payout figures.

    Reads the bookmaker frames from module scope (``df_BL``, ``df_1X``,
    ``df_BB``, ``df_OX``, ``df_PS``, ``df_FL``, ``df_SB_soccer``,
    ``df_PS_soccer``, ``df_FL_soccer``) and adds a ``Match`` column
    (``"<Player_1> - <Player_2>"``) to each of them in place.

    Selected pairs of frames (and several frames against themselves) are
    inner-merged on ``Match`` and stacked into one table, on which these
    columns are computed:

    * ``max_1`` / ``max_2``: row-wise maximum of ``Odds_n_x`` and ``Odds_n_y``
    * ``Payout``: ``(1/max_1 + 1/max_2) * 100``
    * ``Payout_Lay_n``: ``max_n / Lay_n * 100`` for rows where ``Lay_n > 0``
    * ``Timestamp``: time of this call

    Side effects:
        Appends the full table (no header row) to ``df_DB.csv``.

    Returns:
        The table without rows where ``Payout``, ``Payout_Lay_1`` and
        ``Payout_Lay_2`` are all missing and without rows whose ``Payout``
        exceeds 101, sorted by ``Payout``, with the helper/player/suffixed lay
        columns removed.

    Raises:
        KeyError: if an expected column (e.g. ``Lay_1`` or ``Lay_1_x``) is not
            present in the merged table.
    """
    # Creating Match column (this mutates the module-level frames)
    for df in [df_BL, df_1X, df_BB, df_OX, df_PS, df_FL, df_SB_soccer, df_PS_soccer, df_FL_soccer]:
        df["Match"] = df["Player_1"] + " - " + df["Player_2"]

    # (left, right) frames to compare. Columns present in both frames get the
    # suffix "_x" (left) / "_y" (right), so the order inside a pair matters.
    book_pairs = [
        (df_1X, df_FL),
        (df_OX, df_FL),
        (df_FL, df_BB),
        (df_FL, df_PS),
        (df_1X, df_BB),
        (df_OX, df_BB),
        (df_OX, df_1X),
        (df_PS, df_1X),
        (df_PS, df_OX),
        (df_PS, df_BB),
        (df_PS_soccer, df_SB_soccer),
        (df_FL_soccer, df_SB_soccer),
        (df_PS_soccer, df_FL_soccer),
        (df_BL, df_1X),
        (df_BL, df_BB),
        (df_BL, df_FL),
        (df_BL, df_OX),
        (df_BL, df_PS),
        # every esports book merged with itself
        (df_BL, df_BL),
        (df_PS, df_PS),
        (df_1X, df_1X),
        (df_OX, df_OX),
        (df_BB, df_BB),
        (df_FL, df_FL),
    ]

    # Merge DFs on Match that is in both DFs, then stack all results at once
    # (a single concat instead of repeated, deprecated DataFrame.append calls).
    merged_frames = [pd.merge(left, right, on="Match", how="inner") for left, right in book_pairs]
    df_DB = pd.concat(merged_frames, sort=False)

    # reset_index() stores the old per-merge row labels in an "index" column.
    # That column is kept until after the CSV export on purpose: df_DB.csv has
    # no header row, so its column layout must stay the same between runs.
    df_DB = df_DB.reset_index()

    # Finding the biggest odds per outcome across the two merged books
    df_DB["max_1"] = df_DB[["Odds_1_x", "Odds_1_y"]].astype(float).max(axis=1)
    df_DB["max_2"] = df_DB[["Odds_2_x", "Odds_2_y"]].astype(float).max(axis=1)

    # Calculating payout for odds and lays
    df_DB["Payout"] = (1/df_DB["max_1"] + 1/df_DB["max_2"]) * 100

    df_DB.loc[df_DB["Lay_1"] > 0, "Payout_Lay_1"] = df_DB["max_1"]/df_DB["Lay_1"] * 100
    df_DB.loc[df_DB["Lay_2"] > 0, "Payout_Lay_2"] = df_DB["max_2"]/df_DB["Lay_2"] * 100
    df_DB["Timestamp"] = datetime.now()

    # Saving new data: append the rows straight to the history file
    df_DB.to_csv("df_DB.csv", mode="a", header=False, index=False, encoding="utf8")

    # Drop all odds with no use: rows without any payout figure at all ...
    nothing_computed = (
        df_DB["Payout_Lay_1"].isna()
        & df_DB["Payout_Lay_2"].isna()
        & df_DB["Payout"].isna()
    )
    df_DB = df_DB[~nothing_computed]
    # ... and rows with a Payout above 101 (rows with a missing Payout are kept)
    df_DB = df_DB[~(df_DB["Payout"] > 101)]
    df_DB = df_DB.sort_values("Payout")
    df_DB = df_DB.drop(columns=["index", "Player_1_y", "Player_2_y", "Player_1_x", "Player_2_x", "Lay_1_x", "Lay_2_x", "Lay_1_y", "Lay_2_y"])

    return df_DB

In [22]:
def highlight(x):
    """Return a frame of CSS strings shaped like ``x`` for ``Styler.apply``.

    ``Payout`` cells below 100 get a pale green background; ``Payout_Lay_1``
    and ``Payout_Lay_2`` cells above 100 get a light coral background. All
    other cells get an empty style string.
    """
    green = 'background-color: palegreen'
    coral = 'background-color: lightcoral'
    no_style = ''  # if no default colors

    # (column, rows to colour, css) -- masks are built before the style frame
    rules = [
        ('Payout', x["Payout"] < 100, green),
        ('Payout_Lay_1', x["Payout_Lay_1"] > 100, coral),
        ('Payout_Lay_2', x["Payout_Lay_2"] > 100, coral),
    ]

    styles = pd.DataFrame(no_style, index=x.index, columns=x.columns)
    for column, rows, css in rules:
        styles.loc[rows, column] = css

    return styles

In [23]:
def run():
    """Run one complete scrape -> save -> clean -> merge cycle.

    The scraped frames are bound at module level (``global``) because
    ``save_to_csv()`` and ``merging()`` read them from module scope. Binding
    them as locals would make those helpers work on whatever frames an earlier
    cell left behind instead of the freshly scraped ones.

    ``get_data_fl()`` and ``get_data_sb_page()`` are best-effort: if either
    raises, a warning is printed and the frame already bound at module level
    (from a previous run) is reused. If no such frame exists, the function
    fails with ``NameError`` when that frame is first used.

    Returns:
        The DataFrame produced by ``merging()``.
    """
    global df_BL, df_FL, df_SB_soccer, df_PS_soccer, df_FL_soccer
    global df_1X, df_BB, df_OX, df_PS

    start_time = datetime.now()

    df_BL = get_data_BL()

    try:
        df_FL = get_data_fl()
    except Exception as exc:
        print("get_data_fl() failed, reusing previous df_FL:", repr(exc))
    try:
        df_SB_soccer = get_data_sb_page()
    except Exception as exc:
        print("get_data_sb_page() failed, reusing previous df_SB_soccer:", repr(exc))

    df_PS_soccer = get_soccer_data_ps()
    df_FL_soccer = get_data_fl_soccer()
    df_1X = get_data_1x()
    df_BB = get_data_bb()
    df_OX = get_data_ox()
    df_PS = get_data_ps()

    print("Webscraping done.")

    # (bookmaker code, frame); the code is stored both as a ``name`` attribute
    # on the frame and as its "Book" column.
    books = [
        ("BL", df_BL),
        ("1X", df_1X),
        ("BB", df_BB),
        ("OX", df_OX),
        ("PS", df_PS),
        ("FL", df_FL),
        ("FL_soccer", df_FL_soccer),
        ("PS_soccer", df_PS_soccer),
        ("SB_soccer", df_SB_soccer),
    ]
    for label, frame in books:
        frame.name = label
        frame["Book"] = frame.name

    # Saved before cleanup(), so the CSV files hold the names as scraped
    save_to_csv()
    print("Saving done.")

    for _, frame in books:
        cleanup(frame)
    print("Done cleaning")
    print("Ran at: ", pd.to_datetime(datetime.now()).round('10s'))
    print("Time to run: ", (datetime.now() - start_time))

    #Sanity check for all DFs:
    for _, frame in books:
        print(frame.name, len(frame))

    display(HTML(data="""<style>div#notebook-container    { width: 95%; }</style>"""))

    df = merging()
    return df

### Finding good matches

In [None]:
# One full pass at notebook level: scrape every bookmaker, label and save the
# frames, normalise the team names, then merge and display the result.
start_time = datetime.now()

# Webscraping starts here
df_BL = get_data_BL()
df_FL = get_data_fl()
df_FL_soccer = get_data_fl_soccer()
df_SB_soccer = get_data_sb_page()
df_PS_soccer = get_soccer_data_ps()
df_1X = get_data_1x()
df_BB = get_data_bb()
df_OX = get_data_ox()
df_PS = get_data_ps()

print("Webscraping done.")

df_results = pd.DataFrame()

# (bookmaker code, frame): the code becomes the frame's ``name`` attribute and
# its "Book" column.
labelled_frames = [
    ("BL", df_BL),
    ("1X", df_1X),
    ("BB", df_BB),
    ("OX", df_OX),
    ("PS", df_PS),
    ("FL", df_FL),
    ("FL_soccer", df_FL_soccer),
    ("PS_soccer", df_PS_soccer),
    ("SB_soccer", df_SB_soccer),
]
for label, i in labelled_frames:
    i.name = label
    i["Book"] = i.name

save_to_csv()
print("Saving done.")

# Data cleaning starts here
for _, i in labelled_frames:
    cleanup(i)

print("Done cleaning")
print("Ran at: ", pd.to_datetime(datetime.now()).round('10s'))
print("Run time: ", (datetime.now() - start_time))

#Sanity check for all DFs:
for _, i in labelled_frames:
    print(i.name, len(i))

# All results are also saved in an external html file "Arby_dataframe" which can be viewed with a browser
display(HTML(data="""<style>div#notebook-container    { width: 95%; }</style>"""))

df = merging()
df.to_html("Arby_dataframe.html")
df.style.apply(highlight, axis=None)

In [None]:
# Endless refresh loop: repeat the full scraping run, write the result to an
# html file, then wait 5 minutes. Interrupt the kernel to stop it.
status = 1
pause_seconds = 300

while status == 1:
    clear_output(wait=True)
    df = run()
    df.to_html("Arby_dataframe.html")
    time.sleep(pause_seconds)

### Search in ALL games in DFs:

In [135]:
# All DFs get combined so you can search thoroughly within all games
book_frames = [df_BL, df_1X, df_BB, df_OX, df_PS, df_FL, df_FL_soccer, df_PS_soccer, df_SB_soccer]

for i in book_frames:
    i["Book"] = i.name

# Stack everything with a single concat; DataFrame.append is deprecated and
# copies the growing frame on every iteration. Row labels are kept as they are.
df_all = pd.concat(book_frames, sort=False)

# Search for teams or matches
#df_all[(df_all["Player_1"] == "dwg kia") | (df_all["Player_2"] == "dwg kia")]
df_all[df_all["Match"] == "epic - vindicta"]

In [None]:
# Or filter by Bookie: show only the rows whose "Book" code matches
from_book = df_all["Book"] == "FL_soccer"
df_all[from_book]

### Saving Selenium Profile in permanent local folder

In [None]:
# Read the directory of the profile the running Firefox session uses from the
# about:support page.
print("Get FF Temp Profile Path")
driver.get("about:support")
ffTempProfilePath = driver.find_element_by_id("profile-dir-box").text
print("ffTempProfilePath: ", ffTempProfilePath)

In [None]:
# now do your stuff

# copy ur stuff after use or periodically
print("safe Profile")
cwd = os.getcwd()
# os.path.join picks the right separator instead of a hard-coded backslash
pfadffprofile = os.path.join(cwd, "ffprofile.selenium")
print ("saving profile " + ffTempProfilePath + " to " + pfadffprofile)
# Both paths are quoted so the command still works when a path contains spaces.
# NOTE(review): xcopy is called without /E, so subfolders of the profile are
# presumably not copied -- confirm whether that is intended.
os.system('xcopy "{}" "{}"'.format(ffTempProfilePath, pfadffprofile))
print ("files should be copied :/") 