In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time
import pandas as pd
import re
from datetime import datetime


def extract_text(s):
    """Parse one scraped over/under cell into ``[line, over_odds, line, under_odds]``.

    The cell is expected to look like two lines of text, for example
    ``o1.5 -168`` followed by ``u1.5 +123``.  Only the first over/under pair
    found in the text is used.

    Args:
        s: Cell text.  Anything that is not a string (``None``, NaN, ...) is
            treated as an empty cell.

    Returns:
        ``[over_line, over_odds, under_line, under_odds]`` as
        ``[float, int, float, int]``, or ``[]`` when no pair is found.
    """
    # Empty grid cells can surface as None/NaN; re would raise TypeError on them.
    if not isinstance(s, str):
        return []

    # The line must contain a decimal point.  The sign is optional on the
    # "over" odds but required on the "under" odds.
    pattern = r'o(\d+\.\d+)\s*([-+]?\d+)\nu(\d+\.\d+)\s*([-+]\d+)'

    match = re.search(pattern, s)
    if match is None:
        return []

    over_line, over_odds, under_line, under_odds = match.groups()
    return [float(over_line), int(over_odds), float(under_line), int(under_odds)]

def oavgc(row):
    """Average the "over" odds found in one row, grouped by line.

    Intended for ``DataFrame.apply(oavgc, axis=1)``.  Every string cell that
    contains an ``o`` is parsed with ``extract_text``; cells that do not parse
    into a full over/under pair are ignored.

    Args:
        row: A pandas Series holding one row of scraped cell texts.

    Returns:
        Dict mapping each line (float) to the arithmetic mean of its "over"
        odds, rounded to one decimal.  Empty when nothing could be parsed.
    """
    # NOTE(review): this is a plain arithmetic mean of the odds integers, so
    # mixing positive and negative prices yields values such as 32.7 -- confirm
    # that is the intended aggregate.
    odds_by_line = {}
    # Walk the row's own labels rather than a global DataFrame, so the function
    # works on any row it is handed.
    for label in row.index:
        cell = row[label]
        # Skip empty/NaN cells and previously computed (non-string) columns.
        if not isinstance(cell, str) or "o" not in cell:
            continue
        results = extract_text(cell)
        if len(results) != 4:
            continue
        odds_by_line.setdefault(results[0], []).append(results[1])

    return {
        line: round(sum(odds) / len(odds), 1)
        for line, odds in odds_by_line.items()
    }

def uavgc(row):
    """Average the "under" odds found in one row, grouped by line.

    Intended for ``DataFrame.apply(uavgc, axis=1)``.  Every string cell that
    contains a ``u`` is parsed with ``extract_text``; cells that do not parse
    into a full over/under pair are ignored.

    Args:
        row: A pandas Series holding one row of scraped cell texts.

    Returns:
        Dict mapping each line (float) to the arithmetic mean of its "under"
        odds, rounded to one decimal.  Empty when nothing could be parsed.
    """
    # NOTE(review): this is a plain arithmetic mean of the odds integers, so
    # mixing positive and negative prices yields values such as 72.4 -- confirm
    # that is the intended aggregate.
    odds_by_line = {}
    # Walk the row's own labels rather than a global DataFrame, so the function
    # works on any row it is handed.
    for label in row.index:
        cell = row[label]
        # Skip empty/NaN cells and previously computed (non-string) columns.
        if not isinstance(cell, str) or "u" not in cell:
            continue
        results = extract_text(cell)
        if len(results) != 4:
            continue
        # Grouped by the "over" line (results[0]), as the pair shares one line
        # in the scraped data; the odds come from the "under" side (results[3]).
        odds_by_line.setdefault(results[0], []).append(results[3])

    return {
        line: round(sum(odds) / len(odds), 1)
        for line, odds in odds_by_line.items()
    }

def cleanpd(s):
    """Split a scraped text blob into fields by inserting comma delimiters.

    Three substitutions are applied in order: a comma replaces the second
    space of every double space, a comma is inserted before an ``o`` that
    starts a line, and a comma is inserted after a newline that is followed
    by a space.  The marked-up text is then split on commas.

    Args:
        s: Raw text to split.

    Returns:
        List of the comma-separated pieces (whitespace is not stripped).
    """
    substitutions = (
        ("  ", " ,"),
        ("\no", "\n,o"),
        ("\n ", "\n, "),
    )
    marked = s
    for old, new in substitutions:
        marked = marked.replace(old, new)
    return marked.split(",")

# --- Browser setup -----------------------------------------------------------
# Requires a working Chrome / ChromeDriver installation.
driver = webdriver.Chrome()

# Open the props grid and shrink the page to 30% zoom.
# NOTE(review): presumably the zoom makes the grid render more rows/columns at
# once -- confirm against the site's behaviour.
driver.get("https://unabated.com/nba/props")
driver.execute_script("document.body.style.zoom='30%'")
time.sleep(0.3)

# --- Map grid column ids to sportsbook names ---------------------------------
sbln = driver.find_elements(By.CSS_SELECTOR, ".ag-header-container > div:nth-child(1) > div")
col_ids = [header.get_attribute("col-id") for header in sbln]
id2sbnames = {}
# The XPath starts with "//", so it is evaluated against the whole document;
# query the driver directly instead of depending on a leftover loop variable
# (which would not even exist when no header cells are found).
sbtitle = driver.find_elements(By.XPATH, "//div/div[@style and @title]")
for n, s in enumerate(sbtitle):
    tit = s.get_attribute("title")
    if "Real" in tit or "Low" in tit:
        # Keep only the text before the first " \n" of the tooltip.
        titsho = tit.split(" \n")[0]
        # NOTE(review): the n-1 offset assumes the titled elements are shifted
        # by one relative to the header cells -- verify against the live page.
        id2sbnames[col_ids[n-1]] = titsho

# --- Grid containers ---------------------------------------------------------
# Scrollable odds cells and the pinned left-hand column with the player names.
viewport_div = driver.find_element(By.CSS_SELECTOR,'.ag-center-cols-viewport')
name_div = driver.find_element(By.CSS_SELECTOR,'.ag-pinned-left-cols-container')

# --- Scrape state ------------------------------------------------------------
last_child_count = 0   # number of rows collected after the previous scroll step
player_data = {}       # row-id -> {col-id -> cell text}
id2name = {}           # row-id -> text of the pinned name cell
visited_pages = []     # labels of the dropdown entries already scraped

# Open the page dropdown and collect its entries (one button per page).
pagebutton = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > button:nth-child(1)")
driver.execute_script("arguments[0].click();", pagebutton)
dropdowndiv = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > div:nth-child(2)")
pages = dropdowndiv.find_elements(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > div:nth-child(2) > button")
findpage = True

def _element_text(element, attempts=5):
    """Return ``element.text``, retrying up to ``attempts`` times; None if every read fails."""
    for _ in range(attempts):
        try:
            return element.text
        except Exception:
            continue
    return None


def _first_unvisited_page(buttons, visited):
    """Return ``(button, label)`` for the first button whose label is not in ``visited``, else ``(None, None)``."""
    for button in buttons:
        label = _element_text(button)
        if label is not None and label not in visited:
            return button, label
    return None, None


# Scrape every entry of the page dropdown once and write one CSV per page.
# The browser is closed in ``finally`` so it does not stay open after an error
# or a manual interrupt.
try:
    page, page_name = _first_unvisited_page(pages, visited_pages)
    while page is not None:
        print("Scanning page:", page_name)
        visited_pages.append(page_name)
        time.sleep(0.5)
        driver.execute_script("arguments[0].click();", page)
        # Give the grid time to load the newly selected page.
        time.sleep(3)

        # Per-page state: start empty so this page's CSV only holds its own
        # rows and the "no new rows" stop condition is not skewed by rows
        # collected on earlier pages.
        player_data = {}
        id2name = {}
        last_child_count = 0

        # Scroll through the grid until a scroll step yields no new rows.
        while True:

            ## FIND ALL ROWS AND CELLS
            child_divs = viewport_div.find_elements(By.CSS_SELECTOR, '.ag-center-cols-container > div')
            id_child = name_div.find_elements(By.CSS_SELECTOR, ".ag-pinned-left-cols-container > div")
            for row in child_divs:
                try:
                    cells = row.find_elements(By.CSS_SELECTOR, "div.ag-cell.ag-cell-not-inline-editing.ag-cell-auto-height.ag-cell-value")
                    player_id = row.get_attribute('row-id')
                    player_data[player_id] = {}
                    for cell in cells:
                        col_id = cell.get_attribute("col-id")
                        cell_text = cell.text
                        player_data[player_id][col_id] = cell_text
                        print(col_id, cell_text)
                except Exception:
                    # Best effort: a row that cannot be read (for example
                    # because it left the DOM mid-scan) is skipped.
                    # KeyboardInterrupt is deliberately not swallowed.
                    continue

            ## GET PLAYER NAME
            for div in id_child:
                id2name[div.get_attribute('row-id')] = div.text

            ## CHECK FOR BREAK CONDITIONS
            if len(player_data) == last_child_count:
                break
            last_child_count = len(player_data)

            ## SCROLL
            time.sleep(0.3)
            actions = ActionChains(driver)
            # Drag from the last rendered row to pull further rows into view.
            actions.move_to_element(child_divs[-1])
            try:
                actions.click_and_hold().move_by_offset(0, 100).release().perform()
            except Exception:
                # A failed drag just means the next scan sees the same rows,
                # which ends the scroll loop.
                pass

        # Re-key the scraped cells by player name and sportsbook name.
        labeled_player_data = {}
        for key in player_data:
            # Fall back to the raw ids when a name/sportsbook was never mapped,
            # instead of aborting the whole run with a KeyError.
            raw_name = id2name.get(key, str(key))
            pname = raw_name.split("\nEdit")[0].replace("\n", " ")
            labeled_player_data[pname] = {}
            for key2 in player_data[key]:
                labeled_player_data[pname][id2sbnames.get(key2, key2)] = player_data[key][key2]

        if labeled_player_data:
            # ``df`` stays a module-level name: the row aggregators read it.
            df = pd.DataFrame(labeled_player_data).T
            df['Over'] = df.apply(oavgc, axis=1)
            df['Under'] = df.apply(uavgc, axis=1)
            now = datetime.now()
            date_time_str = now.strftime("Date %Y-%m-%d.  Hour %H")
            df.to_csv(page_name + " " + date_time_str + '.csv')
        else:
            print("No rows found on page:", page_name)

        ## TIME TO FIND A NEW PAGE TO CLICK
        pagebutton = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > button:nth-child(1)")
        driver.execute_script("arguments[0].click();", pagebutton)
        time.sleep(0.5)
        # Wait for the opened dropdown menu; if it never shows up, the
        # previously located dropdown element is reused.
        for _ in range(20):
            try:
                dropdowndiv = driver.find_element(By.XPATH, "//div[@class='ml-2 dropdown-menu show']")
                break
            except Exception:
                # Pause briefly so the retries actually give the menu time to open.
                time.sleep(0.1)

        pages = dropdowndiv.find_elements(By.TAG_NAME, "button")
        print("_________________")
        # The loop ends as soon as the dropdown offers no unvisited entry.
        page, page_name = _first_unvisited_page(pages, visited_pages)
        if page is not None:
            print("Found new page:", page_name)

    print("All pages processed")
finally:
    # Always close the browser window, even after an error or Ctrl-C.
    driver.quit()


Scanning page: 3pts Made
8   
7 o1.5 -168
u1.5 +123
67   
78   
84   
2 o1.5 -158
u1.5 +124
1 o1.5 -175
u1.5 +135
4   
20 o1.5 -156
u1.5 +114
3   
9 o1.5 -169
u1.5 +129
10 o1.5 -169
u1.5 +129
31 o1.5 -180
u1.5 +133
27 o1.5 -180
u1.5 +133
13   
17 o1.5 -180
u1.5 +133
54 o1.5 -169
u1.5 +129
22 o1.5 -180
u1.5 +133
25 o1.5 -180
u1.5 +133
8   
7 o3.5 +149
u3.5 -204
67   
78 o3.5 +130
u3.5 -160
84   
2 o3.5 +124
u3.5 -158
1 o3.5 +130
u3.5 -166
4   
20 o3.5 +118
u3.5 -164
3 o3.5 +135
u3.5 -175
9 o3.5 +124
u3.5 -161
10 o3.5 +124
u3.5 -161
31 o3.5 +110
u3.5 -148
27 o3.5 +110
u3.5 -148
13 o3.5 +115
u3.5 -175
17 o3.5 +120
u3.5 -162
54 o3.5 +124
u3.5 -161
22 o3.5 +110
u3.5 -148
25 o3.5 +110
u3.5 -148
8   
7 o1.5 +126
u1.5 -174
67   
78 o1.5 +155
u1.5 -190
84 o1.5 -112
u1.5 -112
2 o1.5 +126
u1.5 -162
1 o1.5 +135
u1.5 -175
4   
20 o1.5 +118
u1.5 -164
3 o1.5 +140
u1.5 -180
9 o1.5 +132
u1.5 -172
10 o1.5 +132
u1.5 -172
31 o1.5 +125
u1.5 -167
27 o1.5 +125
u1.5 -167
13   
17 o1.5 +125
u1.5 -167
54 o1.5 +

Unnamed: 0,Bookmaker,Sharp Book Price,Sporttrade,Bet365,DraftKings Pick6,FanDuel,DraftKings,BetMGM,Caesars,PointsBet,...,Bovada,Unibet,Sugarhouse,888sports,BetRivers,SportsBetting,Four Winds,Parx,Over,Under
2/15 8:30 PM Jaren Jackson(F-C) MEM vs. MIL,,o1.5 -168\nu1.5 +123,,,,o1.5 -158\nu1.5 +124,o1.5 -175\nu1.5 +135,,o1.5 -156\nu1.5 +114,,...,o1.5 -169\nu1.5 +129,o1.5 -180\nu1.5 +133,o1.5 -180\nu1.5 +133,,o1.5 -180\nu1.5 +133,o1.5 -169\nu1.5 +129,o1.5 -180\nu1.5 +133,o1.5 -180\nu1.5 +133,{1.5: -172.0},{1.5: 129.0}
2/15 8:30 PM Luke Kennard(G-F) MEM vs. MIL,,o3.5 +149\nu3.5 -204,,o3.5 +130\nu3.5 -160,,o3.5 +124\nu3.5 -158,o3.5 +130\nu3.5 -166,,o3.5 +118\nu3.5 -164,o3.5 +135\nu3.5 -175,...,o3.5 +124\nu3.5 -161,o3.5 +110\nu3.5 -148,o3.5 +110\nu3.5 -148,o3.5 +115\nu3.5 -175,o3.5 +120\nu3.5 -162,o3.5 +124\nu3.5 -161,o3.5 +110\nu3.5 -148,o3.5 +110\nu3.5 -148,{},{}
2/15 8:30 PM Jae Crowder(F) MIL @ MEM,,o1.5 +126\nu1.5 -174,,o1.5 +155\nu1.5 -190,o1.5 -112\nu1.5 -112,o1.5 +126\nu1.5 -162,o1.5 +135\nu1.5 -175,,o1.5 +118\nu1.5 -164,o1.5 +140\nu1.5 -180,...,o1.5 +132\nu1.5 -172,o1.5 +125\nu1.5 -167,o1.5 +125\nu1.5 -167,,o1.5 +125\nu1.5 -167,o1.5 +132\nu1.5 -172,o1.5 +125\nu1.5 -167,o1.5 +125\nu1.5 -167,{1.5: -112.0},{1.5: -112.0}
2/15 8:30 PM Damian Lillard(G) MIL @ MEM,,o2.5 -174\nu2.5 +128,,o2.5 -165\nu2.5 +135,,o3.5 +130\nu3.5 -166,o2.5 -166\nu2.5 +130,,o2.5 -167\nu2.5 +120,o3.5 -160\nu3.5 +120,...,o2.5 -167\nu2.5 +128,o3.5 +135\nu3.5 -182,o3.5 +135\nu3.5 -182,o2.5 -175\nu2.5 +120,o3.5 +135\nu3.5 -182,o2.5 -167\nu2.5 +128,o3.5 +135\nu3.5 -182,o3.5 +135\nu3.5 -182,"{2.5: -168.5, 3.5: -160.0}","{2.5: 127.1, 3.5: 120.0}"
2/15 8:30 PM AJ Green(G) MIL @ MEM,,o1.5 -112\nu1.5 -118,,o1.5 +105\nu1.5 -135,,,,,,o1.5 +105\nu1.5 -140,...,,,,,,,,,{1.5: -112.0},{1.5: -118.0}
2/15 8:30 PM Malik Beasley(G) MIL @ MEM,,o2.5 -148\nu2.5 +111,,o2.5 -145\nu2.5 +115,,o2.5 -128\nu2.5 +100,o2.5 -150\nu2.5 +120,,o2.5 -152\nu2.5 +110,o2.5 -140\nu2.5 +105,...,o2.5 -147\nu2.5 +113,o2.5 -155\nu2.5 +116,o2.5 -155\nu2.5 +116,o2.5 -158\nu2.5 +105,o2.5 -155\nu2.5 +116,o2.5 -147\nu2.5 +113,o2.5 -155\nu2.5 +116,o2.5 -155\nu2.5 +116,{2.5: -149.1},{2.5: 112.3}
2/15 8:30 PM Pat Connaughton(G) MIL @ MEM,,,,o0.5 -220\nu0.5 +170,,,o0.5 -215\nu0.5 +165,,o0.5 -233\nu0.5 +165,o0.5 -225\nu0.5 +165,...,o0.5 -222\nu0.5 +167,,,,,o0.5 -222\nu0.5 +167,,,{0.5: -222.7},{0.5: 166.6}
2/15 8:30 PM Bobby Portis(F) MIL @ MEM,,o1.5 +159\nu1.5 -223,,o1.5 +165\nu1.5 -210,,o1.5 +130\nu1.5 -166,o1.5 +160\nu1.5 -210,,o1.5 +146\nu1.5 -204,o1.5 +150\nu1.5 -200,...,o1.5 +155\nu1.5 -204,,,o1.5 +145\nu1.5 -222,,o1.5 +155\nu1.5 -204,,,{},{}
2/15 8:30 PM Giannis Antetokounmpo MIL @ MEM,,,,o0.5 +145\nu0.5 -180,,,o0.5 +145\nu0.5 -190,,o0.5 +139\nu0.5 -192,o0.5 +165\nu0.5 -225,...,o0.5 +141\nu0.5 -185,,,o0.5 +130\nu0.5 -200,,o0.5 +141\nu0.5 -185,,,{},{}
2/15 8:30 PM Patrick Beverley(G) MIL @ MEM,,,,o0.5 -130\nu0.5 +100,,,o0.5 -135\nu0.5 +105,,o0.5 -133\nu0.5 -103,o0.5 -140\nu0.5 +105,...,o0.5 -130\nu0.5 +100,,,,,o0.5 -130\nu0.5 +100,,,{0.5: -132.6},{0.5: 72.4}


KeyboardInterrupt: Interrupted by user

In [7]:
def extract_text(s):
    """Debug variant: parse one over/under cell and print the input and result.

    The cell is expected to look like two lines of text, for example
    ``o1.5 -168`` followed by ``u1.5 +123``.  Only the first over/under pair
    found in the text is used.

    Args:
        s: Cell text.  Anything that is not a string (``None``, NaN, ...) is
            treated as an empty cell.

    Returns:
        ``[over_line, over_odds, under_line, under_odds]`` as
        ``[float, int, float, int]``, or ``[]`` when no pair is found.
    """
    print("IN", s)
    # Empty grid cells can surface as None/NaN; re would raise TypeError on them.
    if not isinstance(s, str):
        return []

    # The line must contain a decimal point.  The sign is optional on the
    # "over" odds but required on the "under" odds.
    pattern = r'o(\d+\.\d+)\s*([-+]?\d+)\nu(\d+\.\d+)\s*([-+]\d+)'

    match = re.search(pattern, s)
    if match is None:
        return []

    over_line, over_odds, under_line, under_odds = match.groups()
    parsed = [float(over_line), int(over_odds), float(under_line), int(under_odds)]
    print("OUT", parsed)
    return parsed


# Rebuild the table from the last scraped page (one row per player) and
# recompute the per-line averages into fresh columns, in the same order as
# before: "Over2" first, then "Under2".
df = pd.DataFrame(labeled_player_data).transpose()
for column_name, aggregate in (('Over2', oavgc), ('Under2', uavgc)):
    df[column_name] = df.apply(aggregate, axis=1)
display(df)

IN o1.5 -168
u1.5 +123
OUT [1.5, -168, 1.5, 123]
IN o1.5 -158
u1.5 +124
OUT [1.5, -158, 1.5, 124]
IN o1.5 -175
u1.5 +135
OUT [1.5, -175, 1.5, 135]
IN o1.5 -156
u1.5 +114
OUT [1.5, -156, 1.5, 114]
IN o1.5 -169
u1.5 +129
OUT [1.5, -169, 1.5, 129]
IN o1.5 -169
u1.5 +129
OUT [1.5, -169, 1.5, 129]
IN o1.5 -180
u1.5 +133
OUT [1.5, -180, 1.5, 133]
IN o1.5 -180
u1.5 +133
OUT [1.5, -180, 1.5, 133]
IN o1.5 -180
u1.5 +133
OUT [1.5, -180, 1.5, 133]
IN o1.5 -169
u1.5 +129
OUT [1.5, -169, 1.5, 129]
IN o1.5 -180
u1.5 +133
OUT [1.5, -180, 1.5, 133]
IN o1.5 -180
u1.5 +133
OUT [1.5, -180, 1.5, 133]
IN o3.5 +149
u3.5 -204
OUT [3.5, 149, 3.5, -204]
IN o3.5 +130
u3.5 -160
OUT [3.5, 130, 3.5, -160]
IN o3.5 +124
u3.5 -158
OUT [3.5, 124, 3.5, -158]
IN o3.5 +130
u3.5 -166
OUT [3.5, 130, 3.5, -166]
IN o3.5 +118
u3.5 -164
OUT [3.5, 118, 3.5, -164]
IN o3.5 +135
u3.5 -175
OUT [3.5, 135, 3.5, -175]
IN o3.5 +124
u3.5 -161
OUT [3.5, 124, 3.5, -161]
IN o3.5 +124
u3.5 -161
OUT [3.5, 124, 3.5, -161]
IN o3.5 +110
u3.5 -1

Unnamed: 0,Bookmaker,Sharp Book Price,Sporttrade,Bet365,DraftKings Pick6,FanDuel,DraftKings,BetMGM,Caesars,PointsBet,...,Bovada,Unibet,Sugarhouse,888sports,BetRivers,SportsBetting,Four Winds,Parx,Over2,Under2
2/15 8:30 PM Jaren Jackson(F-C) MEM vs. MIL,,o1.5 -168\nu1.5 +123,,,,o1.5 -158\nu1.5 +124,o1.5 -175\nu1.5 +135,,o1.5 -156\nu1.5 +114,,...,o1.5 -169\nu1.5 +129,o1.5 -180\nu1.5 +133,o1.5 -180\nu1.5 +133,,o1.5 -180\nu1.5 +133,o1.5 -169\nu1.5 +129,o1.5 -180\nu1.5 +133,o1.5 -180\nu1.5 +133,{1.5: -172.0},{1.5: 129.0}
2/15 8:30 PM Luke Kennard(G-F) MEM vs. MIL,,o3.5 +149\nu3.5 -204,,o3.5 +130\nu3.5 -160,,o3.5 +124\nu3.5 -158,o3.5 +130\nu3.5 -166,,o3.5 +118\nu3.5 -164,o3.5 +135\nu3.5 -175,...,o3.5 +124\nu3.5 -161,o3.5 +110\nu3.5 -148,o3.5 +110\nu3.5 -148,o3.5 +115\nu3.5 -175,o3.5 +120\nu3.5 -162,o3.5 +124\nu3.5 -161,o3.5 +110\nu3.5 -148,o3.5 +110\nu3.5 -148,{3.5: 122.2},{3.5: -162.6}
2/15 8:30 PM Jae Crowder(F) MIL @ MEM,,o1.5 +126\nu1.5 -174,,o1.5 +155\nu1.5 -190,o1.5 -112\nu1.5 -112,o1.5 +126\nu1.5 -162,o1.5 +135\nu1.5 -175,,o1.5 +118\nu1.5 -164,o1.5 +140\nu1.5 -180,...,o1.5 +132\nu1.5 -172,o1.5 +125\nu1.5 -167,o1.5 +125\nu1.5 -167,,o1.5 +125\nu1.5 -167,o1.5 +132\nu1.5 -172,o1.5 +125\nu1.5 -167,o1.5 +125\nu1.5 -167,{1.5: 113.9},{1.5: -167.2}
2/15 8:30 PM Damian Lillard(G) MIL @ MEM,,o2.5 -174\nu2.5 +128,,o2.5 -165\nu2.5 +135,,o3.5 +130\nu3.5 -166,o2.5 -166\nu2.5 +130,,o2.5 -167\nu2.5 +120,o3.5 -160\nu3.5 +120,...,o2.5 -167\nu2.5 +128,o3.5 +135\nu3.5 -182,o3.5 +135\nu3.5 -182,o2.5 -175\nu2.5 +120,o3.5 +135\nu3.5 -182,o2.5 -167\nu2.5 +128,o3.5 +135\nu3.5 -182,o3.5 +135\nu3.5 -182,"{2.5: -168.5, 3.5: 92.1}","{2.5: 127.1, 3.5: -136.6}"
2/15 8:30 PM AJ Green(G) MIL @ MEM,,o1.5 -112\nu1.5 -118,,o1.5 +105\nu1.5 -135,,,,,,o1.5 +105\nu1.5 -140,...,,,,,,,,,{1.5: 32.7},{1.5: -131.0}
2/15 8:30 PM Malik Beasley(G) MIL @ MEM,,o2.5 -148\nu2.5 +111,,o2.5 -145\nu2.5 +115,,o2.5 -128\nu2.5 +100,o2.5 -150\nu2.5 +120,,o2.5 -152\nu2.5 +110,o2.5 -140\nu2.5 +105,...,o2.5 -147\nu2.5 +113,o2.5 -155\nu2.5 +116,o2.5 -155\nu2.5 +116,o2.5 -158\nu2.5 +105,o2.5 -155\nu2.5 +116,o2.5 -147\nu2.5 +113,o2.5 -155\nu2.5 +116,o2.5 -155\nu2.5 +116,{2.5: -149.1},{2.5: 112.3}
2/15 8:30 PM Pat Connaughton(G) MIL @ MEM,,,,o0.5 -220\nu0.5 +170,,,o0.5 -215\nu0.5 +165,,o0.5 -233\nu0.5 +165,o0.5 -225\nu0.5 +165,...,o0.5 -222\nu0.5 +167,,,,,o0.5 -222\nu0.5 +167,,,{0.5: -222.7},{0.5: 166.6}
2/15 8:30 PM Bobby Portis(F) MIL @ MEM,,o1.5 +159\nu1.5 -223,,o1.5 +165\nu1.5 -210,,o1.5 +130\nu1.5 -166,o1.5 +160\nu1.5 -210,,o1.5 +146\nu1.5 -204,o1.5 +150\nu1.5 -200,...,o1.5 +155\nu1.5 -204,,,o1.5 +145\nu1.5 -222,,o1.5 +155\nu1.5 -204,,,{1.5: 152.0},{1.5: -204.7}
2/15 8:30 PM Giannis Antetokounmpo MIL @ MEM,,,,o0.5 +145\nu0.5 -180,,,o0.5 +145\nu0.5 -190,,o0.5 +139\nu0.5 -192,o0.5 +165\nu0.5 -225,...,o0.5 +141\nu0.5 -185,,,o0.5 +130\nu0.5 -200,,o0.5 +141\nu0.5 -185,,,{0.5: 143.4},{0.5: -192.8}
2/15 8:30 PM Patrick Beverley(G) MIL @ MEM,,,,o0.5 -130\nu0.5 +100,,,o0.5 -135\nu0.5 +105,,o0.5 -133\nu0.5 -103,o0.5 -140\nu0.5 +105,...,o0.5 -130\nu0.5 +100,,,,,o0.5 -130\nu0.5 +100,,,{0.5: -132.6},{0.5: 72.4}
