In [2]:
import re
import time
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By


# One over/under pair as it appears in a cell, e.g. "o22.5 -110\nu22.5 +100".
# Groups: over line, over odds, under line, under odds. The sign on either
# odds value is optional so that "+110" and "110" are both accepted.
_OVER_UNDER_RE = re.compile(
    r'o(\d+\.\d+)\s*([-+]?\d+)\nu(\d+\.\d+)\s*([-+]?\d+)'
)


def extract_text(s):
    """Parse the first over/under pair found in a cell's text.

    Args:
        s: Cell text expected to contain a line of the form
            ``o<line> <odds>`` immediately followed by ``u<line> <odds>``.

    Returns:
        ``[over_line, over_odds, under_line, under_odds]`` as
        ``[float, int, float, int]`` for the first pair found, or ``[]`` when
        *s* is not a string or contains no such pair.
    """
    if not isinstance(s, str):
        return []

    match = _OVER_UNDER_RE.search(s)
    if match is None:
        return []

    over_line, over_odds, under_line, under_odds = match.groups()
    # int() accepts an explicit leading "+" or "-".
    return [float(over_line), int(over_odds), float(under_line), int(under_odds)]

def oavgc(row):
    """Average the over odds in one row, grouped by over line.

    Args:
        row: One row of sportsbook cells, as passed by
            ``DataFrame.apply(..., axis=1)`` (a pandas Series). Only string
            cells are inspected; anything else (e.g. NaN for a book with no
            price, or a dict stored in a derived column) is skipped.

    Returns:
        dict mapping each over line (float) to the mean of the over odds
        quoted at that line, rounded to one decimal place.
    """
    odds_by_line = {}
    # Iterate the row's own values instead of the module-level ``df`` so the
    # function depends only on its argument.
    for cell in row:
        if not isinstance(cell, str) or "o" not in cell:
            continue
        results = extract_text(cell)
        if len(results) != 4:
            continue
        # results = [over_line, over_odds, under_line, under_odds]
        odds_by_line.setdefault(results[0], []).append(results[1])

    return {
        line: round(sum(odds) / len(odds), 1)
        for line, odds in odds_by_line.items()
    }

def uavgc(row):
    """Average the under odds in one row, grouped by under line.

    Args:
        row: One row of sportsbook cells, as passed by
            ``DataFrame.apply(..., axis=1)`` (a pandas Series). Only string
            cells are inspected; anything else (e.g. NaN for a book with no
            price, or a dict stored in a derived column) is skipped.

    Returns:
        dict mapping each under line (float) to the mean of the under odds
        quoted at that line, rounded to one decimal place.
    """
    odds_by_line = {}
    # Iterate the row's own values instead of the module-level ``df`` so the
    # function depends only on its argument.
    for cell in row:
        if not isinstance(cell, str) or "u" not in cell:
            continue
        results = extract_text(cell)
        if len(results) != 4:
            continue
        # results = [over_line, over_odds, under_line, under_odds]; group the
        # under odds by the under line they were quoted at.
        odds_by_line.setdefault(results[2], []).append(results[3])

    return {
        line: round(sum(odds) / len(odds), 1)
        for line, odds in odds_by_line.items()
    }

def cleanpd(s):
    """Insert comma delimiters into *s* and split on them.

    A comma is placed after the first space of every double space, before
    every ``o`` that starts a line, and before the space of every line that
    starts with a space. The substitutions run in that order, each over the
    result of the previous one; the text is then split on ``,``.

    Args:
        s: Text to split.

    Returns:
        list of str: The comma-separated pieces.
    """
    substitutions = (
        ("  ", " ,"),
        ("\no", "\n,o"),
        ("\n ", "\n, "),
    )
    delimited = s
    for needle, replacement in substitutions:
        delimited = delimited.replace(needle, replacement)
    return delimited.split(",")

# Assuming you have set up your WebDriver, such as ChromeDriver
driver = webdriver.Chrome()

# Navigate to the props page and zoom far out (presumably so more of the grid
# is rendered at once -- TODO confirm), then give the page a moment to settle.
driver.get("https://unabated.com/nba/props")
driver.execute_script("document.body.style.zoom='30%'")
time.sleep(0.3)

# Find names of sportsbooks used: map each header cell's col-id to the
# sportsbook title shown for that column.
id2sbnames = {}
sbln = driver.find_elements(By.CSS_SELECTOR, ".ag-header-container > div:nth-child(1) > div")
col_ids = [sb.get_attribute("col-id") for sb in sbln]

# The XPath begins with "//", so it is evaluated against the whole document
# regardless of the element it is called on; query the driver directly instead
# of relying on the last header cell left over from a loop.
sbtitle = driver.find_elements(By.XPATH, "//div/div[@style and @title]")
for n, s in enumerate(sbtitle):
    tit = s.get_attribute("title")
    if "Real" in tit or "Low" in tit:
        # Keep only the part of the title before the first " \n".
        titsho = tit.split(" \n")[0]
        # NOTE(review): the n-1 offset presumably aligns title elements with
        # header cells; for n == 0 it wraps to the last col-id -- confirm.
        id2sbnames[col_ids[n-1]] = titsho

# Containers for the scrolling odds cells and the pinned player-name column.
viewport_div = driver.find_element(By.CSS_SELECTOR,'.ag-center-cols-viewport')
name_div = driver.find_element(By.CSS_SELECTOR,'.ag-pinned-left-cols-container')

# Scrape state shared with the page loop that follows.
last_child_count = 0   # number of rows captured after the previous scan
player_data = {}       # row-id -> {col-id -> cell text}
id2name = {}           # row-id -> text of the pinned name cell
visited_pages = []     # labels of the pages already scraped

# Open the page dropdown and collect its buttons (one per page).
pagebutton = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > button:nth-child(1)")
driver.execute_script("arguments[0].click();", pagebutton)
dropdowndiv = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > div:nth-child(2)")
pages = dropdowndiv.find_elements(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > div:nth-child(2) > button")
# Page-search flag; never reassigned in the code visible here.
findpage = True

def _button_label(button, max_attempts=5):
    """Return ``button.text``, retrying on WebDriver errors; None if all attempts fail."""
    for _ in range(max_attempts):
        try:
            return button.text
        except WebDriverException:
            time.sleep(0.1)
    return None


def _first_unvisited(buttons, visited):
    """Return ``(button, label)`` for the first labelled button not in *visited*, else ``(None, None)``."""
    for button in buttons:
        label = _button_label(button)
        if label and label not in visited:
            return button, label
    return None, None


# Scrape every page listed in the dropdown exactly once, writing one CSV per
# page. The browser is closed even if scraping fails part-way through.
try:
    page, page_name = _first_unvisited(pages, visited_pages)
    while page is not None:
        print("Scanning page:", page_name)
        visited_pages.append(page_name)
        time.sleep(0.5)
        driver.execute_script("arguments[0].click();", page)
        time.sleep(3)

        # Start each page with empty state so rows from earlier pages do not
        # leak into this page's CSV or stop the scroll loop early.
        player_data = {}
        id2name = {}
        last_child_count = 0

        # Loop to keep scrolling down until no more new child divs are loaded
        while True:

            ## FIND ALL ROWS AND CELLS
            child_divs = viewport_div.find_elements(By.CSS_SELECTOR, '.ag-center-cols-container > div')
            id_child = name_div.find_elements(By.CSS_SELECTOR, ".ag-pinned-left-cols-container > div")
            for row in child_divs:
                try:
                    cells = row.find_elements(By.CSS_SELECTOR, "div.ag-cell.ag-cell-not-inline-editing.ag-cell-auto-height.ag-cell-value")
                    player_id = row.get_attribute('row-id')
                    # Update in place: an error part-way through a re-scan
                    # must not discard cells captured on an earlier pass.
                    row_cells = player_data.setdefault(player_id, {})
                    for cell in cells:
                        row_cells[cell.get_attribute("col-id")] = cell.text
                except WebDriverException:
                    # Best effort: skip a row that cannot be read right now;
                    # a later pass may still capture it.
                    continue

            ## GET PLAYER NAME
            for div in id_child:
                try:
                    id2name[div.get_attribute('row-id')] = div.text
                except WebDriverException:
                    continue

            ## CHECK FOR BREAK CONDITIONS
            if len(player_data) == last_child_count:
                break
            last_child_count = len(player_data)

            ## SCROLL
            time.sleep(0.3)
            actions = ActionChains(driver)
            body = child_divs[-1]
            actions.move_to_element(body)
            try:
                actions.click_and_hold().move_by_offset(0, 100).release().perform()
            except WebDriverException:
                # Best effort: a failed drag just means the next scan sees no
                # new rows and the loop ends.
                pass

        # Re-key the scraped cells by player name and sportsbook name.
        labeled_player_data = {}
        for key, cells_by_col in player_data.items():
            raw_name = id2name.get(key)
            if raw_name is None:
                # No name cell was captured for this row; fall back to its id.
                raw_name = str(key)
            pname = raw_name.split("\nEdit")[0].replace("\n", " ")
            # Columns without a known sportsbook name keep their raw col-id.
            labeled_player_data[pname] = {
                id2sbnames.get(col, col): text
                for col, text in cells_by_col.items()
            }

        df = pd.DataFrame(labeled_player_data).T
        if df.empty:
            # Nothing to average or save for this page.
            print("No rows found on page:", page_name)
        else:
            df['Over'] = df.apply(oavgc, axis=1)
            df['Under'] = df.apply(uavgc, axis=1)
            now = datetime.now()
            date_time_str = now.strftime("Date %Y-%m-%d.  Hour %H")
            df.to_csv(page_name + " " + date_time_str + '.csv')

        ## TIME TO FIND A NEW PAGE TO CLICK
        pagebutton = driver.find_element(By.CSS_SELECTOR, "div.dropdown:nth-child(2) > button:nth-child(1)")
        driver.execute_script("arguments[0].click();", pagebutton)
        time.sleep(0.5)
        for _ in range(20):
            try:
                dropdowndiv = driver.find_element(By.XPATH, "//div[@class='ml-2 dropdown-menu show']")
                break
            except WebDriverException:
                # Give the menu a moment to open before looking again.
                time.sleep(0.1)
        # If every lookup failed, dropdowndiv still refers to the menu element
        # located previously.
        pages = dropdowndiv.find_elements(By.TAG_NAME, "button")
        print("_________________")
        page, page_name = _first_unvisited(pages, visited_pages)
        if page is not None:
            print("Found new page:", page_name)

    print("All pages processed")
finally:
    # Finally, remember to close the browser window when done
    driver.quit()


Scanning page: 3pts Made
Passed pages
Found new page: AssistsSIM
Scanning page: AssistsSIM
Passed pages
Found new page: Blocks
Scanning page: Blocks
Passed pages
Found new page: PointsSIM
Scanning page: PointsSIM
Passed pages
Found new page: Pts+AstSIM
Scanning page: Pts+AstSIM
Passed pages
Found new page: Pts+RebSIM
Scanning page: Pts+RebSIM
Passed pages
Found new page: Pts+Reb+AstSIM
Scanning page: Pts+Reb+AstSIM
Passed pages
Found new page: ReboundsSIM
Scanning page: ReboundsSIM
Passed pages
Found new page: Reb+AstSIM
Scanning page: Reb+AstSIM
Passed pages
Found new page: Steals
Scanning page: Steals
Passed pages
Found new page: Stl+Blk
Scanning page: Stl+Blk
Passed pages
Found new page: Turnovers
Scanning page: Turnovers
Passed pages
BREAKING 12 12
All pages processed
