In [1]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

import bs4
import re

import pandas as pd
from dateutil import parser

In [2]:
# Configure and launch the Edge browser session used for the scraping below.
options = webdriver.EdgeOptions()
# Uncomment the next line to pass the 'headless' switch as well.
# options.add_argument('headless')
# Pass the 'inprivate' switch to Edge (presumably opens an InPrivate window -- TODO confirm).
options.add_argument('inprivate')
driver = webdriver.Edge(options= options)

In [3]:
# Navigate the browser to the qlstats games listing for server_id=5935.
driver.get('https://qlstats.net/games?server_id=5935')

### Bypass cookie screen

In [4]:
def is_cookie_screen(driver: "selenium.webdriver.remote.webdriver.WebDriver") -> bool:
    '''Check whether the page loaded in ``driver`` is the 'accept cookies' screen.

    The visible text of the page's <body> element is searched for the
    cookie-consent sentence immediately followed, on the next line, by the
    word "Agree".

    Parameters
    ----------
    driver
        Selenium WebDriver whose currently loaded page is inspected.

    Returns
    -------
    bool
        True if the consent text is found in the body, False otherwise.
    '''
    body = driver.find_element(By.TAG_NAME, value='body')
    # The full stop is escaped so it only matches a literal '.'; search()
    # already scans the whole text, so no surrounding '.*' is needed.
    cookie_screen = re.compile(
        r'To continue using qlstats, you need to agree to the use of cookies\.\nAgree'
    )
    return cookie_screen.search(body.text) is not None
#press the button


# Dismiss the cookie-consent screen, if it is showing, by clicking a button on it.
if is_cookie_screen(driver):
    try:
        # find_element returns the first <button> on the page; presumably
        # that is the "Agree" button -- TODO confirm against the live page.
        button = driver.find_element(By.TAG_NAME, 'button')
        button.click()
    except Exception as exc:
        # Best effort: report the failure instead of silently discarding it,
        # and do not trap KeyboardInterrupt/SystemExit as a bare except would.
        print(f"Cookies could not be accepted, please recheck ({exc})")

### Scrape server match info

In [5]:
# Parse the HTML currently rendered in the browser. The parser is named
# explicitly so the result does not depend on which optional parsers are
# installed (and to avoid bs4's GuessedAtParserWarning).
soup = bs4.BeautifulSoup(driver.page_source, 'html.parser')

In [12]:
# Locate the first <table> whose class attribute is exactly this string.
table = soup.find('table', class_='table table-hover table-condensed')

In [46]:
# Columns to extract from every row of the table; '' is the untitled
# column whose cell holds the link to the individual game page.
WANTED_COLUMNS = ['', 'Time', 'Type', 'Map', 'Score', 'Rated']

# Column titles in the order they appear in the table header.
header = table.find('thead')
head = [col.text for col in header.find_all('th')]

# Position of each wanted column in the header, and the reverse lookup
# from position back to column title.
search_indices = [head.index(name) for name in WANTED_COLUMNS]
search_key_vals = dict(zip(search_indices, WANTED_COLUMNS))

# One list per table row: [href, datetime, type, map, score, rated].
scraped = []
content = table.find('tbody')
for row in content.find_all('tr'):
    all_content = row.find_all('td')
    if len(all_content) <= max(search_indices):
        # Row has fewer cells than the header promises, so it cannot be
        # indexed safely; skip it rather than raising IndexError.
        continue

    tr_info = []
    for idx in search_indices:
        element = all_content[idx]
        column = search_key_vals[idx]

        if column == '':
            # Untitled column: keep the link target of its anchor.
            tr_info.append(element.find('a').get('href'))
        elif column == 'Time':
            # Parse the text of the 'abstime' span into a datetime.
            tr_info.append(parser.parse(element.find('span', attrs={'class': 'abstime'}).text))
        else:
            # 'Type', 'Map', 'Score', 'Rated': plain cell text.
            tr_info.append(element.text.strip())

    scraped.append(tr_info)
        
            

In [47]:
# Display the rows collected from the table as the cell output.
scraped

[['/game/8892316',
  datetime.datetime(2023, 8, 14, 0, 32, 27),
  'ca',
  'eviscerated',
  '10:9',
  'A'],
 ['/game/8892261',
  datetime.datetime(2023, 8, 14, 0, 15, 36),
  'ca',
  'hearth',
  '10:6',
  'A'],
 ['/game/8892223',
  datetime.datetime(2023, 8, 14, 0, 2, 27),
  'ca',
  'asylum',
  '9:10',
  'A'],
 ['/game/8892179',
  datetime.datetime(2023, 8, 14, 23, 44, 34),
  'ca',
  'overek',
  '10:5',
  'A'],
 ['/game/8892128',
  datetime.datetime(2023, 8, 14, 23, 27, 45),
  'ca',
  'quarantine',
  '10:7',
  'A'],
 ['/game/8892086',
  datetime.datetime(2023, 8, 14, 23, 10, 20),
  'ca',
  'trinity',
  '8:10',
  'A'],
 ['/game/8892043',
  datetime.datetime(2023, 8, 14, 22, 49, 34),
  'ca',
  'almostlost',
  '10:8',
  'A'],
 ['/game/8892011',
  datetime.datetime(2023, 8, 14, 22, 31, 49),
  'ca',
  'campgrounds',
  '10:1',
  'A'],
 ['/game/8891984',
  datetime.datetime(2023, 8, 14, 22, 19, 18),
  'ca',
  'asylum',
  '10:6',
  'A'],
 ['/game/8889851',
  datetime.datetime(2023, 8, 12, 23, 53

## Testing

In [7]:
# Link targets of every anchor carrying the 'btn' class inside the first table on the page.
urls = [anchor.get('href') for anchor in soup.find('table').find_all('a', class_='btn')]
urls

['/game/8892316',
 '/game/8892261',
 '/game/8892223',
 '/game/8892179',
 '/game/8892128',
 '/game/8892086',
 '/game/8892043',
 '/game/8892011',
 '/game/8891984',
 '/game/8889851',
 '/game/8889819',
 '/game/8889786',
 '/game/8889753',
 '/game/8889728',
 '/game/8889690',
 '/game/8889662',
 '/game/8889624',
 '/game/8887674',
 '/game/8887645',
 '/game/8887609']

In [8]:
# Parse the text of every 'abstime' span on the page into a datetime.
dates = [parser.parse(span.text) for span in soup.find_all('span', class_='abstime')]
dates

[datetime.datetime(2023, 8, 14, 0, 32, 27),
 datetime.datetime(2023, 8, 14, 0, 15, 36),
 datetime.datetime(2023, 8, 14, 0, 2, 27),
 datetime.datetime(2023, 8, 14, 23, 44, 34),
 datetime.datetime(2023, 8, 14, 23, 27, 45),
 datetime.datetime(2023, 8, 14, 23, 10, 20),
 datetime.datetime(2023, 8, 14, 22, 49, 34),
 datetime.datetime(2023, 8, 14, 22, 31, 49),
 datetime.datetime(2023, 8, 14, 22, 19, 18),
 datetime.datetime(2023, 8, 12, 23, 53, 54),
 datetime.datetime(2023, 8, 12, 23, 39, 16),
 datetime.datetime(2023, 8, 12, 23, 20, 17),
 datetime.datetime(2023, 8, 12, 23, 2, 45),
 datetime.datetime(2023, 8, 12, 22, 48, 1),
 datetime.datetime(2023, 8, 12, 22, 30, 11),
 datetime.datetime(2023, 8, 12, 22, 10, 55),
 datetime.datetime(2023, 8, 12, 21, 46, 57),
 datetime.datetime(2023, 8, 11, 23, 49, 15),
 datetime.datetime(2023, 8, 11, 23, 38, 53),
 datetime.datetime(2023, 8, 11, 23, 23, 36)]

In [9]:
# href of the anchor named "Next Page", or None when the page has no such
# anchor (soup.find returns None if nothing matches, so guard before .get).
next_link = soup.find('a', attrs = {"name": "Next Page"})
next_btn = next_link.get('href') if next_link is not None else None

'/games?server_id=5935&start_game_id=8887608'