### Scrape GEMP
* Implement proof-of-concept scraper

In [1]:
## 0. Prerequisites
from selenium.webdriver import ChromeOptions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import re
import time
import bs4 as bs
import hidden
# used to write completed games to csv file
from csv import DictWriter


In [2]:
class GempTable:
    """One GEMP game table as scraped from the hall page.

    Attributes mirror the keys of the dict passed to the constructor:
    ``recorded`` (0 until the table has been written to the CSV file, then a
    timestamp string), ``table_id``, ``format``, ``tournament``, ``status``,
    ``ls_player``/``ls_deck``, ``ds_player``/``ds_deck``, ``gamelink`` and
    ``winner``.  ``messages`` starts as an empty list.
    """

    # Column order of the rows appended to the CSV file.
    _CSV_HEADERS = ['Recorded',
                    'TableID',
                    'Format',
                    'Tournament',
                    'Status',
                    'LS_Player',
                    'LS_Deck',
                    'DS_Player',
                    'DS_Deck',
                    'GameLink',
                    'Winner']

    def __init__(self, table):
        """Build a table from a dict keyed by the CSV column names.

        Args:
            table: mapping with the keys 'Recorded', 'TableID', 'Format',
                'Tournament', 'Status', 'LS_Player', 'LS_Deck', 'DS_Player',
                'DS_Deck' and 'Winner'.  'GameLink' is optional and defaults
                to 'NA'.

        Raises:
            KeyError: if any required key is missing.
        """
        self.recorded = table['Recorded']
        self.table_id = table['TableID']
        self.format = table['Format']
        self.tournament = table['Tournament']
        self.status = table['Status']
        self.ls_player = table['LS_Player']
        self.ls_deck = table['LS_Deck']
        self.ds_player = table['DS_Player']
        self.ds_deck = table['DS_Deck']
        # A row scraped without a link has no 'GameLink' entry; fall back to
        # the same 'NA' placeholder used for finished tables instead of
        # raising KeyError.
        self.gamelink = table.get('GameLink', 'NA')
        self.winner = table['Winner']
        self.messages = []

    def record_gemp_table(self):
        """Append this table to 'gemp_completed_games.csv' if it qualifies.

        A row is written only when the table is 'Finished', its format is
        'Open' and it has not been recorded yet (``self.recorded == 0``).
        No header row is written.  After a successful write,
        ``self.recorded`` holds the timestamp stored in the row.

        Returns:
            True, always -- including when nothing was written (kept for
            compatibility with the original behavior).
        """
        if self.status == 'Finished' and self.format == 'Open' and self.recorded == 0:
            timestamp = time.strftime('%Y-%m-%d %H:%M:%S%z (%Z)')
            row = {'Recorded': timestamp,
                   'TableID': self.table_id,
                   'Format': self.format,
                   'Tournament': self.tournament,
                   'Status': self.status,
                   'LS_Player': self.ls_player,
                   'LS_Deck': self.ls_deck,
                   'DS_Player': self.ds_player,
                   'DS_Deck': self.ds_deck,
                   'GameLink': self.gamelink,
                   'Winner': self.winner,
                   }
            # The with-block closes the file; no explicit close() is needed.
            with open('gemp_completed_games.csv', 'a', newline='') as f:
                DictWriter(f, fieldnames=self._CSV_HEADERS).writerow(row)
            # Mark as recorded only once the row is actually on disk.
            self.recorded = timestamp
            # DEBUG is a module-level flag defined elsewhere in this file.
            if DEBUG: print(f"recorded {row['TableID']}")
        return True

In [3]:
# Player-column patterns, e.g. "name (DARK: deck), name (LIGHT: deck)".
# The hall lists either side first, so both orders are tried.
_DARK_FIRST_RE = re.compile(r'\s*(.*)\s\((?:DARK:\s)(.*)\)\,\s(.*)\s\((?:LIGHT:\s)(.*)\)')
_LIGHT_FIRST_RE = re.compile(r'\s*(.*)\s\((?:LIGHT:\s)(.*)\)\,\s(.*)\s\((?:DARK:\s)(.*)\)')


def _parse_players(text):
    """Split a players cell into LS/DS player and deck names.

    Returns a dict with the keys 'LS_Player', 'LS_Deck', 'DS_Player' and
    'DS_Deck', or None when the text matches neither expected layout.
    """
    found = _DARK_FIRST_RE.findall(text)
    if found:
        ds_player, ds_deck, ls_player, ls_deck = found[0]
    else:
        found = _LIGHT_FIRST_RE.findall(text)
        if not found:
            return None
        ls_player, ls_deck, ds_player, ds_deck = found[0]
    return {'LS_Player': ls_player,
            'LS_Deck': ls_deck,
            'DS_Player': ds_player,
            'DS_Deck': ds_deck}


def get_gemp_tables(driver, gemp_tables):
    """Scrape the hall's playing and finished tables into ``gemp_tables``.

    Args:
        driver: Selenium WebDriver currently showing the GEMP hall page.
        gemp_tables: dict holding the bookkeeping lists 'active', 'finished'
            and 'recorded' (lists of table ids) plus one ``GempTable`` per
            known table id.  It is mutated in place.

    Returns:
        The same ``gemp_tables`` dict.

    Only tables whose format is 'Open' are stored.  Rows that lack the
    expected cells, or whose players cell cannot be parsed, are skipped.
    The first row of each HTML table is skipped, as in the original code
    (presumably a header row -- confirm against the page).
    """
    playing_table_body = driver.find_element(
        By.XPATH, '/html/body/div[1]/div[1]/div[3]/div[1]/div[4]/table/tbody')
    playing_entries = playing_table_body.find_elements(By.TAG_NAME, 'tr')

    finished_table_body = driver.find_element(
        By.XPATH, '//*[@id="hall"]/div[1]/div[6]/table/tbody')
    finished_entries = finished_table_body.find_elements(By.TAG_NAME, 'tr')

    ## Add rows from Playing Tables
    for row in playing_entries[1:]:
        table_id = row.get_attribute("class")
        if table_id in gemp_tables:
            continue

        if DEBUG: print("adding Playing Table: "+table_id)
        cols = row.find_elements(By.TAG_NAME, 'td')
        if len(cols) < 4:
            continue
        status = cols[2].text
        # Tables still in preparation are not tracked; they are picked up on
        # a later cycle once their status changes.
        if status == 'Preparation':
            continue
        players = _parse_players(cols[3].text)
        if players is None:
            continue

        table = {'Recorded': 0,
                 'TableID': table_id,
                 'Format': cols[0].text,
                 'Tournament': cols[1].text,
                 'Status': status,
                 # Placeholder so a row without a link still builds a table.
                 'GameLink': 'NA',
                 'Winner': 'NA'}
        table.update(players)
        links = row.find_elements(By.TAG_NAME, 'a')
        if links:
            table['GameLink'] = links[0].get_attribute("href")
        # Add table only if 'Format' is 'Open'
        if table['Format'] == 'Open':
            gemp_tables[table_id] = GempTable(table)
            gemp_tables['active'].append(table_id)

    # check length of tables dictionary
    if DEBUG: print(f'length of tables after Playing Tables: {len(gemp_tables)}')

    ## Update/add tables with rows from Finished Tables
    for row in finished_entries[1:]:
        cols = row.find_elements(By.TAG_NAME, 'td')
        table_id = row.get_attribute("class")
        # Status is read from cols[2] and the winner from cols[4].
        if len(cols) < 5:
            continue
        status = cols[2].text
        if status == 'Cancelled':
            continue

        if table_id in gemp_tables:
            if table_id in gemp_tables['recorded']:
                continue
            if DEBUG: print("updating Finished Table: "+table_id)
            gemp_tables[table_id].status = status
            gemp_tables[table_id].winner = cols[4].text
            gemp_tables['finished'].append(table_id)
            # The id is missing from 'active' when the table was first seen
            # already finished; remove() would raise ValueError then.
            if table_id in gemp_tables['active']:
                gemp_tables['active'].remove(table_id)
            continue

        if DEBUG: print("adding Finished Table: "+table_id)
        players = _parse_players(cols[3].text)
        if players is None:
            continue
        table = {'Recorded': 0,
                 'TableID': table_id,
                 'Format': cols[0].text,
                 'Tournament': cols[1].text,
                 'Status': status,
                 'GameLink': 'NA',
                 'Winner': cols[4].text}
        table.update(players)
        # Add table only if 'Format' is 'Open'
        if table['Format'] == 'Open':
            gemp_tables[table_id] = GempTable(table)
            gemp_tables['finished'].append(table_id)

    # check length of tables dictionary
    if DEBUG: print(f'length of tables after Finished Tables: {len(gemp_tables)}')

    return gemp_tables
    

def record_gemp_table_messages(table):
    """Placeholder for recording a table's messages; not implemented yet.

    Accepts ``table`` and ignores it. Always returns None.
    """
    return None
    

In [4]:
def login_gemp(driver):
    """Open the GEMP login page and submit the stored credentials.

    Reads the module-level ``secrets`` mapping (keys 'login', 'user' and
    'pass'), fills the elements with ids "login" and "password", and submits
    by sending RETURN to the password field.

    Args:
        driver: Selenium WebDriver to drive.

    Returns:
        The same driver, for call chaining.
    """
    driver.get(secrets["login"])
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3.
    driver.find_element(By.ID, "login").send_keys(secrets["user"])
    password_field = driver.find_element(By.ID, "password")
    password_field.send_keys(secrets["pass"])
    password_field.send_keys(Keys.RETURN)
    return driver


def open_gemp_table(driver, table_path):
    """Open ``table_path`` in a new browser window via JavaScript.

    The URL is handed to the script as an argument rather than interpolated
    into the script text: an unquoted URL is not valid JavaScript, and a
    quote character inside it could break out of a string literal.

    Args:
        driver: Selenium WebDriver (anything providing ``execute_script``).
        table_path: URL of the table to open.

    Returns:
        None.
    """
    driver.execute_script('window.open(arguments[0]);', table_path)
    return


def listen_for_gemp_tables(driver, sleep_time=120, max_cycles=None):
    """Poll the hall page repeatedly and record newly finished tables.

    Each cycle clears the 'finished' list, rescrapes the hall with
    ``get_gemp_tables`` and calls ``record_gemp_table()`` on every finished
    table id not yet in 'recorded', then adds the id to 'recorded'.

    Args:
        driver: Selenium WebDriver showing the GEMP hall page.
        sleep_time: seconds to wait between cycles (default 120).
        max_cycles: stop after this many cycles; None (the default) polls
            forever, as the original did, until interrupted.

    Returns:
        0 once ``max_cycles`` cycles have completed.
    """
    gemp_tables = {'active': [], 'finished': [], 'recorded': []}
    cycles_done = 0
    while max_cycles is None or cycles_done < max_cycles:
        gemp_tables['finished'] = []
        gemp_tables = get_gemp_tables(driver, gemp_tables)

        for table_id in gemp_tables['finished']:
            if table_id not in gemp_tables['recorded']:
                gemp_tables[table_id].record_gemp_table()
                gemp_tables['recorded'].append(table_id)

        # TODO: create new webdrivers to watch and record relevant active games
        # TODO: create structure for simultaneous games

        cycles_done += 1
        # Skip the wait after the final cycle so a bounded run returns promptly.
        if max_cycles is not None and cycles_done >= max_cycles:
            break
        if DEBUG: print(f"sleeping for {sleep_time} seconds...")
        time.sleep(sleep_time)
    return 0


def access_gemp(site_URL):
    """Start Chrome, log in to GEMP, open the hall and start listening.

    Args:
        site_URL: URL of the GEMP page to load after logging in.

    Returns:
        The page source as seen once ``listen_for_gemp_tables`` returns.
        With its default arguments that call polls until interrupted.

    The driver is not quit here; the original left ``driver.quit()``
    disabled, so the browser stays open.
    """
    from selenium.common.exceptions import WebDriverException

    chrome_options = webdriver.ChromeOptions()
    # Headless mode is left disabled, as in the original:
    #     chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=chrome_options)
    driver = login_gemp(driver)
    driver.get(site_URL)
    try:
        # Best-effort click on a hall element (presumably toggles a section
        # of the hall -- confirm against the page).
        driver.find_element(
            By.XPATH, '/html/body/div[1]/div[1]/div[3]/div[1]/div[5]/div').click()
    except WebDriverException as err:
        # Still best-effort, but only Selenium errors are tolerated; a bare
        # except would also hide programming errors such as AttributeError.
        if DEBUG: print(f"could not click hall element: {err}")
    time.sleep(2)
    listen_for_gemp_tables(driver)

    source = driver.page_source
    return source
  

In [5]:
# Entry point: choose credentials, then start scraping GEMP.
DEBUG = True

# ``hidden`` is a project-local module; the dev credential set is used while
# DEBUG is on, the regular one otherwise.
secrets = hidden.dev_secrets() if DEBUG else hidden.secrets()

if DEBUG:
    print('Debug is ON')

site_URL = secrets["host"]
source = access_gemp(site_URL)

Debug is ON
length of tables after Playing Tables: 3
adding Finished Table: table10

length of tables after Finished Tables: 4
recorded table10
sleeping for 120 seconds...


KeyboardInterrupt: 