## Import packages

In [1]:
import selenium
from selenium import webdriver

import shutil, time, os
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from importlib import reload

import src
reload(src)

<module 'src' from 'C:\\Users\\Hugh\\git\\warcraftLogs\\src.py'>

## Configurations

In [2]:
# Set path variables.
# The defaults are the original, machine-specific locations. Set the
# WCL_ADBLOCKER_PATH / WCL_DOWNLOAD_DIR environment variables to point the
# notebook at other locations without editing this cell.
path_to_adblocker = os.environ.get("WCL_ADBLOCKER_PATH", r"C:\Users\Hugh\Desktop\3.11.2_0")
path_to_download_dir = os.environ.get("WCL_DOWNLOAD_DIR", r"C:\Users\Hugh\Downloads")

# Enable/disable SMS notifications (main() only talks to Twilio when this is True).
twilio = False

In [3]:
# Scrape settings read by main().
# N_parses is the *exclusive* upper bound of range(1, N_parses) in main(), so
# ranking rows 1 .. N_parses - 1 are visited (121 -> rows 1..120).
N_parses = 121
# Number of failed scrape attempts after which main() stops retrying.
retry_attempts = 3

## Navigate to WCL

In [4]:
# Boss to scrape. main() looks this name up in boss_link_dict, so it must be one of that dict's keys.
boss = "High Warlord Naj'entus"

In [5]:
# Boss name -> URL fragment ("#boss=<id>") that selects that boss on the
# rankings page. Built from (name, id) pairs so each boss sits on its own line.
boss_link_dict = {
    boss_name: f"#boss={boss_id}"
    for boss_name, boss_id in (
        ("High Warlord Naj'entus", 601),
        ("Supremus", 602),
        ("Shade of Akama", 603),
        ("Teron Gorefiend", 604),
        ("Gurtogg Bloodboil", 605),
        ("Reliquary of Souls", 606),
        ("Mother Shahraz", 607),
        ("The Illidari Council", 608),
        ("Illidan Stormrage", 609),
        ("Rage Winterchill", 618),
        ("Anetheron", 619),
        ("Kaz'rogal", 620),
        ("Azgalor", 621),
        ("Archimonde", 622),
    )
}

In [6]:
# Start the scraping session. src.load_top_N_scraper returns the boss page URL
# and the browser object that main() and the scratch cells drive via browser.get(...).
# NOTE(review): presumably this launches a browser with the extension at
# path_to_adblocker loaded — confirm in src.py.
boss_page_url, browser = src.load_top_N_scraper(path_to_adblocker, boss, boss_link_dict)

## Main code loop

In [175]:
# Column layout of the per-player frame handed to src.export_to_excel.
_PLAYER_COLUMNS = [
    "Rank", "Name", "Server", "Date", "Duration", "nHealers", "Spriest?",
    "Innervate?", "Bloodlust?", "Nature's Grace?", "Power Infusion?",
    "LB_uptime", "HPS", "% LB (tick) HPS", "% LB (bloom) HPS", "% Rejuv HPS",
    "% Regrowth HPS", "% Swiftmend HPS", "Rotating on tank?", "Rotation 1",
    "% Rotation 1", "Rotation 2", "% Rotation 2",
]

# The loops load the next rankings page after every row index that is a
# multiple of this value.
_ROWS_PER_PAGE = 100


def _rankings_url(boss_fragment, page=1):
    """Build the Restoration Druid HPS rankings URL for one boss.

    Args:
        boss_fragment: URL fragment selecting the boss, e.g. ``"#boss=601"``.
        page: 1-based rankings page. Page 1 is addressed without a ``page``
            parameter; later pages get ``&page=<n>`` appended.

    Returns:
        The URL string to pass to ``browser.get``.
    """
    url = f'https://classic.warcraftlogs.com/zone/rankings/1011{boss_fragment}&class=Druid&spec=Restoration&metric=hps'
    if page > 1:
        url += f'&page={page}'
    return url


def _open_rankings_page(page=1):
    """Load rankings page ``page`` for the configured ``boss`` and pause briefly."""
    browser.get(_rankings_url(boss_link_dict[boss], page))
    # Fixed pause kept from the original flow (pages can load slowly).
    time.sleep(2)


def _update_changed_ranks():
    """Walk the rankings rows and update every stored rank that has changed."""
    _open_rankings_page(1)
    page = 1

    # TODO: This loop opens/closes the excel file each loop (very slow). Update to only open the spreadsheet once
    print("Checking for rank changes since last scrape...")
    # NOTE(review): range(1, N_parses) visits rows 1 .. N_parses - 1; confirm
    # whether N_parses is meant to be inclusive.
    for i in range(1, N_parses):
        rank, name, server, region, date, HPS, duration = src.get_boss_data_top_N_scraper(browser, boss, boss_link_dict, i)

        if src.check_if_rank_changed(boss, rank, name, date):
            src.update_rank(boss, rank, name, date)

        if i % _ROWS_PER_PAGE == 0:
            page += 1
            _open_rankings_page(page)

    print("Rank updates complete.")
    print('-----------')
    time.sleep(2)


def _scrape_parse(rank, name, server, region, date, HPS, duration):
    """Scrape one player's report (opened from the current rankings page) and export it.

    The browser is left on the player's report; the caller navigates back to
    the rankings page afterwards.
    """
    # Local imports keep this cell self-contained on a fresh kernel.
    import numpy as np  # replaces pd.np, which was removed in pandas 2.0
    from selenium.webdriver.common.by import By  # find_element_by_* was removed in Selenium 4.3

    print(rank, name, server, region)

    browser.find_element(By.LINK_TEXT, name).click()
    time.sleep(1)

    # Empty float frame with the export columns, same as the original
    # np.empty((0, 23)) construction.
    player_df = pd.DataFrame(np.empty((0, len(_PLAYER_COLUMNS))), columns=_PLAYER_COLUMNS)

    # Remember the report URL so we can come back after the tank/healer lookups.
    temp_url = browser.current_url
    time.sleep(1)

    boss_tanks = src.get_tanks(browser)
    nHealers = src.get_nHealers(browser)

    print(f"Tanks: {boss_tanks}")

    browser.get(temp_url)
    time.sleep(0.5)

    browser.find_element(By.LINK_TEXT, name).click()
    time.sleep(0.5)

    # Scrape HPS data
    LBtick_HPS, LBbloom_HPS, rejuv_HPS, regrowth_HPS, swiftmend_HPS, LB_uptime = src.get_spell_info(browser, HPS)
    time.sleep(2)

    # Check for buffs
    spriest = src.check_spriest(browser)
    innervate, bloodlust, powerInfusion, naturesGrace = src.check_buffs(browser)
    time.sleep(0.5)

    # Download the cast-sequence CSV.
    src.download_csv(browser, temp_url, "filter-casts-tab", path_to_download_dir, "data/cast_sequence.csv")
    time.sleep(1)

    # Clean the csv
    df = src.clean_cast_sequence_csv()
    df = src.fix_cast_time(df)
    time.sleep(0.5)

    # Get the rotations
    rotation1, rotation1_percent, rotation2, rotation2_percent, rotating_on_tank = src.calculate_rotations(df, boss, boss_tanks, LB_uptime)
    print(f'Rotations: {rotation1} ({rotation1_percent}), {rotation2} ({rotation2_percent})')

    # Export data and cleanup.
    # NOTE(review): _PLAYER_COLUMNS lists "Nature's Grace?" before
    # "Power Infusion?", but the values below are powerInfusion then
    # naturesGrace. The order is kept as-is so new rows stay consistent with
    # previously exported ones — confirm against src.check_buffs / src.export_to_excel.
    to_append = [rank, name, server + " " + region, date, duration, str(nHealers), spriest, innervate, bloodlust, powerInfusion, naturesGrace, LB_uptime, HPS, LBtick_HPS, LBbloom_HPS, rejuv_HPS, regrowth_HPS, swiftmend_HPS, rotating_on_tank, rotation1, rotation1_percent, rotation2, rotation2_percent]
    src.export_to_excel(boss, to_append, player_df, name, 'top_N_druids', True)

    os.remove(f"data/{boss.replace(' ', '')}_{name}.csv")
    os.remove("data/cast_sequence.csv")
    os.remove("data/cast_sequence_fixed.csv")


def _scrape_new_parses():
    """Scrape and export every ranking row that is not recorded yet."""
    _open_rankings_page(1)
    page = 1

    print("Beginning data scrape...")
    for i in range(1, N_parses):
        rank, name, server, region, date, HPS, duration = src.get_boss_data_top_N_scraper(browser, boss, boss_link_dict, i)

        already_recorded = src.check_if_parse_already_recorded_top_N(boss, rank, name)
        if not already_recorded:
            _scrape_parse(rank, name, server, region, date, HPS, duration)
            print('-------')

        if i % _ROWS_PER_PAGE == 0:
            # Next row lives on the following rankings page.
            page += 1
            _open_rankings_page(page)
        elif not already_recorded:
            # Scraping navigated away; return to the rankings page we were on.
            browser.get(_rankings_url(boss_link_dict[boss], page))
            time.sleep(1)


def main():
    """Refresh stored ranks, then scrape all not-yet-recorded parses for ``boss``.

    Reads the notebook-level settings ``boss``, ``boss_link_dict``, ``browser``,
    ``N_parses``, ``retry_attempts``, ``twilio`` and ``path_to_download_dir``.

    The browser can crash at random due to the page loading too slowly. Rather
    than restarting manually, a failed pass (rank refresh + scrape) is retried
    until ``retry_attempts`` passes have failed. Each failure's traceback is
    printed so the cause is visible. ``KeyboardInterrupt`` is not intercepted,
    so the run can still be stopped from the notebook.

    Twilio is an SMS service that sends a text when the run finishes or gives
    up. It is only touched when the ``twilio`` flag in the Configuration cell
    is True; with the flag off no SMS names are referenced at all.
    """
    import traceback

    twilio_client = None
    if twilio:
        # Set up the SMS client once instead of on every retry.
        from twilio.rest import Client
        account_sid, auth_token, twilio_number, cell_phone = src.get_twilio_info()
        twilio_client = Client(account_sid, auth_token)

    def notify(body):
        """Send ``body`` as an SMS when Twilio is enabled; otherwise do nothing."""
        if twilio_client is not None:
            twilio_client.messages.create(body=body, from_=twilio_number, to=cell_phone)

    failed_attempts = 0
    while True:
        try:
            # First check whether any rankings changed since the last scrape,
            # then scrape the parses that are still missing.
            _update_changed_ranks()
            _scrape_new_parses()
        except Exception:
            failed_attempts += 1
            traceback.print_exc()
            print(f"Scrape attempt {failed_attempts} of {retry_attempts} failed.")
            if failed_attempts >= retry_attempts:
                notify('Data scraping crashed')
                break
            time.sleep(10)
        else:
            print(f"{boss} scraping complete.")
            print("---------------")
            notify('Data scraping complete!')
            break
                
                
# Run the scrape when this cell is executed (in a notebook kernel __name__ is
# "__main__"), but not if the code is imported as a module.
if __name__ == "__main__":
    main()

Checking for rank changes since last scrape...
Rank updates complete.
-----------
Beginning data scrape...
107 月冷清秋  帕奇维克
Tanks: ['伊伊向前冲']
9.15%
1LB 0I 4RG 0.1
0LB 0I 5RG 0.5
0LB 1I 4RG 0.4
0.9
Rotating on tank?:  No
Rotations: 0LB 0I 5RG (0.5), 0LB 1I 4RG (0.4)
-------
108 Littlejin  Benediction
Tanks: ['Angelbeats']
33.67%
1LB 3I 0RG 0.083
1LB 0I 4RG 0.083
0LB 0I 5RG 0.417
0LB 1I 4RG 0.167
1LB 2I 2RG 0.167
0LB 1I 3RG 0.083
0.6669999999999999
Rotating on tank?:  No
Rotations: 0LB 0I 5RG (0.417), 0LB 1I 4RG (0.167)
-------
109 啵啵球  哈霍兰
Tanks: ['灬梦醒时分', '电梯丶战神']
73.38%
1LB 1I 2RG 0.059
1LB 0I 3RG 0.118
1LB 0I 4RG 0.235
1LB 1I 1RG 0.118
1LB 0I 2RG 0.118
1LB 2I 1RG 0.059
0LB 1I 4RG 0.118
0LB 0I 5RG 0.059
1LB 1I 3RG 0.118
0.177
Rotating on tank?:  Yes
Rotations: 1LB 0I 4RG (0.235), 1LB 0I 3RG (0.118)
-------
110 Rundru  Westfall
Tanks: ['Zenmetsu', 'Sardonyx']


NameError: name 'retry_attempts' is not defined

In [151]:
from selenium.webdriver.common.by import By

In [146]:
# Scratch/debug: pick rankings page k and row index i to inspect, and build that page's URL.
# Note: this rebinds the notebook-level boss_page_url.
k = 2
i = 101
boss_page_url = f'https://classic.warcraftlogs.com/zone/rankings/1011{boss_link_dict[boss]}&class=Druid&spec=Restoration&metric=hps&page={k}'

In [141]:
# Scratch/debug: load whatever boss_page_url currently points to.
browser.get(boss_page_url)

In [147]:
# Scratch/debug: find the element(s) whose id is "row-<boss id>-<i>", where the
# boss id is the part after "=" in the boss's URL fragment.
# Uses find_elements(By.ID, ...) because the find_elements_by_* helpers were
# removed in Selenium 4.3; this form also works on older Selenium versions.
row = browser.find_elements(By.ID, f"row-{boss_link_dict[boss].split('=')[1]}-{i}")

In [153]:
# Scratch/debug: child <td> elements of the first matched row.
cell = row[0].find_elements(By.XPATH, 'td')

In [155]:
# Scratch/debug: text of the second cell (the recorded output appears to be a player name).
cell[1].text

'村口赵奕欢'