-
Notifications
You must be signed in to change notification settings - Fork 0
/
driver.py
88 lines (77 loc) · 3.09 KB
/
driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Web Driver
"""
import os
import os.path
from time import sleep
from contextlib import contextmanager
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
class DriverWrapper(object):
    """
    Wrapped driver. Insert sleep operation for loading page.

    Attribute access that is not satisfied by the wrapper itself is
    forwarded to the wrapped ``driver`` object. Callable attributes are
    returned as thin wrappers that run ``_post_command`` after the call,
    which is where the pauses are inserted.
    """

    def __init__(self, driver):
        """Store the driver object that all unknown attributes delegate to."""
        self.driver = driver

    def __getattr__(self, attr):
        """Forward ``attr`` to the wrapped driver, hooking callables.

        Only invoked when normal attribute lookup on the wrapper fails.
        """
        # If ``driver`` itself is missing (instance created without
        # __init__, e.g. by copy/pickle), looking it up below would re-enter
        # __getattr__ forever. Fail with the conventional AttributeError.
        if attr == 'driver':
            raise AttributeError(attr)

        orig_attr = getattr(self.driver, attr)
        if not callable(orig_attr):
            return orig_attr

        def _hooked(*args, **kwargs):
            result = orig_attr(*args, **kwargs)
            # prevent driver from becoming unwrapped
            # (identity check: never invoke a custom __eq__ on the result)
            if result is self.driver:
                return self
            self._post_command(attr)
            return result

        return _hooked

    def _post_command(self, attr):
        """Pause after the command named ``attr`` has been executed.

        Sleeps 10 seconds after ``get`` and 2 seconds after
        ``find_element_by_xpath``; every other command returns immediately.
        """
        if attr == 'get':
            sleep(10)
        elif attr == 'find_element_by_xpath':
            sleep(2)

    def scrape_battle_urls(self, battle_format):
        """Scrape urls of battles from https://play.pokemonshowdown.com.

        ``battle_format`` is interpolated into the ``value`` attribute of
        the button that is clicked after opening the format selector.
        Returns a list of absolute battle URLs found on the page.
        """
        self.get("https://play.pokemonshowdown.com/battles")
        self.find_element_by_xpath("//button[@name='selectFormat']").click()
        self.find_element_by_xpath("//button[@value='%s']" % battle_format).click()
        self.find_element_by_xpath("//input[@name='elofilter']").click()
        # Looked up again without clicking: the only effect visible here is
        # the extra 2 second pause that _post_command adds after the lookup.
        self.find_element_by_xpath("//input[@name='elofilter']")
        soup = BeautifulSoup(self.page_source, "html5lib")
        links = soup.select('a[href^="/battle-"]')
        return ["https://play.pokemonshowdown.com" + link["href"] for link in links]

    def download_battle(self, url):
        """Download battle html. Try up to 3 times.

        Opens ``url``, clicks the ``openSounds`` button and the ``muted``
        input, then looks for a link whose text contains "Download". Once
        that link exists, the element with class ``battle-log`` is written
        to ``downloads/<last url segment>.html`` and the method returns.
        When the link is missing, waits 60 seconds and tries again. Returns
        ``None`` in every case, including when all attempts fail.
        """
        self.get(url)
        self.find_element_by_xpath("//button[@name='openSounds']").click()
        self.find_element_by_xpath("//input[@name='muted']").click()
        for _ in range(3):
            try:
                self.find_element_by_partial_link_text("Download")
            except NoSuchElementException:
                sleep(60)
                continue
            # Build the content before touching the filesystem so a page
            # without a battle log does not leave an empty file behind.
            soup = BeautifulSoup(self.page_source, "html5lib")
            log = soup.find(class_='battle-log').prettify()
            os.makedirs('downloads', exist_ok=True)
            target = os.path.join('downloads', url.split("/")[-1] + '.html')
            with open(target, 'wb') as file:
                file.write(log.encode())
            return
        # NOTE(review): the source listing lost its indentation; this final
        # pause is assumed to sit after the retry loop -- confirm.
        sleep(2)
@contextmanager
def create_driver(*, headless=True):
    """Yield a DriverWrapper around a Chrome driver and shut it down on exit.

    With ``headless=True`` (the default) Chrome is started with the
    ``--headless`` and ``--disable-gpu`` arguments. The browser's default
    download directory is set to ``downloads`` under the current working
    directory. The driver is shut down when the ``with`` block ends, also
    when the block raises.
    """
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
    prefs = {"download.default_directory": os.path.join(os.getcwd(), 'downloads')}
    options.add_experimental_option("prefs", prefs)
    driver = DriverWrapper(webdriver.Chrome(chrome_options=options))
    try:
        yield driver
    finally:
        # quit() ends the whole session; close() would only close the
        # current window.
        driver.quit()