forked from YaqiongYao/UCSAS_WebScrapping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
code.R
90 lines (67 loc) · 2.34 KB
/
code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
## Load a CSV file straight from a web address into a data frame
url <- "http://www.tennis-data.co.uk/2020/ausopen.csv"
tennis_aus <- utils::read.csv(file = url)
## Print a compact overview of the columns that were read
utils::str(tennis_aus)
## Web scraping using rvest
## library() stops immediately when rvest is not installed; require() would
## only warn and return FALSE, postponing the failure to the first rvest call.
library(rvest)
url_elo <- "http://tennisabstract.com/reports/atp_elo_ratings.html"
## Download and parse the page once; both selections below reuse it
webpage <- read_html(url_elo)
## Extract the table(s) selected by CSS class ...
elo_class <- webpage %>%
  html_nodes(".tablesorter") %>%
  html_table()
## ... and the table(s) selected by element id
elo_id <- webpage %>%
  html_nodes("#reportable") %>%
  html_table()
## TRUE when both selectors produced exactly the same list of tables
identical(elo_class, elo_id)
## Web scraping using RSelenium
## library() stops immediately when RSelenium is not installed; require()
## would only warn and return FALSE.
library(RSelenium)
## Start a Selenium server plus a Chrome browser session on the given port
rD <- rsDriver(port = 5563L, chromever = "85.0.4183.87")
remDr <- rD$client
url <- "http://www.flashscore.com/match/Cj6I5iL9/#match-statistics;0"
remDr$navigate(url)
## Read the text of every element carrying the class "statBox"
webElem <- remDr$findElements(using = 'class', "statBox")
stat_text <- unlist(lapply(webElem, function(x) x$getElementText()))
if (length(stat_text) > 0L) {
  ## Keep the text of the first matching element only
  webElem <- stat_text[[1]]
} else {
  ## Nothing matched: warn and carry on with an empty result, so that the
  ## browser and server below are still shut down instead of being leaked by
  ## a "subscript out of bounds" error.
  warning("No element with class 'statBox' was found on ", url,
          call. = FALSE)
  webElem <- character(0)
}
## One entry per line of the extracted text
head(unlist(strsplit(webElem, split = "\n")))
## Close the browser window and stop the Selenium server so the port is freed
remDr$close()
invisible(rD$server$stop())
## Open a new browser session and load the team's results page
rD <- rsDriver(port = 5572L, chromever = "85.0.4183.87")
remDr <- rD$client
url <- "https://www.flashscore.com/team/connecticut-huskies/8rqVf3Tj/results/"
remDr$navigate(url)
## Keep clicking the link found under the "live-table" element until locating
## or clicking it raises an error, which ends the loop.
more_to_load <- TRUE
while (more_to_load) {
  b <- tryCatch(
    suppressMessages({
      webElemMore <- remDr$findElement(
        using = "xpath",
        '//*[@id="live-table"]/div[1]/div/div/a'
      )
      webElemMore$clickElement()
    }),
    error = function(e) e
  )
  ## An error object returned by the handler means there is nothing to click
  more_to_load <- !inherits(b, "error")
}
## Return the text of every element in a list of web elements as one vector
element_texts <- function(elements) {
  unlist(lapply(elements, function(el) el$getElementText()))
}

## Text of the elements with class "event__time"; line breaks inside a value
## are replaced by a single space
time_nodes <- remDr$findElements(using = "xpath",
                                 '//*[@class="event__time"]')
webElemTime <- gsub("\\n", " ", element_texts(time_nodes))

## Text of the elements with class "event__participant"
participant_nodes <- remDr$findElements(using = "class",
                                        "event__participant")
webElemHome <- element_texts(participant_nodes)

## Text of the elements with class "event__score"
score_nodes <- remDr$findElements(using = "class", "event__score")
webElemScore <- element_texts(score_nodes)

## Text of the elements with class "wld"
result_nodes <- remDr$findElements(using = "class", "wld")
webElemResult <- element_texts(result_nodes)
## The values used below were already extracted from the page into R vectors,
## so release the browser and the Selenium server first. That way a failure
## while assembling the table cannot leave them running.
remDr$close()
invisible(rD$server$stop())

## Participants and scores alternate between the first and second team of a
## game, so each must hold exactly two entries per game; times and results
## hold one entry per game.
n <- length(webElemHome)
n_games <- length(webElemTime)
if (n != 2L * n_games ||
    length(webElemScore) != 2L * n_games ||
    length(webElemResult) != n_games) {
  ## Fail with the actual counts instead of letting data.frame() recycle
  ## shorter vectors or stop with a generic "differing number of rows" error
  stop(
    sprintf(
      paste0("Scraped vectors are inconsistent: %d times, %d participants, ",
             "%d scores, %d results"),
      n_games, n, length(webElemScore), length(webElemResult)
    ),
    call. = FALSE
  )
}

## seq_len() is empty for n == 0, whereas seq(0) would yield c(1, 0)
home_rows <- seq_len(n) %% 2L == 1L
## as.character() keeps every column present even when nothing was scraped
## (an empty unlist() result is NULL, which data.frame() would drop)
basketball <-
  data.frame(time = as.character(webElemTime),
             Home = as.character(webElemHome[home_rows]),
             Away = as.character(webElemHome[!home_rows]),
             HomeS = as.character(webElemScore[home_rows]),
             AwayS = as.character(webElemScore[!home_rows]),
             Result = as.character(webElemResult))
head(basketball)