# script.R (forked from lorindavies/R_Downloadtoolkit_CSSEGISandData)
library(tidyverse)   # includes readr, stringr, dplyr and purrr
library(rvest)
library(glue)
library(lubridate)
library(jsonlite)
# load yesterday's data only ---------------------------------------------------------
d <- lubridate::today() - lubridate::days(1)
# daily report files are named MM-DD-YYYY.csv, e.g. "03-14-2020.csv"
d1 <- format(d, "%m-%d-%Y")
CSSEGISandData <- read_csv(glue("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{d1}.csv"))
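# The report for "yesterday" is sometimes not yet published when this runs
# (CSSE updates on a lag), which makes the read_csv() above fail. A minimal
# fallback sketch, assuming the MM-DD-YYYY.csv naming holds: step back one day
# at a time until a file loads. read_latest_report() is a hypothetical helper,
# not part of the original toolkit.
read_latest_report <- function(max_back = 5) {
  base <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"
  for (back in seq_len(max_back)) {
    d <- lubridate::today() - lubridate::days(back)
    url <- glue(base, format(d, "%m-%d-%Y"), ".csv")
    out <- tryCatch(read_csv(url), error = function(e) NULL)
    if (!is.null(out)) return(out)
  }
  stop("no daily report found in the last ", max_back, " days")
}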
# Live data from the covid19-report.today API. Reading the raw response with
# read_file() leaves it as one unparsed JSON string:
# ffg <- read_file("https://covid19-report.today/api/latest-report")
# jsonlite::fromJSON() parses it straight into R structures instead:
ffg <- fromJSON("https://covid19-report.today/api/latest-report/")
# the first row of the report matrix holds the column names; the rest is data
ffg.new <- as_tibble(ffg$report[2:nrow(ffg$report), ], .name_repair = ~ ffg$report[1, ])
ffg.new
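# fromJSON() tends to collapse the nested report arrays into a character
# matrix, so the count columns arrive as strings. A sketch of coercing them,
# assuming the first column is the country name and the rest are counts:
ffg.num <- ffg.new %>%
  mutate(across(-1, parse_number))
ffg.num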
# totals come back as a two-row matrix: names in row 1, values in row 2
ffg_totals <- as_tibble(t(ffg$totals[2, ]), .name_repair = ~ ffg$totals[1, ])
ffg_totals
# lastUpdateTimestamp is epoch milliseconds; epoch times count from
# 1970-01-01 UTC by definition, so converting with tz = "UTC" is correct even
# though the site does not list a time zone
ffg_last_update <- lubridate::as_datetime(ffg$lastUpdateTimestamp / 1000, tz = "UTC")
ffg_last_update
real_time_now <- as.numeric(Sys.time())
real_time_now
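# With both epoch times in hand, the age of the live feed is a one-liner
# (both values are in seconds since 1970-01-01 UTC):
feed_age_minutes <- (real_time_now - ffg$lastUpdateTimestamp / 1000) / 60
feed_age_minutes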
working_df <-
  CSSEGISandData %>%
  group_by(Country_Region) %>%
  summarise(Confirmed = sum(Confirmed, na.rm = TRUE),
            Deaths    = sum(Deaths, na.rm = TRUE),
            Recovered = sum(Recovered, na.rm = TRUE),
            Rate      = Deaths / Confirmed * 100)   # case-fatality rate, %
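# A quick sanity check on the summary, e.g. the ten countries with the most
# confirmed cases:
working_df %>%
  arrange(desc(Confirmed)) %>%
  head(10)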
# Scraping data for all days available -----------------------------------------------------------
# read the gitpage for CSSE data
cssegit <- read_html("https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports")
# use rvest to extract the table of csvs
tabcsv <-
  cssegit %>%
  html_table(header = TRUE, trim = TRUE, fill = TRUE) %>%
  pluck(1)   # html_table() returns a list of tables; the file listing is the first
# keep only the .csv entries and select the file names; "\\.csv$" anchors the
# match (a bare ".csv" pattern would treat the dot as a regex wildcard)
t1 <- tabcsv %>%
  filter(str_detect(tolower(Name), "\\.csv$")) %>%
  select(Name)
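# Scraping the rendered GitHub page is fragile (the markup can change without
# notice). An alternative sketch using GitHub's contents API, which returns
# the same listing as JSON; assumes the unauthenticated rate limit is enough:
api_listing <- fromJSON("https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_daily_reports")
csv_names <- api_listing$name[str_detect(api_listing$name, "\\.csv$")]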
# glue() is vectorised, so a single call builds a raw-file URL for every file
# name (the original for loop only worked by accident, because iterating over
# a data frame yields whole columns)
dat <- glue("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{t1$Name}")
# use map() to build a list of data frames holding all the data, one per day
ldat <-
  dat %>%
  map(read_csv)
names(ldat) <- t1$Name
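# read_csv() aborts the whole map() if any single file fails to download or
# parse. A fault-tolerant sketch using purrr::safely(), which keeps going and
# drops the failures afterwards:
safe_read <- safely(read_csv)
ldat_ok <- dat %>%
  set_names(t1$Name) %>%
  map(safe_read) %>%
  map("result") %>%
  compact()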
# run the same summary as on yesterday's data above, but over the whole list
# of data frames; note the column name changed over time (early files use
# `Country/Region`, later ones `Country_Region`), so standardise it first
daily_summaries <- lapply(
  ldat,
  function(x) {
    names(x) <- str_replace(names(x), "Country/Region", "Country_Region")
    x %>%
      group_by(Country_Region) %>%
      summarise(Confirmed = sum(Confirmed, na.rm = TRUE),
                Deaths    = sum(Deaths, na.rm = TRUE),
                Recovered = sum(Recovered, na.rm = TRUE),
                Rate      = Deaths / Confirmed * 100)
  }
)
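# A sketch of combining the per-day summaries into one long data frame, with
# the source file name (i.e. the MM-DD-YYYY.csv report date) as a key column:
all_days <- bind_rows(daily_summaries, .id = "report_file")
all_days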