meteoR is an unofficial R wrapper around the OPTED Meteor API.
You can install the development version of meteoR from GitHub with:
# install.packages("devtools")
devtools::install_github("thieled/meteoR")
This basic example shows how to query the Meteor API for news sources from selected countries and retrieve further details about them:
library(meteoR)
# Fetch all Country entries as a data frame; their uids are used to filter
# the news source query further down
countries <- call_meteor(
  method = "get",
  ressource = "query",
  type = "Country",
  format = "dataframe"
)
#> 50 obs. + 50 obs. + 50 obs. + 50 obs. + 50 obs. + 2 obs.
# Fetch all Channel entries as a data frame
channel <- call_meteor(
  method = "get",
  ressource = "query",
  type = "Channel",
  format = "dataframe"
)
#> 22 obs.
# Keep only the "website" channel
channel_website <- channel |>
  dplyr::filter(`_unique_name` == "website")
# Keep only the countries of interest
countries_selection <- dplyr::filter(
  countries,
  name %in% c("Austria", "Germany")
)
# Query the API for national newspaper and magazine news sources on the
# website channel in the selected countries
news_sources <- call_meteor(
  method = "get",
  ressource = "query",
  type = "NewsSource",
  countries = countries_selection$uid,
  channel = channel_website$uid,
  geographic_scope = "national",
  format = "dataframe",
  publication_kind = c("newspaper", "magazine"),
  n_max = 10,
  n = 10
)
#> 10 obs.
# Look up each news source by its uid to get more detail, as a data frame
view_df <- view_uid(
  uid = news_sources$uid,
  format = "dataframe",
  unnest_cutoff = 1
)
#> 32 obs. 30 obs. 29 obs. 30 obs. 30 obs. 27 obs. 29 obs. 23 obs. 23 obs. 30 obs.
view_df |> dplyr::glimpse()
#> Rows: 10
#> Columns: 48
#> $ `_date_created` <chr> "2023-02-08T14:16:02.771084+…
#> $ `_unique_name` <chr> "newssource_de_bilanzde_webs…
#> $ audience_size_recent <int> 3033, 11875, 4096, 42832, 16…
#> $ `audience_size_recent|timestamp` <chr> "2023-02-08T00:00:00+00:00",…
#> $ `audience_size_recent|unit` <chr> "daily visitors", "daily vis…
#> $ contains_ads <chr> "yes", "yes", "no", "yes", "…
#> $ defunct <lgl> TRUE, FALSE, NA, NA, FALSE, …
#> $ entry_review_status <chr> "accepted", "accepted", "acc…
#> $ geographic_scope <chr> "national", "national", "nat…
#> $ identifier <chr> "https://bilanz.de/", "https…
#> $ name <chr> "bilanz.de", "die-bank.de", …
#> $ party_affiliated <chr> "NA", NA, "no", "NA", "yes",…
#> $ payment_model <chr> "NA", "partly free", "partly…
#> $ publication_cycle <chr> "NA", "continuous", "continu…
#> $ special_interest <lgl> TRUE, TRUE, FALSE, FALSE, FA…
#> $ uid <chr> "0x42020", "0x445c6", "0x187…
#> $ verified_account <lgl> FALSE, FALSE, NA, NA, NA, NA…
#> $ date_founded <chr> NA, NA, NA, "1996-01-01T00:0…
#> $ audience_size <list> "2023-02-08T00:00:00+00:00"…
#> $ publication_kind <list> "magazine", "magazine", <NU…
#> $ topical_focus <list> "economy", "economy", <NULL…
#> $ alternate_names <list> <NULL>, <NULL>, <NULL>, <NU…
#> $ `_edited_by__edited_by|timestamp` <chr> "2023-02-08T14:16:15.015135+…
#> $ `_edited_by_display_name` <chr> "Paul Balluff", "Paul Balluf…
#> $ `_edited_by_uid` <chr> "0x2711", "0x2711", "0x2711"…
#> $ `audience_size|count_0` <int> 3033, 11875, 4096, 42832, 16…
#> $ `audience_size|data_from_0` <chr> "https://siterankdata.com/bi…
#> $ `audience_size|unit_0` <chr> "daily visitors", "daily vis…
#> $ countries__unique_name <chr> "country_germany", "country_…
#> $ countries_entry_review_status <chr> "accepted", "accepted", "acc…
#> $ countries_name <chr> "Germany", "Germany", "Germa…
#> $ countries_uid <chr> "0x1b", "0x1b", "0x1b", "0x1…
#> $ languages__unique_name <chr> "language_german", "language…
#> $ languages_entry_review_status <chr> "accepted", "accepted", "acc…
#> $ languages_name <chr> "German", "German", "German"…
#> $ languages_uid <chr> "0x37e338", "0x37e338", "0x3…
#> $ related_news_sources__unique_name <chr> "newssource_de_bilanz_print"…
#> $ related_news_sources_entry_review_status <chr> "accepted", NA, NA, NA, NA, …
#> $ related_news_sources_name <chr> "Bilanz", NA, NA, NA, NA, NA…
#> $ related_news_sources_uid <chr> "0x4201f", NA, NA, NA, NA, N…
#> $ `_edited_by_dgraph.type` <list> ["User"], ["User"], ["User"…
#> $ countries_dgraph.type <list> ["Entry", "Country"], ["Ent…
#> $ languages_dgraph.type <list> ["Language", "Entry"], ["La…
#> $ related_news_sources_countries <list<tibble[,5]>> [<tbl_df[2 x 5]…
#> $ related_news_sources_dgraph.type <list> ["Entry", "NewsSource"], <N…
#> $ related_news_sources_channel__unique_name <chr> "print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_name <chr> "Print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_uid <chr> "0x11", NA, NA, NA, NA, NA, …
# Query the external "website" endpoint for a single website and return the
# raw response (the result shown below reports audience size, e.g. daily
# visitors).
#
# website: value passed on to call_meteor() as its `website` argument; the
#   example below supplies the `identifier` URLs of the news sources.
call_ws <- function(website) {
  response <- call_meteor(
    method = "post",
    ressource = "external",
    option = "website",
    website = website,
    format = "raw"
  )
  response
}
# Call the external endpoint for the first three identifiers (with a progress
# bar), then flatten the list of raw responses with fleece::rectangularize()
res <- pbapply::pblapply(
  X = head(view_df[["identifier"]], 3),
  FUN = call_ws
) |>
  fleece::rectangularize()
res |> dplyr::glimpse()
#> Rows: 3
#> Columns: 12
#> $ audience_size <chr> "2024-02-12", "2024-02-12", "2024-02-12"
#> $ `audience_size|count` <int> 1896530, 11875, 13338
#> $ `audience_size|data_from` <chr> "https://siterankdata.com/bilanz.de", "https…
#> $ `audience_size|unit` <chr> "daily visitors", "daily visitors", "daily v…
#> $ identifier <chr> "https://www.welt.de/wirtschaft", "https://d…
#> $ name <chr> "bilanz.de", "die-bank.de", "ef-magazin.de"
#> $ alternate_names_1 <chr> "Wirtschaft - News & Aktuelle Nachrichten - …
#> $ alternate_names_2 <chr> "https://www.welt.de/wirtschaft/", NA, "http…
#> $ channel_feeds_1 <chr> NA, NA, "https://ef-magazin.de/feed/atom/"
#> $ channel_feeds_2 <chr> NA, NA, "https://ef-magazin.de/feed/rss/"
#> $ `channel_feeds|kind_0` <chr> NA, NA, "rss"
#> $ `channel_feeds|kind_1` <chr> NA, NA, "rss"