Skip to content

Latest commit

 

History

History
158 lines (132 loc) · 7.51 KB

README.md

File metadata and controls

158 lines (132 loc) · 7.51 KB

meteoR

R-CMD-check

meteoR is an unofficial R wrapper around the OPTED Meteor API.

Installation

You can install the development version of meteoR from GitHub with:

# If devtools is missing, install it first:
# install.packages("devtools")
devtools::install_github(repo = "thieled/meteoR")

Example

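This basic example queries the Meteor API for national news websites (newspapers and magazines) from Austria and Germany, inspects the returned entries, and then requests additional website data for a few of them: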

library(meteoR)

# Fetch all Country entries; their uid column is used as a filter below
all_countries <- call_meteor(
  method = "get",
  ressource = "query",
  type = "Country",
  format = "dataframe"
)
#> 50 obs. + 50 obs. + 50 obs. + 50 obs. + 50 obs. + 2 obs.

# Fetch all Channel entries in the same way
all_channels <- call_meteor(
  method = "get",
  ressource = "query",
  type = "Channel",
  format = "dataframe"
)
#> 22 obs.

# Keep only the "website" channel
website_channel <- all_channels |>
  dplyr::filter(`_unique_name` == "website")

# Keep only the countries of interest
target_countries <- dplyr::filter(
  all_countries,
  name %in% c("Austria", "Germany")
)

# Query the API for news sources from these countries on that channel
news_sources <- call_meteor(
  method = "get",
  ressource = "query",
  type = "NewsSource",
  countries = target_countries$uid,
  channel = website_channel$uid,
  geographic_scope = c("national"),
  format = "dataframe",
  publication_kind = c("newspaper", "magazine"),
  n_max = 10, n = 10
)
#> 10 obs.


# Request the detailed entry behind each returned uid
source_uids <- news_sources$uid
view_df <- view_uid(
  uid = source_uids,
  format = "dataframe",
  unnest_cutoff = 1
)
#> 32 obs. 30 obs. 29 obs. 30 obs. 30 obs. 27 obs. 29 obs. 23 obs. 23 obs. 30 obs.


dplyr::glimpse(view_df)
#> Rows: 10
#> Columns: 48
#> $ `_date_created`                           <chr> "2023-02-08T14:16:02.771084+…
#> $ `_unique_name`                            <chr> "newssource_de_bilanzde_webs…
#> $ audience_size_recent                      <int> 3033, 11875, 4096, 42832, 16…
#> $ `audience_size_recent|timestamp`          <chr> "2023-02-08T00:00:00+00:00",…
#> $ `audience_size_recent|unit`               <chr> "daily visitors", "daily vis…
#> $ contains_ads                              <chr> "yes", "yes", "no", "yes", "…
#> $ defunct                                   <lgl> TRUE, FALSE, NA, NA, FALSE, …
#> $ entry_review_status                       <chr> "accepted", "accepted", "acc…
#> $ geographic_scope                          <chr> "national", "national", "nat…
#> $ identifier                                <chr> "https://bilanz.de/", "https…
#> $ name                                      <chr> "bilanz.de", "die-bank.de", …
#> $ party_affiliated                          <chr> "NA", NA, "no", "NA", "yes",…
#> $ payment_model                             <chr> "NA", "partly free", "partly…
#> $ publication_cycle                         <chr> "NA", "continuous", "continu…
#> $ special_interest                          <lgl> TRUE, TRUE, FALSE, FALSE, FA…
#> $ uid                                       <chr> "0x42020", "0x445c6", "0x187…
#> $ verified_account                          <lgl> FALSE, FALSE, NA, NA, NA, NA…
#> $ date_founded                              <chr> NA, NA, NA, "1996-01-01T00:0…
#> $ audience_size                             <list> "2023-02-08T00:00:00+00:00"…
#> $ publication_kind                          <list> "magazine", "magazine", <NU…
#> $ topical_focus                             <list> "economy", "economy", <NULL…
#> $ alternate_names                           <list> <NULL>, <NULL>, <NULL>, <NU…
#> $ `_edited_by__edited_by|timestamp`         <chr> "2023-02-08T14:16:15.015135+…
#> $ `_edited_by_display_name`                 <chr> "Paul Balluff", "Paul Balluf…
#> $ `_edited_by_uid`                          <chr> "0x2711", "0x2711", "0x2711"…
#> $ `audience_size|count_0`                   <int> 3033, 11875, 4096, 42832, 16…
#> $ `audience_size|data_from_0`               <chr> "https://siterankdata.com/bi…
#> $ `audience_size|unit_0`                    <chr> "daily visitors", "daily vis…
#> $ countries__unique_name                    <chr> "country_germany", "country_…
#> $ countries_entry_review_status             <chr> "accepted", "accepted", "acc…
#> $ countries_name                            <chr> "Germany", "Germany", "Germa…
#> $ countries_uid                             <chr> "0x1b", "0x1b", "0x1b", "0x1…
#> $ languages__unique_name                    <chr> "language_german", "language…
#> $ languages_entry_review_status             <chr> "accepted", "accepted", "acc…
#> $ languages_name                            <chr> "German", "German", "German"…
#> $ languages_uid                             <chr> "0x37e338", "0x37e338", "0x3…
#> $ related_news_sources__unique_name         <chr> "newssource_de_bilanz_print"…
#> $ related_news_sources_entry_review_status  <chr> "accepted", NA, NA, NA, NA, …
#> $ related_news_sources_name                 <chr> "Bilanz", NA, NA, NA, NA, NA…
#> $ related_news_sources_uid                  <chr> "0x4201f", NA, NA, NA, NA, N…
#> $ `_edited_by_dgraph.type`                  <list> ["User"], ["User"], ["User"…
#> $ countries_dgraph.type                     <list> ["Entry", "Country"], ["Ent…
#> $ languages_dgraph.type                     <list> ["Language", "Entry"], ["La…
#> $ related_news_sources_countries            <list<tibble[,5]>> [<tbl_df[2 x 5]…
#> $ related_news_sources_dgraph.type          <list> ["Entry", "NewsSource"], <N…
#> $ related_news_sources_channel__unique_name <chr> "print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_name         <chr> "Print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_uid          <chr> "0x11", NA, NA, NA, NA, NA, …



# Helper: request data on one website (e.g. its audience size) from the
# "external" resource and hand back the raw, unformatted reply.
#   website: the URL passed on to the API's `website` argument
call_ws <- function(website) {
  raw_reply <- call_meteor(
    method = "post",
    ressource = "external",
    option = "website",
    website = website,
    format = "raw"
  )
  raw_reply
}

# Call the helper once per identifier, for the first three entries only
first_entries <- head(view_df, 3)
raw_replies <- pbapply::pblapply(first_entries$identifier, FUN = call_ws)

# Turn the list of raw replies into a single table
res <- fleece::rectangularize(raw_replies)

dplyr::glimpse(res)
#> Rows: 3
#> Columns: 12
#> $ audience_size             <chr> "2024-02-12", "2024-02-12", "2024-02-12"
#> $ `audience_size|count`     <int> 1896530, 11875, 13338
#> $ `audience_size|data_from` <chr> "https://siterankdata.com/bilanz.de", "https…
#> $ `audience_size|unit`      <chr> "daily visitors", "daily visitors", "daily v…
#> $ identifier                <chr> "https://www.welt.de/wirtschaft", "https://d…
#> $ name                      <chr> "bilanz.de", "die-bank.de", "ef-magazin.de"
#> $ alternate_names_1         <chr> "Wirtschaft - News & Aktuelle Nachrichten - …
#> $ alternate_names_2         <chr> "https://www.welt.de/wirtschaft/", NA, "http…
#> $ channel_feeds_1           <chr> NA, NA, "https://ef-magazin.de/feed/atom/"
#> $ channel_feeds_2           <chr> NA, NA, "https://ef-magazin.de/feed/rss/"
#> $ `channel_feeds|kind_0`    <chr> NA, NA, "rss"
#> $ `channel_feeds|kind_1`    <chr> NA, NA, "rss"