/
premium.R
187 lines (175 loc) 路 6.42 KB
/
premium.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#' Premium Twitter searches
#'
#' Search 30day or fullarchive premium products. There is a limit of 5000 tweets
#' and 25000 for the fullarchive and 30day endpoints respectively. In addition,
#' there are limits on the number of requests that can be made in a given
#' amount of time; these have already been taken into account.
#' See the info provided by Twitter and the "Developer Account" section.
#'
#' Note: The `env_name` must match the ones you set up for the token you are using.
#' `r lifecycle::badge("deprecated")`
#'
#' @inheritParams TWIT_paginate_max_id
#' @inheritParams stream
#' @param q Search query on which to match/filter tweets. See details for
#' information about available search operators.
#' @param continue A character string with the next results of a query. You
#' must make the exact same query as the original, including `q`, `toDate`,
#' and `fromDate`.
#' @param premium A logical value indicating whether the environment is paid
#' (`TRUE`) or sandboxed (`FALSE`, the default). It sets how many results are
#' requested per API call (500 if `TRUE`, 100 otherwise), and therefore the
#' number of API queries needed to retrieve `n` results.
#' @param fromDate Oldest date-time (YYYYMMDDHHMM) from which tweets should be
#' searched for.
#' @param toDate Newest date-time (YYYYMMDDHHMM) up to which tweets should be
#' searched for.
#' @param env_name Name/label of developer environment to use for the search.
#' @param safedir Name of directory to which each response object should be
#' saved. This is temporarily not supported: any value other than `NULL`
#' (the default) raises an error.
#'
#' @section Developer Account:
#' Users must have an approved developer account and an active/labeled
#' environment to access Twitter's premium APIs. For more information, to check
#' your current Subscriptions and Dev Environments, or to apply for a developer
#' account visit <https://developer.twitter.com>.
#'
#' @section Search operators:
#' *Note: Bolded operators ending with a colon should be immediately
#' followed by a word or quoted phrase (if appropriate), e.g.,* `lang:en`
#'
#' @section Keyword:
#' \itemize{
#' \item **""** ~~ match exact phrase
#' \item **#** ~~ hashtag
#' \item **@@** ~~ at mentions
#' \item **url:** ~~ found in URL
#' \item **lang:** ~~ language of tweet
#' }
#'
#' @section Accounts of interest:
#' \itemize{
#' \item **from:** ~~ authored by
#' \item **to:** ~~ sent to
#' \item **retweets_of:** ~~ retweet author
#' }
#'
#' @section Tweet attributes:
#' \itemize{
#' \item **is:retweet** ~~ only retweets
#' \item **has:mentions** ~~ uses mention(s)
#' \item **has:hashtags** ~~ uses hashtag(s)
#' \item **has:media** ~~ includes media
#' \item **has:videos** ~~ includes video(s)
#' \item **has:images** ~~ includes image(s)
#' \item **has:links** ~~ includes URL(s)
#' \item **is:verified** ~~ from verified accounts
#' }
#'
#' @section Geospatial:
#' \itemize{
#' \item **bounding_box:\[west_long south_lat east_long north_lat\]** ~~ lat/long coordinates box
#' \item **point_radius:\[lon lat radius\]** ~~ center of search radius
#' \item **has:geo** ~~ uses geotagging
#' \item **place:** ~~ by place
#' \item **place_country:** ~~ by country
#' \item **has:profile_geo** ~~ geo associated with profile
#' \item **profile_country:** ~~ country associated with profile
#' \item **profile_region:** ~~ region associated with profile
#' \item **profile_locality:** ~~ locality associated with profile
#' }
#'
#' @return A tibble data frame of Twitter data.
#' @family premium endpoints
#' @seealso [tweet_search_recent()], [tweet_search_all()], [`rtweet-deprecated`]
#' @export
search_fullarchive <- function(q, n = 100, fromDate = NULL, toDate = NULL,
                               continue = NULL,
                               env_name = NULL, premium = FALSE,
                               safedir = NULL, parse = TRUE, token = NULL) {
  # Thin wrapper: every argument is forwarded unchanged to search_premium(),
  # with the product fixed to the "fullarchive" endpoint.
  search_premium(
    product = "fullarchive",
    q = q, n = n,
    fromDate = fromDate, toDate = toDate,
    env_name = env_name, continue = continue,
    premium = premium, safedir = safedir,
    parse = parse, token = token
  )
}
#' @rdname search_fullarchive
#' @export
search_30day <- function(q, n = 100, fromDate = NULL, toDate = NULL,
                         env_name = NULL,
                         continue = NULL, premium = FALSE,
                         safedir = NULL,
                         parse = TRUE,
                         token = NULL) {
  # Thin wrapper: every argument is forwarded unchanged to search_premium(),
  # with the product fixed to the "30day" endpoint.
  search_premium(
    product = "30day",
    q = q, n = n,
    fromDate = fromDate, toDate = toDate,
    env_name = env_name, continue = continue,
    premium = premium, safedir = safedir,
    parse = parse, token = token
  )
}
# Shared implementation behind search_30day() and search_fullarchive().
#
# Validates the arguments, builds the request for the
# `/1.1/tweets/search/<product>/<env_name>` endpoint and delegates the
# requests to TWIT_paginate_premium(), asking for pages of 500 results when
# `premium` is TRUE and 100 otherwise. With `parse = TRUE` the raw result is
# converted with tweets_with_users() and its "next" attribute is carried over
# to the parsed object; otherwise the raw result is returned as is.
search_premium <- function(product, q, n = NULL, fromDate = NULL, toDate = NULL,
                           env_name = NULL, continue = NULL, premium = FALSE, safedir = NULL,
                           parse = TRUE,
                           token = NULL) {
  # Guard clauses; their order decides which error is reported first.
  if (is.null(env_name)) {
    stop("Must provide dev environment name")
  }
  if (!is.null(safedir)) {
    stop("`safedir` temporarily not supported")
  }
  if (!is_logical(premium)) {
    stop("premium must be either TRUE or FALSE.", call. = FALSE)
  }

  query_params <- list(
    query = q,
    maxResults = n,
    fromDate = format_from_to_date(fromDate),
    # tag = ?? Not sure how to support tags or how they are used.
    toDate = format_from_to_date(toDate)
  )
  endpoint <- paste0("/1.1/tweets/search/", product, "/", env_name)

  pages <- TWIT_paginate_premium(
    token, endpoint, query_params,
    n = n,
    cursor = continue,
    page_size = if (premium) 500 else 100
  )

  if (parse) {
    # Parsing builds a new object, so remember the "next" attribute and
    # re-attach it afterwards.
    next_token <- attr(pages, "next")
    tweets <- tweets_with_users(pages)
    tweets$created_at <- format_date(tweets$created_at)
    attr(tweets, "next") <- next_token
    return(tweets)
  }
  pages
}
# Normalise a `fromDate`/`toDate` value to the "YYYYMMDDHHMM" string form.
#
# Accepted inputs:
#   * NULL                      -> NULL (no bound requested)
#   * character containing "-"  -> parsed as a date-time when longer than 11
#                                  characters, otherwise as a date
#   * Date                      -> midnight of that calendar day
#   * POSIXct / POSIXlt         -> formatted to minute resolution
#   * anything else (e.g. an already formatted "YYYYMMDDHHMM" string) is
#     returned unchanged.
#
# Errors if more than one value is supplied.
format_from_to_date <- function(x = NULL) {
  if (is.null(x)) {
    return(NULL)
  }
  if (length(x) > 1L) {
    stop("Can only provide one value to fromDate/toDate", call. = FALSE)
  }

  if (is.character(x) && grepl("-", x)) {
    # "2020-01-01" is 10 characters; anything longer than 11 is assumed to
    # carry a time component as well.
    x <- if (nchar(x) > 11) as.POSIXct(x) else as.Date(x)
  }

  if (inherits(x, "Date")) {
    # A Date has no time zone. Converting it to POSIXct yields midnight UTC,
    # so it must also be formatted in UTC; formatting in the session time zone
    # could shift the result onto the previous or next calendar day.
    return(format(as.POSIXct(x), "%Y%m%d%H%M", tz = "UTC"))
  }
  if (inherits(x, "POSIXt")) {
    # Covers both POSIXct and POSIXlt; uses the object's own time zone.
    x <- format(x, "%Y%m%d%H%M")
  }
  x
}