From 57aece6bd5f913c9c898dc12035f90c74c6cb6f8 Mon Sep 17 00:00:00 2001 From: robertoszek Date: Mon, 5 Dec 2022 01:37:11 +0100 Subject: [PATCH] paginate with guest tokens --- docs/gettingstarted/beforerunning.md | 1 - pleroma_bot/_twitter.py | 66 ++++++++++++++++++++++------ pleroma_bot/cli.py | 1 + 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/docs/gettingstarted/beforerunning.md b/docs/gettingstarted/beforerunning.md index 1f6ad3b..abb4f6d 100644 --- a/docs/gettingstarted/beforerunning.md +++ b/docs/gettingstarted/beforerunning.md @@ -18,7 +18,6 @@ The bot will automatically obtain them on its own when no `twitter_token` mappin It has certain limitations, however: -- Only gathers the latest 20 tweets for each account in every run - No access to tweets from protected accounts To get started with Guest Tokens you'll just need to obtain the [Fediverse tokens](/pleroma-bot/gettingstarted/beforerunning/#fediverse-tokens) and create a [configuration file](/pleroma-bot/gettingstarted/configuration/). 
def _find_guest_cursor(instructions, direction="bottom"):
    """Return the pagination cursor value found in timeline instructions.

    :param instructions: list of instruction dicts taken from
        ``response["timeline"]["instructions"]``
    :param direction: cursor kind to look for; entries are matched by an
        ``entryId`` starting with ``sq-cursor-<direction>``
    :returns: the cursor value, or ``None`` when no matching entry exists
    """
    prefix = f"sq-cursor-{direction}"
    entries = []
    for inst in instructions:
        if "addEntries" in inst:
            entries = inst["addEntries"]["entries"]
        elif "replaceEntry" in inst:
            entry = inst["replaceEntry"]["entry"]
            # A replaced cursor entry supersedes whatever was collected
            # from earlier instructions.
            if entry["entryId"].startswith(prefix):
                entries = [entry]

    cursor = None
    for entry in entries:
        if entry["entryId"].startswith(prefix):
            cursor = entry["content"]["operation"]["cursor"]["value"]
    return cursor


def _get_tweets_guest(
    self, param=None, pbar=None, tweets=None, retries=None
):  # pragma: todo
    """Collect search results page by page using the guest-token headers.

    Requests ``search/adaptive.json`` repeatedly, following the "bottom"
    cursor found in each response, and merges every page's
    ``globalObjects.tweets`` mapping into one dict.

    Pagination stops when a response carries no bottom cursor, or once the
    retry budget is exhausted. A retry is counted each time a page comes
    back with the cursor that was just sent, or with no tweets at all.

    :param param: query parameters for the search request. The mapping is
        copied, so the caller's dict is never modified.
    :param pbar: optional progress object; its ``update(n)`` is called
        with the number of tweets in each page
    :param tweets: optional dict to accumulate results into (updated in
        place); a new dict is created when omitted
    :param retries: number of retries already spent (defaults to 0)
    :returns: dict of tweets as keyed by the API response
        (presumably tweet id -> tweet object; verify against the API)
    """
    if tweets is None:
        tweets = {}
    if retries is None:
        retries = 0
    max_retries = 5
    search_url = (
        "https://twitter.com/i/api/2/search/adaptive.json"
    )
    # Work on a private copy: the cursor is injected into the parameters
    # on every page and must not leak back into the caller's dict.
    params = dict(param) if param else {}

    # Iterate instead of recursing so long timelines cannot run into the
    # interpreter's recursion limit.
    while True:
        response = self.twitter_api_request(
            'GET',
            search_url,
            headers=self.header_twitter,
            params=params,
        )
        resp_json = response.json()
        page_tweets = resp_json["globalObjects"]["tweets"]
        cursor = _find_guest_cursor(resp_json['timeline']['instructions'])

        self.result_count += len(page_tweets)
        # Compare against None rather than relying on truthiness, since a
        # progress-bar object may define its own __bool__/__len__.
        if pbar is not None:
            pbar.update(len(page_tweets))
        tweets.update(page_tweets)

        if not cursor:
            break
        # Only requests that already carried a cursor can count as a
        # retry; the very first page never does.
        if "cursor" in params:
            if params["cursor"] == cursor or len(page_tweets) == 0:
                retries += 1
        params["cursor"] = cursor
        if retries > max_retries:
            break
    return tweets