diff --git a/src/api.py b/src/api.py
index 206a75e..04ea49c 100644
--- a/src/api.py
+++ b/src/api.py
@@ -2,10 +2,15 @@
 
 import pandas
 import pytz
-import tweepy
+
+from .auth import TwitterAuthKeys, auth_twitter_api
 
 from .filters import filter_user
-from .graphs import make_daily_tweet_users_graph, make_daily_tweets_graph
+from .graphs import (
+    make_daily_tweet_users_graph,
+    make_daily_tweets_graph,
+    make_hourly_tweets_graph,
+)
 from .loggers import get_logger, set_logger_timezone
 from .rankings import make_user_ranking, print_user_rankings
 from .tweets import search_tweets
@@ -19,10 +24,11 @@ class TwiVisAPI:
     def __init__(
         self, api_key, api_secret, access_token, access_token_secret, timezone="UTC"
     ):
-        auth = tweepy.OAuthHandler(api_key, api_secret)
-        auth.set_access_token(access_token, access_token_secret)
-        self._api = tweepy.API(
-            auth, retry_count=3, retry_delay=1, wait_on_rate_limit=True
+        self._auth_keys = TwitterAuthKeys(
+            api_key=api_key,
+            api_secret=api_secret,
+            access_token=access_token,
+            access_token_secret=access_token_secret,
         )
         self._df = None
         self._search_word = None
@@ -34,7 +40,7 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
         logger.info("=== search_tweets Start")
         search_query = search_word + " " + advanced_query
         tweets = search_tweets(
-            api=self._api,
+            api=auth_twitter_api(auth_keys=self._auth_keys),
             search_query=search_query,
             limit=limit,
             timezone=self._timezone,
@@ -47,7 +53,8 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
     def set_followers(self, user_screen_name):
         logger.info("=== set_followers Start")
         follower_ids = get_follower_ids(
-            api=self._api, user_screen_name=user_screen_name
+            api=auth_twitter_api(auth_keys=self._auth_keys),
+            user_screen_name=user_screen_name,
         )
         self._df["follower"] = self._df.apply(
             lambda x: x.user_id in follower_ids, axis=1
@@ -57,7 +64,8 @@ def set_followers(self, user_screen_name):
     def set_following(self, user_screen_name):
         logger.info("=== set_following Start")
         following_ids = get_following_ids(
-            api=self._api, user_screen_name=user_screen_name
+            api=auth_twitter_api(auth_keys=self._auth_keys),
+            user_screen_name=user_screen_name,
         )
         self._df["following"] = self._df.apply(
             lambda x: x.user_id in following_ids, axis=1
@@ -80,6 +88,14 @@ def make_daily_tweet_users_graph(self, **kwargs):
         )
         figure.show()
 
+    def make_hourly_tweets_graph(self, **kwargs):
+        validate_tweet_exists(self._df)
+        _df = filter_user(self._df, **kwargs)
+        figure = make_hourly_tweets_graph(
+            _df, search_word=self._search_word, timezone=self._timezone
+        )
+        figure.show()
+
     def make_tweets_user_ranking(self, **kwargs):
         validate_tweet_exists(self._df)
         rankings = make_user_ranking(
diff --git a/src/auth.py b/src/auth.py
new file mode 100644
index 0000000..cc0933d
--- /dev/null
+++ b/src/auth.py
@@ -0,0 +1,31 @@
+from dataclasses import dataclass
+
+import tweepy
+
+
+@dataclass
+class TwitterAuthKeys:
+    """Credentials used to build an authenticated tweepy client."""
+
+    api_key: str
+    api_secret: str
+    access_token: str
+    access_token_secret: str
+
+
+def auth_twitter_api(auth_keys: TwitterAuthKeys):
+    """Build a new authenticated ``tweepy.API`` client on every call.
+
+    :param auth_keys: consumer key/secret and access token/secret to sign with
+    :return: client configured to retry failed requests and wait on rate limits
+    """
+    _auth = tweepy.OAuthHandler(auth_keys.api_key, auth_keys.api_secret)
+    _auth.set_access_token(auth_keys.access_token, auth_keys.access_token_secret)
+    return tweepy.API(
+        _auth,
+        retry_count=10,
+        retry_delay=60,
+        wait_on_rate_limit=True,
+        timeout=120,
+        wait_on_rate_limit_notify=True,
+    )
diff --git a/src/constants.py b/src/constants.py
index 7c47ea0..969e1bd 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -32,3 +32,6 @@ class FfRatioOrderModes(Enum):
     FOLLOWER_IDS_API_PATH: "https://api.twitter.com/1.1/followers/ids.json",
     FRIEND_IDS_API_PATH: "https://api.twitter.com/1.1/friends/ids.json",
 }
+
+# Maximum number of consecutive retries for a failed search request.
+RETRY_COUNT = 10
diff --git a/src/graphs.py b/src/graphs.py
index 391878d..4714590 100644
--- a/src/graphs.py
+++ b/src/graphs.py
@@ -2,6 +2,7 @@
 import plotly.express
 
 from .processors import (
+    make_count_tweeted_df,
     make_count_tweeted_weekday_df,
     make_title,
     make_tweet_user_weekday_max_hour_df,
@@ -53,6 +54,29 @@ def make_daily_tweet_users_graph(df: pandas.DataFrame, search_word: str, timezon
     return fig
 
 
+def make_hourly_tweets_graph(df: pandas.DataFrame, search_word: str, timezone):
+    """Build a line graph of the number of tweets per weekday-hour label.
+
+    :param df: DataFrame to aggregate
+    :param search_word: search word shown in the title
+    :param timezone: timezone object
+    """
+    _df = make_count_tweeted_df(df, timezone=timezone, group_col="tweeted_wh")
+    _total_count = _df.sum()["count"]
+    fig = plot_line(
+        _df,
+        x_col="tweeted_wh",
+        x_label="ツイート時間",
+        y_col="count",
+        y_label="ツイート数",
+        title=make_title(
+            df, main_title="時間別ツイート数", count=_total_count, search_word=search_word
+        ),
+    )
+    fig.update_xaxes(tickangle=-90)
+    return fig
+
+
 def plot_line(
     df: pandas.DataFrame, x_col: str, y_col: str, x_label: str, y_label: str, title: str
 ):
@@ -70,7 +94,6 @@ def plot_line(
         df,
         x=x_col,
         y=y_col,
-        text=y_col,
         title=title,
         labels={
             y_col: y_label,
diff --git a/src/processors.py b/src/processors.py
index 9607f72..19fd96b 100644
--- a/src/processors.py
+++ b/src/processors.py
@@ -35,6 +35,11 @@ def make_weekday(dt: datetime, timezone) -> str:
     return f"{dstr}({WEEKDAYS[dt.weekday()]})"
 
 
+def make_weekday_hour(weekday, hour):
+    """Join a weekday label and an hour label with a single space."""
+    return f"{weekday} {hour}"
+
+
 def make_tweeted_weekday_range(timezone) -> List[str]:
     """グラフに描画する曜日付き日付ラベルの範囲を生成する
 
@@ -66,6 +71,25 @@ def make_count_tweeted_weekday_df(df: pandas.DataFrame, timezone) -> pandas.Data
     return _df.sort_index().reset_index()
 
 
+def make_count_tweeted_df(
+    df: pandas.DataFrame, timezone, group_col
+) -> pandas.DataFrame:
+    """Count tweets per ``group_col``, zero-filling unseen weekday-hour labels.
+
+    :param df: DataFrame to aggregate
+    :param timezone: timezone object
+    :param group_col: name of the column to group by
+    :return: DataFrame of tweet counts per ``group_col`` value, sorted by label
+    """
+    _df = df.groupby(group_col)["tweet_id"].agg(count="count")
+    for wd in make_tweeted_weekday_hour_label_range(timezone=timezone):
+        if wd not in _df.index:
+            _zero_df = pandas.DataFrame([0], index=[wd], columns=["count"])
+            _zero_df.index.name = group_col
+            _df = _df.append(_zero_df)
+    return _df.sort_index().reset_index()
+
+
 def make_tweeted_hour_label_range() -> List[str]:
     """グラフに描画する時間ラベルの範囲を生成する
 
@@ -74,6 +98,19 @@ def make_tweeted_hour_label_range() -> List[str]:
     return [str(i).zfill(2) for i in range(24)]
 
 
+def make_tweeted_weekday_hour_label_range(timezone) -> List[str]:
+    """Build the range of weekday-hour labels drawn on the graph.
+
+    :return: list of weekday-hour labels
+    """
+    labels = []
+    for w in make_tweeted_weekday_range(timezone):
+        for h in make_tweeted_hour_label_range():
+            labels.append(make_weekday_hour(weekday=w, hour=h))
+
+    return labels
+
+
 def make_count_tweeted_hour_df(df: pandas.DataFrame) -> pandas.DataFrame:
     """時間別にツイート数をカウントしたDataFrameを生成する
 
diff --git a/src/tweets.py b/src/tweets.py
index af4244e..465623b 100644
--- a/src/tweets.py
+++ b/src/tweets.py
@@ -6,10 +6,17 @@
 import pytz
 import tweepy
 
-from .constants import API_COUNTS, FULL_TEXT_TWEET_MODE, SEARCH_API_PATH, TODAY_EXCLUDED
-from .limits import get_rate_limit_reset_time, is_rate_limit
+from .auth import TwitterAuthKeys, auth_twitter_api
+
+from .constants import (
+    API_COUNTS,
+    FULL_TEXT_TWEET_MODE,
+    RETRY_COUNT,
+    SEARCH_API_PATH,
+    TODAY_EXCLUDED,
+)
 from .loggers import get_logger
-from .processors import make_weekday
+from .processors import make_weekday, make_weekday_hour
 
 logger = get_logger(__name__, loglevel=logging.INFO)
 
@@ -39,12 +46,17 @@ def search_tweets(
     tweets = []
     next_max_tweet_id = None
     limited = False
-    while True:
-        if is_rate_limit(api, api_path=SEARCH_API_PATH):
-            reset_time = get_rate_limit_reset_time(api, api_path=SEARCH_API_PATH)
-            logger.info(f"アクセス上限のため処理休止中({reset_time}秒)..")
-            time.sleep(reset_time)
 
+    # Keep the credentials so the client can be rebuilt when a request fails
+    auth_keys = TwitterAuthKeys(
+        api_key=api.auth.consumer_key,
+        api_secret=api.auth.consumer_secret,
+        access_token=api.auth.access_token,
+        access_token_secret=api.auth.access_token_secret,
+    )
+
+    retry_count = 0
+    while True:
         _tweets = []
         try:
             _tweets = api.search(
@@ -53,9 +65,16 @@ def search_tweets(
                 count=API_COUNTS[SEARCH_API_PATH],
                 max_id=next_max_tweet_id,
             )
-        except tweepy.RateLimitError:
-            logger.info("アクセス上限のため処理休止中(15分)..")
-            time.sleep(15 * 60)
+            retry_count = 0
+
+        except Exception as e:
+            if retry_count >= RETRY_COUNT:
+                raise
+
+            logger.info(f"Search failed ({e!r}); re-authenticating to retry.")
+            api = auth_twitter_api(auth_keys=auth_keys)
+            retry_count += 1
+            continue
 
         # 取得するツイートがなくなった場合に処理終了
         if len(_tweets) == 0:
@@ -67,12 +86,17 @@ def search_tweets(
             else:
                 dt = _convert_timezone(t.created_at, timezone=timezone)
 
+            tweeted_weekday = make_weekday(dt, timezone=timezone)
+            tweeted_hour = dt.strftime("%H")
             tweets.append(
                 {
                     "tweeted_dt": dt,
                     "tweeted_date": dt.date(),
-                    "tweeted_weekday": make_weekday(dt, timezone=timezone),
-                    "tweeted_hour": dt.strftime("%H"),
+                    "tweeted_weekday": tweeted_weekday,
+                    "tweeted_hour": tweeted_hour,
+                    "tweeted_wh": make_weekday_hour(
+                        weekday=tweeted_weekday, hour=tweeted_hour
+                    ),
                     "tweet_id": t.id,
                     "favorite_count": t.favorite_count,
                     "retweet_count": t.retweet_count,