✨ feat(#46): Added hourly graph.

ryoma116 · May 21, 2021 · b15fc25 · b15fc25
1 parent f7513c6
commit b15fc25
Show file tree

Hide file tree

Showing 6 changed files with 147 additions and 23 deletions.
diff --git a/src/api.py b/src/api.py
@@ -2,10 +2,15 @@
 
 import pandas
 import pytz
-import tweepy
+
+from auth import TwitterAuthKeys, auth_twitter_api
 
 from .filters import filter_user
-from .graphs import make_daily_tweet_users_graph, make_daily_tweets_graph
+from .graphs import (
+    make_daily_tweet_users_graph,
+    make_daily_tweets_graph,
+    make_hourly_tweets_graph,
+)
 from .loggers import get_logger, set_logger_timezone
 from .rankings import make_user_ranking, print_user_rankings
 from .tweets import search_tweets
@@ -19,10 +24,11 @@ class TwiVisAPI:
     def __init__(
         self, api_key, api_secret, access_token, access_token_secret, timezone="UTC"
     ):
-        auth = tweepy.OAuthHandler(api_key, api_secret)
-        auth.set_access_token(access_token, access_token_secret)
-        self._api = tweepy.API(
-            auth, retry_count=3, retry_delay=1, wait_on_rate_limit=True
+        self._auth_keys = TwitterAuthKeys(
+            api_key=api_key,
+            api_secret=api_secret,
+            access_token=access_token,
+            access_token_secret=access_token_secret,
         )
         self._df = None
         self._search_word = None
@@ -34,7 +40,7 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
         logger.info("=== search_tweets Start")
         search_query = search_word + " " + advanced_query
         tweets = search_tweets(
-            api=self._api,
+            api=auth_twitter_api(auth_keys=self._auth_keys),
             search_query=search_query,
             limit=limit,
             timezone=self._timezone,
@@ -47,7 +53,8 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
     def set_followers(self, user_screen_name):
         logger.info("=== set_followers Start")
         follower_ids = get_follower_ids(
-            api=self._api, user_screen_name=user_screen_name
+            api=auth_twitter_api(auth_keys=self._auth_keys),
+            user_screen_name=user_screen_name,
         )
         self._df["follower"] = self._df.apply(
             lambda x: x.user_id in follower_ids, axis=1
@@ -57,7 +64,8 @@ def set_followers(self, user_screen_name):
     def set_following(self, user_screen_name):
         logger.info("=== set_following Start")
         following_ids = get_following_ids(
-            api=self._api, user_screen_name=user_screen_name
+            api=auth_twitter_api(auth_keys=self._auth_keys),
+            user_screen_name=user_screen_name,
         )
         self._df["following"] = self._df.apply(
             lambda x: x.user_id in following_ids, axis=1
@@ -80,6 +88,14 @@ def make_daily_tweet_users_graph(self, **kwargs):
         )
         figure.show()
 
+    def make_hourly_tweets_graph(self, **kwargs):
+        """Show the hourly tweet-count line graph for the stored tweets.
+
+        :param kwargs: forwarded unchanged to ``filter_user``
+        """
+        validate_tweet_exists(self._df)
+        filtered = filter_user(self._df, **kwargs)
+        # Inside a method body this name resolves to the module-level graph
+        # builder, not to this method (which is only reachable via ``self``).
+        make_hourly_tweets_graph(
+            filtered, search_word=self._search_word, timezone=self._timezone
+        ).show()
+
     def make_tweets_user_ranking(self, **kwargs):
         validate_tweet_exists(self._df)
         rankings = make_user_ranking(

diff --git a/src/auth.py b/src/auth.py
@@ -0,0 +1,24 @@
+from dataclasses import dataclass
+
+import tweepy
+
+
+@dataclass
+class TwitterAuthKeys:
+    """Bundle of the four credential strings consumed by ``auth_twitter_api``."""
+
+    # Passed together to tweepy.OAuthHandler(...) by auth_twitter_api.
+    api_key: str
+    api_secret: str
+    # Passed together to OAuthHandler.set_access_token(...) by auth_twitter_api.
+    access_token: str
+    access_token_secret: str
+
+
+def auth_twitter_api(auth_keys: TwitterAuthKeys):
+    """Create a fresh, authenticated ``tweepy.API`` client from *auth_keys*.
+
+    A new OAuth handler and a new client object are built on every call.
+
+    :param auth_keys: credentials used for the OAuth handshake
+    :return: a ``tweepy.API`` instance configured with the options below
+    """
+    handler = tweepy.OAuthHandler(auth_keys.api_key, auth_keys.api_secret)
+    handler.set_access_token(auth_keys.access_token, auth_keys.access_token_secret)
+
+    # Client options kept in one place so they are easy to scan.
+    client_options = {
+        "retry_count": 10,
+        "retry_delay": 60,
+        "wait_on_rate_limit": True,
+        "timeout": 120,
+        "wait_on_rate_limit_notify": True,
+    }
+    return tweepy.API(handler, **client_options)
diff --git a/src/constants.py b/src/constants.py
@@ -32,3 +32,5 @@ class FfRatioOrderModes(Enum):
     FOLLOWER_IDS_API_PATH: "https://api.twitter.com/1.1/followers/ids.json",
     FRIEND_IDS_API_PATH: "https://api.twitter.com/1.1/friends/ids.json",
 }
+
+RETRY_COUNT = 10
diff --git a/src/graphs.py b/src/graphs.py
@@ -2,6 +2,7 @@
 import plotly.express
 
 from .processors import (
+    make_count_tweeted_df,
     make_count_tweeted_weekday_df,
     make_title,
     make_tweet_user_weekday_max_hour_df,
@@ -53,6 +54,29 @@ def make_daily_tweet_users_graph(df: pandas.DataFrame, search_word: str, timezon
     return fig
 
 
+def make_hourly_tweets_graph(df: pandas.DataFrame, search_word: str, timezone):
+    """Build a line graph of the number of tweets per weekday-hour slot.
+
+    :param df: DataFrame to aggregate; it is grouped on its ``tweeted_wh`` column
+    :param search_word: search word shown in the graph title
+    :param timezone: timezone object, forwarded to ``make_count_tweeted_df``
+    :return: the figure returned by ``plot_line`` with x tick labels rotated
+    """
+    _df = make_count_tweeted_df(df, timezone=timezone, group_col="tweeted_wh")
+    # Sum only the numeric column; summing the whole frame would also
+    # concatenate every string label in ``tweeted_wh`` for no reason.
+    _total_count = _df["count"].sum()
+    fig = plot_line(
+        _df,
+        x_col="tweeted_wh",
+        x_label="ツイート時間",
+        y_col="count",
+        # The plotted value is a tweet count (not a count of users), so the
+        # axis label must agree with the "ツイート数" title.
+        y_label="ツイート数",
+        title=make_title(
+            df, main_title="時間別ツイート数", count=_total_count, search_word=search_word
+        ),
+    )
+    # There are 24 labels per day, so rotate them to keep the axis readable.
+    fig.update_xaxes(tickangle=-90)
+    return fig
+
+
 def plot_line(
     df: pandas.DataFrame, x_col: str, y_col: str, x_label: str, y_label: str, title: str
 ):
@@ -70,7 +94,6 @@ def plot_line(
         df,
         x=x_col,
         y=y_col,
-        text=y_col,
         title=title,
         labels={
             y_col: y_label,

diff --git a/src/processors.py b/src/processors.py
@@ -35,6 +35,10 @@ def make_weekday(dt: datetime, timezone) -> str:
         return f"{dstr}({WEEKDAYS[dt.weekday()]})"
 
 
+def make_weekday_hour(weekday, hour):
+    """Join a weekday label and an hour label with a single space.
+
+    :param weekday: weekday label (formatted with ``format()``)
+    :param hour: hour label (formatted with ``format()``)
+    :return: ``"<weekday> <hour>"``
+    """
+    return "{} {}".format(weekday, hour)
+
+
 def make_tweeted_weekday_range(timezone) -> List[str]:
     """グラフに描画する曜日付き日付ラベルの範囲を生成する
 
@@ -66,6 +70,24 @@ def make_count_tweeted_weekday_df(df: pandas.DataFrame, timezone) -> pandas.Data
     return _df.sort_index().reset_index()
 
 
+def make_count_tweeted_df(
+    df: pandas.DataFrame, timezone, group_col
+) -> pandas.DataFrame:
+    """Count tweets per ``group_col`` value and zero-fill the missing labels.
+
+    Rows are counted by ``tweet_id``. Every label produced by
+    ``make_tweeted_weekday_hour_label_range`` that has no tweets is added
+    with a count of 0, so the result has no gaps along the label range.
+
+    :param df: DataFrame to aggregate (must contain ``group_col`` and ``tweet_id``)
+    :param timezone: timezone object used to build the label range
+    :param group_col: name of the column to group by
+    :return: DataFrame with columns ``group_col`` and ``count``, sorted by ``group_col``
+    """
+    _df = df.groupby(group_col)["tweet_id"].agg(count="count")
+
+    # Collect all missing labels first and add them in one concat instead of
+    # calling the deprecated (and quadratic) DataFrame.append once per label.
+    existing = set(_df.index)
+    labels = make_tweeted_weekday_hour_label_range(timezone=timezone)
+    missing = [wd for wd in dict.fromkeys(labels) if wd not in existing]
+    if missing:
+        _zero_df = pandas.DataFrame(
+            {"count": 0}, index=pandas.Index(missing, name=group_col)
+        )
+        _df = pandas.concat([_df, _zero_df])
+    return _df.sort_index().reset_index()
+
+
 def make_tweeted_hour_label_range() -> List[str]:
     """グラフに描画する時間ラベルの範囲を生成する
 
@@ -74,6 +96,19 @@ def make_tweeted_hour_label_range() -> List[str]:
     return [str(i).zfill(2) for i in range(24)]
 
 
+def make_tweeted_weekday_hour_label_range(timezone) -> List[str]:
+    """Generate the weekday-and-hour labels to draw on the graph.
+
+    Labels are ordered weekday-major: every hour label of the first weekday
+    label, then every hour label of the next, and so on.
+
+    :param timezone: timezone object, forwarded to ``make_tweeted_weekday_range``
+    :return: list of combined weekday/hour labels
+    """
+    return [
+        make_weekday_hour(weekday=weekday_label, hour=hour_label)
+        for weekday_label in make_tweeted_weekday_range(timezone)
+        for hour_label in make_tweeted_hour_label_range()
+    ]
+
+
 def make_count_tweeted_hour_df(df: pandas.DataFrame) -> pandas.DataFrame:
     """時間別にツイート数をカウントしたDataFrameを生成する
 

diff --git a/src/tweets.py b/src/tweets.py
@@ -6,10 +6,17 @@
 import pytz
 import tweepy
 
-from .constants import API_COUNTS, FULL_TEXT_TWEET_MODE, SEARCH_API_PATH, TODAY_EXCLUDED
-from .limits import get_rate_limit_reset_time, is_rate_limit
+from auth import TwitterAuthKeys, auth_twitter_api
+
+from .constants import (
+    API_COUNTS,
+    FULL_TEXT_TWEET_MODE,
+    RETRY_COUNT,
+    SEARCH_API_PATH,
+    TODAY_EXCLUDED,
+)
 from .loggers import get_logger
-from .processors import make_weekday
+from .processors import make_weekday, make_weekday_hour
 
 logger = get_logger(__name__, loglevel=logging.INFO)
 
@@ -39,12 +46,17 @@ def search_tweets(
     tweets = []
     next_max_tweet_id = None
     limited = False
-    while True:
-        if is_rate_limit(api, api_path=SEARCH_API_PATH):
-            reset_time = get_rate_limit_reset_time(api, api_path=SEARCH_API_PATH)
-            logger.info(f"アクセス上限のため処理休止中({reset_time}秒)..")
-            time.sleep(reset_time)
 
+    # リトライ用に退避
+    auth_keys = TwitterAuthKeys(
+        api_key=api.auth.consumer_key,
+        api_secret=api.auth.consumer_secret,
+        access_token=api.auth.access_token,
+        access_token_secret=api.auth.access_token_secret,
+    )
+
+    retry_count = 0
+    while True:
         _tweets = []
         try:
             _tweets = api.search(
@@ -53,9 +65,16 @@ def search_tweets(
                 count=API_COUNTS[SEARCH_API_PATH],
                 max_id=next_max_tweet_id,
             )
-        except tweepy.RateLimitError:
-            logger.info("アクセス上限のため処理休止中(15分)..")
-            time.sleep(15 * 60)
+            retry_count = 0
+
+        except Exception as e:
+            if retry_count > RETRY_COUNT:
+                raise e
+
+            logger.info("ReadTimeout occurred and re-authenticated.")
+            api = auth_twitter_api(auth_keys=auth_keys)
+            retry_count += 1
+            continue
 
         # 取得するツイートがなくなった場合に処理終了
         if len(_tweets) == 0:
@@ -67,12 +86,17 @@ def search_tweets(
             else:
                 dt = _convert_timezone(t.created_at, timezone=timezone)
 
+            tweeted_weekday = make_weekday(dt, timezone=timezone)
+            tweeted_hour = dt.strftime("%H")
             tweets.append(
                 {
                     "tweeted_dt": dt,
                     "tweeted_date": dt.date(),
-                    "tweeted_weekday": make_weekday(dt, timezone=timezone),
-                    "tweeted_hour": dt.strftime("%H"),
+                    "tweeted_weekday": tweeted_weekday,
+                    "tweeted_hour": tweeted_hour,
+                    "tweeted_wh": make_weekday_hour(
+                        weekday=tweeted_weekday, hour=tweeted_hour
+                    ),
                     "tweet_id": t.id,
                     "favorite_count": t.favorite_count,
                     "retweet_count": t.retweet_count,