Skip to content

Commit

Permalink
✨ feat(#46): Added hourly graph.
Browse files Browse the repository at this point in the history
  • Loading branch information
ryoma116 committed May 21, 2021
1 parent f7513c6 commit b15fc25
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 23 deletions.
34 changes: 25 additions & 9 deletions src/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

import pandas
import pytz
import tweepy

from auth import TwitterAuthKeys, auth_twitter_api

from .filters import filter_user
from .graphs import make_daily_tweet_users_graph, make_daily_tweets_graph
from .graphs import (
make_daily_tweet_users_graph,
make_daily_tweets_graph,
make_hourly_tweets_graph,
)
from .loggers import get_logger, set_logger_timezone
from .rankings import make_user_ranking, print_user_rankings
from .tweets import search_tweets
Expand All @@ -19,10 +24,11 @@ class TwiVisAPI:
def __init__(
self, api_key, api_secret, access_token, access_token_secret, timezone="UTC"
):
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
self._api = tweepy.API(
auth, retry_count=3, retry_delay=1, wait_on_rate_limit=True
self._auth_keys = TwitterAuthKeys(
api_key=api_key,
api_secret=api_secret,
access_token=access_token,
access_token_secret=access_token_secret,
)
self._df = None
self._search_word = None
Expand All @@ -34,7 +40,7 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
logger.info("=== search_tweets Start")
search_query = search_word + " " + advanced_query
tweets = search_tweets(
api=self._api,
api=auth_twitter_api(auth_keys=self._auth_keys),
search_query=search_query,
limit=limit,
timezone=self._timezone,
Expand All @@ -47,7 +53,8 @@ def search_tweets(self, search_word: str, advanced_query: str, limit: int = None
def set_followers(self, user_screen_name):
logger.info("=== set_followers Start")
follower_ids = get_follower_ids(
api=self._api, user_screen_name=user_screen_name
api=auth_twitter_api(auth_keys=self._auth_keys),
user_screen_name=user_screen_name,
)
self._df["follower"] = self._df.apply(
lambda x: x.user_id in follower_ids, axis=1
Expand All @@ -57,7 +64,8 @@ def set_followers(self, user_screen_name):
def set_following(self, user_screen_name):
logger.info("=== set_following Start")
following_ids = get_following_ids(
api=self._api, user_screen_name=user_screen_name
api=auth_twitter_api(auth_keys=self._auth_keys),
user_screen_name=user_screen_name,
)
self._df["following"] = self._df.apply(
lambda x: x.user_id in following_ids, axis=1
Expand All @@ -80,6 +88,14 @@ def make_daily_tweet_users_graph(self, **kwargs):
)
figure.show()

def make_hourly_tweets_graph(self, **kwargs):
    """Display the hourly tweet-count line chart for the stored tweets.

    The stored DataFrame is first checked with ``validate_tweet_exists`` and
    then narrowed with ``filter_user`` before the figure is built and shown.

    :param kwargs: options forwarded unchanged to ``filter_user``
    """
    validate_tweet_exists(self._df)
    filtered_df = filter_user(self._df, **kwargs)
    # Inside the method body this name resolves to the module-level graph
    # builder, not to this method.
    make_hourly_tweets_graph(
        filtered_df, search_word=self._search_word, timezone=self._timezone
    ).show()

def make_tweets_user_ranking(self, **kwargs):
validate_tweet_exists(self._df)
rankings = make_user_ranking(
Expand Down
24 changes: 24 additions & 0 deletions src/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from dataclasses import dataclass

import tweepy


@dataclass
class TwitterAuthKeys:
    """Bundle of the four credentials used to authenticate against Twitter.

    The dataclass-generated ``__repr__`` is replaced on purpose: the default
    one prints every field, which would leak the secrets into logs and
    tracebacks. Equality and construction are still generated by
    ``@dataclass``.
    """

    # Consumer key / secret pair handed to the OAuth handler.
    api_key: str
    api_secret: str
    # Access token / secret pair set on the OAuth handler.
    access_token: str
    access_token_secret: str

    def __repr__(self) -> str:
        """Return a debug representation with both secret values masked."""
        return (
            f"{type(self).__name__}("
            f"api_key={self.api_key!r}, api_secret='***', "
            f"access_token={self.access_token!r}, access_token_secret='***')"
        )


def auth_twitter_api(auth_keys: TwitterAuthKeys):
    """Create an authenticated ``tweepy.API`` client from the given keys.

    :param auth_keys: credentials used to build the OAuth handler
    :return: a ``tweepy.API`` instance configured with the retry, timeout
        and rate-limit options below
    """
    handler = tweepy.OAuthHandler(auth_keys.api_key, auth_keys.api_secret)
    handler.set_access_token(auth_keys.access_token, auth_keys.access_token_secret)

    # Client options are gathered in one place so they are easy to compare
    # with other call sites.
    client_options = {
        "retry_count": 10,
        "retry_delay": 60,
        "wait_on_rate_limit": True,
        "timeout": 120,
        "wait_on_rate_limit_notify": True,
    }
    return tweepy.API(handler, **client_options)
2 changes: 2 additions & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ class FfRatioOrderModes(Enum):
FOLLOWER_IDS_API_PATH: "https://api.twitter.com/1.1/followers/ids.json",
FRIEND_IDS_API_PATH: "https://api.twitter.com/1.1/friends/ids.json",
}

RETRY_COUNT = 10
25 changes: 24 additions & 1 deletion src/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import plotly.express

from .processors import (
make_count_tweeted_df,
make_count_tweeted_weekday_df,
make_title,
make_tweet_user_weekday_max_hour_df,
Expand Down Expand Up @@ -53,6 +54,29 @@ def make_daily_tweet_users_graph(df: pandas.DataFrame, search_word: str, timezon
return fig


def make_hourly_tweets_graph(df: pandas.DataFrame, search_word: str, timezone):
    """Build a line chart of the number of tweets per weekday-and-hour slot.

    :param df: DataFrame of tweets to aggregate
    :param search_word: search word shown in the chart title
    :param timezone: timezone object
    :return: the figure produced by ``plot_line``, with x tick labels rotated
    """
    counts_df = make_count_tweeted_df(df, timezone=timezone, group_col="tweeted_wh")
    # Sum only the numeric column; reducing the whole frame would also
    # process the string label column for no benefit.
    total_count = counts_df["count"].sum()
    fig = plot_line(
        counts_df,
        x_col="tweeted_wh",
        x_label="ツイート時間",
        y_col="count",
        # The y axis shows tweet counts, so it is labelled as tweets rather
        # than as number of users, matching the chart title.
        y_label="ツイート数",
        title=make_title(
            df, main_title="時間別ツイート数", count=total_count, search_word=search_word
        ),
    )
    # One tick per weekday-hour slot; vertical labels keep them readable.
    fig.update_xaxes(tickangle=-90)
    return fig


def plot_line(
df: pandas.DataFrame, x_col: str, y_col: str, x_label: str, y_label: str, title: str
):
Expand All @@ -70,7 +94,6 @@ def plot_line(
df,
x=x_col,
y=y_col,
text=y_col,
title=title,
labels={
y_col: y_label,
Expand Down
35 changes: 35 additions & 0 deletions src/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def make_weekday(dt: datetime, timezone) -> str:
return f"{dstr}({WEEKDAYS[dt.weekday()]})"


def make_weekday_hour(weekday, hour):
    """Join a weekday label and an hour label with a single space.

    :param weekday: weekday label
    :param hour: hour label
    :return: the combined ``"<weekday> <hour>"`` label
    """
    return "{} {}".format(weekday, hour)


def make_tweeted_weekday_range(timezone) -> List[str]:
"""グラフに描画する曜日付き日付ラベルの範囲を生成する
Expand Down Expand Up @@ -66,6 +70,24 @@ def make_count_tweeted_weekday_df(df: pandas.DataFrame, timezone) -> pandas.Data
return _df.sort_index().reset_index()


def make_count_tweeted_df(
    df: pandas.DataFrame, timezone, group_col
) -> pandas.DataFrame:
    """Count tweets per ``group_col`` value and zero-fill missing labels.

    Labels produced by ``make_tweeted_weekday_hour_label_range`` that have no
    tweets are added with a count of 0. Groups that are present in ``df`` but
    not in that label range are kept.

    :param df: DataFrame to aggregate; needs ``group_col`` and ``tweet_id``
    :param timezone: timezone object passed to the label-range generator
    :param group_col: name of the column to group by
    :return: DataFrame with columns ``group_col`` and ``count``, sorted by
        ``group_col``
    """
    counts = df.groupby(group_col)["tweet_id"].agg(count="count")
    expected_labels = make_tweeted_weekday_hour_label_range(timezone=timezone)
    # A single reindex over "observed groups + expected labels" replaces the
    # per-label DataFrame.append loop, which copied the frame on every
    # iteration and no longer exists in pandas 2.x.
    full_index = counts.index.union(expected_labels)
    counts = counts.reindex(full_index, fill_value=0).rename_axis(group_col)
    return counts.sort_index().reset_index()


def make_tweeted_hour_label_range() -> List[str]:
"""グラフに描画する時間ラベルの範囲を生成する
Expand All @@ -74,6 +96,19 @@ def make_tweeted_hour_label_range() -> List[str]:
return [str(i).zfill(2) for i in range(24)]


def make_tweeted_weekday_hour_label_range(timezone) -> List[str]:
    """Generate the date-and-hour labels to draw on the graph.

    Every weekday label from ``make_tweeted_weekday_range`` is combined with
    every hour label from ``make_tweeted_hour_label_range``, weekday-major.

    :param timezone: timezone object passed to ``make_tweeted_weekday_range``
    :return: list of combined weekday-hour labels
    """
    return [
        make_weekday_hour(weekday=day_label, hour=hour_label)
        for day_label in make_tweeted_weekday_range(timezone)
        for hour_label in make_tweeted_hour_label_range()
    ]


def make_count_tweeted_hour_df(df: pandas.DataFrame) -> pandas.DataFrame:
"""時間別にツイート数をカウントしたDataFrameを生成する
Expand Down
50 changes: 37 additions & 13 deletions src/tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@
import pytz
import tweepy

from .constants import API_COUNTS, FULL_TEXT_TWEET_MODE, SEARCH_API_PATH, TODAY_EXCLUDED
from .limits import get_rate_limit_reset_time, is_rate_limit
from auth import TwitterAuthKeys, auth_twitter_api

from .constants import (
API_COUNTS,
FULL_TEXT_TWEET_MODE,
RETRY_COUNT,
SEARCH_API_PATH,
TODAY_EXCLUDED,
)
from .loggers import get_logger
from .processors import make_weekday
from .processors import make_weekday, make_weekday_hour

logger = get_logger(__name__, loglevel=logging.INFO)

Expand Down Expand Up @@ -39,12 +46,17 @@ def search_tweets(
tweets = []
next_max_tweet_id = None
limited = False
while True:
if is_rate_limit(api, api_path=SEARCH_API_PATH):
reset_time = get_rate_limit_reset_time(api, api_path=SEARCH_API_PATH)
logger.info(f"アクセス上限のため処理休止中({reset_time}秒)..")
time.sleep(reset_time)

# リトライ用に退避
auth_keys = TwitterAuthKeys(
api_key=api.auth.consumer_key,
api_secret=api.auth.consumer_secret,
access_token=api.auth.access_token,
access_token_secret=api.auth.access_token_secret,
)

retry_count = 0
while True:
_tweets = []
try:
_tweets = api.search(
Expand All @@ -53,9 +65,16 @@ def search_tweets(
count=API_COUNTS[SEARCH_API_PATH],
max_id=next_max_tweet_id,
)
except tweepy.RateLimitError:
logger.info("アクセス上限のため処理休止中(15分)..")
time.sleep(15 * 60)
retry_count = 0

except Exception as e:
if retry_count > RETRY_COUNT:
raise e

logger.info("ReadTimeout occurred and re-authenticated.")
api = auth_twitter_api(auth_keys=auth_keys)
retry_count += 1
continue

# 取得するツイートがなくなった場合に処理終了
if len(_tweets) == 0:
Expand All @@ -67,12 +86,17 @@ def search_tweets(
else:
dt = _convert_timezone(t.created_at, timezone=timezone)

tweeted_weekday = make_weekday(dt, timezone=timezone)
tweeted_hour = dt.strftime("%H")
tweets.append(
{
"tweeted_dt": dt,
"tweeted_date": dt.date(),
"tweeted_weekday": make_weekday(dt, timezone=timezone),
"tweeted_hour": dt.strftime("%H"),
"tweeted_weekday": tweeted_weekday,
"tweeted_hour": tweeted_hour,
"tweeted_wh": make_weekday_hour(
weekday=tweeted_weekday, hour=tweeted_hour
),
"tweet_id": t.id,
"favorite_count": t.favorite_count,
"retweet_count": t.retweet_count,
Expand Down

0 comments on commit b15fc25

Please sign in to comment.