In [None]:
!pip install japanize-matplotlib

Collecting japanize-matplotlib
[?25l  Downloading https://files.pythonhosted.org/packages/aa/c0/b75d434be51a8cc11d2e9b36f2d7f93a1bcf63bde24dc79a61d329d60b2a/japanize-matplotlib-1.0.5.tar.gz (4.1MB)
[K     |████████████████████████████████| 4.1MB 1.4MB/s 
[?25hBuilding wheels for collected packages: japanize-matplotlib
  Building wheel for japanize-matplotlib (setup.py) ... [?25l[?25hdone
  Created wheel for japanize-matplotlib: filename=japanize_matplotlib-1.0.5-cp36-none-any.whl size=4118721 sha256=6b44f0f08b8e3acc26e7821351d2e3efd1dc70c95f56ad5f8546233e5f6b1c81
  Stored in directory: /root/.cache/pip/wheels/6c/8a/08/4a784957da9f3c2b4839b4986be2fba2a481877318948be52c
Successfully built japanize-matplotlib
Installing collected packages: japanize-matplotlib
Successfully installed japanize-matplotlib-1.0.5


In [None]:
import os
import time
from typing import List, Dict
import re
from datetime import datetime as dt
import pandas as pd
import numpy as np
import boto3
from boto3.dynamodb.conditions import Key
from fastprogress import progress_bar as pb
import torch
import torch.nn as nn
import japanize_matplotlib
from datetime import datetime, timedelta

In [None]:
# Credentials come from the environment; a missing variable raises KeyError here.
_aws_session = boto3.session.Session(
    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    region_name='ap-northeast-1',
)

# Module-level DynamoDB resource handle built from the session above.
DYNAMO_DB = _aws_session.resource(service_name='dynamodb')

In [None]:
class DynamoDB:
    """Static helpers for writing to and querying tables via the module-level ``DYNAMO_DB`` resource."""

    @staticmethod
    def put_items(
        table_name: str,
        items: List[Dict],
        use_batch_writer: bool = False,
    ) -> List:
        """Write one or more items to a table, skipping any item whose write raises.

        Args:
            table_name (str): Name of the target table.
            items (List[Dict]): Items to write. A value that is not a list is
                wrapped into a one-element list.
            use_batch_writer (bool): When True, items are written through
                ``table.batch_writer()``; otherwise ``table.put_item`` is
                called once per item.

        Raises:
            Exception: Whatever ``DYNAMO_DB.Table(table_name)`` raised, after
                logging it.

        Returns:
            List: The return value of each ``put_item`` call that did not
            raise, in input order.
        """
        if not isinstance(items, list):
            items = [items]

        responses = []
        try:
            table = DYNAMO_DB.Table(table_name)
        except Exception as e:
            print(f'Failed at DYNAMO_DB.Table(table_name) : {e}')
            # Bare raise keeps the original traceback intact.
            raise

        if use_batch_writer:
            with table.batch_writer() as batch:
                for item in items:
                    try:
                        # NOTE(review): the batch writer's put_item appears to
                        # return None, so entries collected here are likely
                        # None in batch mode — confirm before relying on them.
                        responses.append(batch.put_item(Item=item))
                    except Exception as e:
                        print(f'Failed to put data to DynamoDB. Skipping : {e}')
        else:
            for item in items:
                try:
                    # The Table resource already carries its own table name,
                    # so only the item needs to be passed.
                    responses.append(table.put_item(Item=item))
                except Exception as e:
                    print(f'Failed to put data to DynamoDB. Skipping : {e}')

        return responses

    @staticmethod
    def partitionkey_query(
        table_name: str,
        partition_key_name: str,
        partition_key: str,
    ) -> List[Dict]:
        """Return every item whose partition key equals ``partition_key``.

        A single Query response can be truncated; when it is, the response
        carries ``LastEvaluatedKey``. This method keeps querying with
        ``ExclusiveStartKey`` until no such key is returned, so the result
        is the complete partition rather than only its first page.

        Args:
            table_name (str): Name of the table to query.
            partition_key_name (str): Attribute name of the partition key.
            partition_key (str): Partition key value to match.

        Raises:
            Exception: Whatever ``DYNAMO_DB.Table(table_name)`` raised, after
                logging it.

        Returns:
            List[Dict]: All matching items, or ``[]`` if any query call
            failed (the failure is printed and already-fetched pages are
            discarded so callers never receive a silently partial result).
        """
        try:
            table = DYNAMO_DB.Table(table_name)
        except Exception as e:
            print(f'Failed at DYNAMO_DB.Table(table_name) : {e}')
            raise

        collected: List[Dict] = []
        try:
            query_kwargs = {
                'KeyConditionExpression': Key(partition_key_name).eq(partition_key),
            }
            while True:
                response = table.query(**query_kwargs)
                collected.extend(response.get('Items', []))
                last_key = response.get('LastEvaluatedKey')
                if not last_key:
                    break
                # Resume the next page right after the last evaluated item.
                query_kwargs['ExclusiveStartKey'] = last_key
        except Exception as e:
            print(f'Failed to query : {e}')
            return []

        return collected

In [None]:
# Query the trend table for the items stored under date == '2020-04-09'.
res = DynamoDB.partitionkey_query(
    'finapp_twitter_trend',
    partition_key_name='date',
    partition_key='2020-04-09',
)

In [None]:
# Number of items the partition query returned.
len(res)

9085

In [None]:
# Peek at the first returned item to see which fields an item carries.
res[0]

{'date': '2020-04-09',
 'datetime': Decimal('1586358050'),
 'datetime_keyword': '20200409_000050_#ORβITさん月が綺麗ですね',
 'keyword': '#ORβITさん月が綺麗ですね',
 'volume': None}

In [None]:
# Convert the first item's epoch-seconds 'datetime' field to a datetime object.
# NOTE(review): datetime.fromtimestamp() converts to the kernel's local timezone, so the
# displayed value is machine-dependent — confirm which timezone is intended.
datetime.fromtimestamp(res[0]['datetime'])

datetime.datetime(2020, 4, 8, 15, 0, 50)

In [None]:
# Build the trend table from the query result.
# pd.to_datetime(..., unit='s') reads the epoch seconds as UTC and yields naive
# timestamps, so the column does not depend on the kernel's local timezone
# (datetime.fromtimestamp does). int() makes the Decimal -> integer conversion
# explicit instead of relying on implicit coercion.
df_trend = pd.DataFrame({
    'datetime': pd.to_datetime([int(r['datetime']) for r in res], unit='s'),
    'keyword': [r['keyword'] for r in res],
    'volume': [r['volume'] for r in res],
})

In [None]:
# Display the frame; the rendered output is truncated to its first and last rows.
df_trend

Unnamed: 0,datetime,keyword,volume
0,2020-04-08 15:00:50,#ORβITさん月が綺麗ですね,
1,2020-04-08 15:00:50,#SUPERBEAVER,14926
2,2020-04-08 15:00:50,#imas_MOR,
3,2020-04-08 15:00:50,#あなたは天才で清楚なのか,
4,2020-04-08 15:00:50,#あなたを山手線の駅に例えると,46546
...,...,...,...
9080,2020-04-08 18:09:05,P-MODEL,
9081,2020-04-08 18:09:05,さくらしめじ,
9082,2020-04-08 18:09:05,しめじちゃん,
9083,2020-04-08 18:09:05,りあむソロ,13545


In [None]:
# Count of distinct keyword values (missing values, if any, count as one value).
df_trend['keyword'].nunique(dropna=False)

121

In [None]:
# Ascending by volume; rows without a volume are placed at the end.
df_trend.sort_values(by=['volume'], ascending=True, na_position='last')

Unnamed: 0,datetime,keyword,volume
5720,2020-04-08 16:59:53,#乃木坂46ANN,10007
5670,2020-04-08 16:58:51,#乃木坂46ANN,10007
5620,2020-04-08 16:57:49,#乃木坂46ANN,10007
5570,2020-04-08 16:56:47,#乃木坂46ANN,10007
5520,2020-04-08 16:55:45,#乃木坂46ANN,10007
...,...,...,...
9079,2020-04-08 18:09:05,It's my life,
9080,2020-04-08 18:09:05,P-MODEL,
9081,2020-04-08 18:09:05,さくらしめじ,
9082,2020-04-08 18:09:05,しめじちゃん,


In [None]:
# Keep only the rows that have a volume value.
df_trend_not_volume_nan = df_trend.dropna(axis='index', how='any', subset=['volume'])

In [None]:
# Count of distinct keywords among the rows that have a volume.
df_trend_not_volume_nan['keyword'].nunique(dropna=False)

45

In [None]:
# Largest volume observed for each keyword.
df_trend_not_volume_nan.groupby('keyword')['volume'].max()

keyword
#FAKEMOTION              11312
#SUPERBEAVER             15821
#ThePromisedSaviour    1159089
#fakemotion              11287
#あなたは天才で清楚なのか            10351
#あなたを山手線の駅に例えると          46622
#コレコレプレゼント企画             41046
#ドラブラ                    30205
#ニンテンドー3DS               23973
#ビーエヌエー                  10102
#ミラクル9                   18231
#レンタルなんもしない人             27267
#乃木坂46ANN                17231
#全ての住民に補償と給付で防疫を         90922
#増田貴久                    14006
#宮近海斗                    18356
#有吉の壁                    47286
#東大王                     45074
#水曜日のダウンタウン              10309
#特捜9                     24862
#祝35th山下智久0409           13421
#鬼滅の刃キャラ診断               36589
Creepy Nuts              21346
SUPER BEAVER             28826
りあむソロ                    13545
オリュンポス                  176353
カイニス                     72077
ジンネマン                    35482
スーパームーン                 137894
ゼウス                      19905
テレワーク                   228851
ディオスクロイ                  35482


In [None]:
# Collect the items of every daily partition in the range into one flat list.
res = []

# The loop variable is deliberately not called `dt`: that name is the alias
# bound by `from datetime import datetime as dt` in the imports cell, and
# reusing it here would leave it pointing at the last Timestamp of the range.
for day in pb(pd.date_range('2020-03-1', '2020-04-10')):
    res.extend(
        DynamoDB.partitionkey_query(
            table_name='finapp_twitter_trend',
            partition_key_name='date',
            partition_key=str(day.date()),
        )
    )

In [None]:
# Rebuild the trend table for the whole date range, now including the `date` partition key.
# pd.to_datetime(..., unit='s') reads the epoch seconds as UTC and yields naive
# timestamps, so the column does not depend on the kernel's local timezone
# (datetime.fromtimestamp does). int() makes the Decimal -> integer conversion
# explicit instead of relying on implicit coercion.
# NOTE(review): in the recorded output `date` runs ahead of the UTC timestamp
# (e.g. date 2020-03-01 vs 2020-02-29 15:00), so `date` presumably follows a
# UTC+9 calendar — confirm before comparing the two columns.
df_trend = pd.DataFrame({
    'date': [r['date'] for r in res],
    'datetime': pd.to_datetime([int(r['datetime']) for r in res], unit='s'),
    'keyword': [r['keyword'] for r in res],
    'volume': [r['volume'] for r in res],
})

In [None]:
# Display the frame; the rendered output is truncated to its first and last rows.
df_trend

Unnamed: 0,date,datetime,keyword,volume
0,2020-03-01,2020-02-29 15:00:11,#DMM_eスポーツ,
1,2020-03-01,2020-02-29 15:00:11,#RAS武道館,27277
2,2020-03-01,2020-02-29 15:00:11,#SONGS,22770
3,2020-03-01,2020-02-29 15:00:11,#nhkらじらー,93769
4,2020-03-01,2020-02-29 15:00:11,#にじさんじスプラ杯,27627
...,...,...,...,...
322049,2020-04-10,2020-04-09 16:42:03,焼きそば定食,
322050,2020-04-10,2020-04-09 16:42:03,石川知事,
322051,2020-04-10,2020-04-09 16:42:03,蘭ちゃん誕生日,
322052,2020-04-10,2020-04-09 16:42:03,通信障害,


In [None]:
# Keep only the rows that have a volume value.
df_trend_not_volume_nan = df_trend.dropna(axis='index', how='any', subset=['volume'])

In [None]:
# Number of rows that have a volume value.
len(df_trend_not_volume_nan)

150842

In [None]:
# Largest volume per keyword, restricted to rows whose date is 2020-03-31.
(
    df_trend_not_volume_nan
    .loc[lambda frame: frame['date'] == '2020-03-31']
    .groupby('keyword')['volume']
    .max()
)

keyword
#CDTVライブライブ       411327
#Da_iCE            22705
#EXIT              26705
#GENERATIONS       20850
#MAX_CHOCOLATE     62667
                   ...  
緊急記者会見            124536
舞祭組                17731
調整中                23483
都知事の会見             87999
龍友くん               16132
Name: volume, Length: 63, dtype: object

In [None]:
# Per (date, keyword) non-null counts for keywords containing '志村'.
(
    df_trend
    .loc[lambda frame: frame['keyword'].str.contains('志村')]
    .groupby(['date', 'keyword'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,volume
date,keyword,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-07,#志村どうぶつ園,71,0
2020-03-26,#志村けん頑張れ,193,193
2020-03-31,志村けんさん,49,49
2020-03-31,志村さん,21,21
2020-03-31,志村さん兄,145,141
2020-04-05,#志村どうぶつ園,95,95
2020-04-05,志村どうぶつ園,15,15
2020-04-05,志村動物園,184,184
2020-04-05,志村園長,28,28
2020-04-06,#志村どうぶつ園,178,178


In [None]:
# Distinct dates present in the frame.
# NOTE(review): the recorded output skips some days of the requested range
# (e.g. 2020-03-03..06 and 2020-04-01..02). partitionkey_query returns [] when
# a query fails, so confirm these are genuinely empty partitions.
df_trend['date'].unique()

array(['2020-03-01', '2020-03-02', '2020-03-07', '2020-03-08',
       '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12',
       '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16',
       '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20',
       '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24',
       '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28',
       '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-03',
       '2020-04-04', '2020-04-05', '2020-04-06', '2020-04-07',
       '2020-04-08', '2020-04-09', '2020-04-10'], dtype=object)

In [None]:
# Distinct keywords over the whole frame divided by the number of distinct dates.
# A keyword that appears on several dates is counted once in the numerator, so
# this is not the mean of per-date distinct counts.
df_trend['keyword'].nunique(dropna=False) / df_trend['date'].nunique(dropna=False)

99.85714285714286

In [None]:
# Per (date, keyword) non-null counts for keywords containing 'コロナ'.
(
    df_trend
    .loc[lambda frame: frame['keyword'].str.contains('コロナ')]
    .groupby(['date', 'keyword'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,volume
date,keyword,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-02,#教えて新型コロナ,163,0
2020-03-12,#コロナウイルスお絵描き,88,0
2020-03-12,コロナウイルスの上手さ,17,0
2020-03-12,コロナウイルスの特徴,49,0
2020-03-17,#新型コロナあなたの不安,6,0
2020-03-20,#シンソウ坂上新型コロナ,187,187
2020-03-23,コロナ対策30兆円規模,23,23
2020-03-27,コロナ,184,184
2020-03-28,コロナ,100,100
2020-03-29,コロナ,36,36


In [None]:
# Per (date, keyword) non-null counts for keywords containing 'マスク'.
(
    df_trend
    .loc[lambda frame: frame['keyword'].str.contains('マスク')]
    .groupby(['date', 'keyword'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,volume
date,keyword,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-02,マスク配布,4,4
2020-03-07,マスク大量出品,12,12
2020-03-10,旧マスク工場,117,117
2020-03-12,マスク配布,9,3
2020-03-12,備蓄用マスクの配布対象,2,2
2020-03-18,マスク手作り600枚,9,0
2020-03-18,手作りマスク612枚,31,31
2020-04-03,マスク,183,183
2020-04-08,小池さんのマスク,25,25
2020-04-10,マスク2枚,73,73


In [None]:
# Per (date, keyword) non-null counts for keywords containing '安倍'.
(
    df_trend
    .loc[lambda frame: frame['keyword'].str.contains('安倍')]
    .groupby(['date', 'keyword'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime,volume
date,keyword,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-01,#安倍やめるな,14,14
2020-03-01,#安倍やめろ,185,185
2020-03-15,#安倍やめろ,79,79
2020-03-25,#安倍総理10万円下さい,185,185
2020-04-08,#安倍首相会見,118,111
