### Google Trends 網址結構

    https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=20200714&geo=TW&ns=15
    
    https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&ed=20200714&geo=JP&ns=15
    
    1. ed=YYYYMMDD
    2. geo=TW / geo=JP

In [1]:
import requests
import pandas as pd
import json
import re
from datetime import datetime, timedelta
import tqdm.notebook as tqdm
import os
import typing

---

### Functions

In [2]:
# 處理 "相關查詢" 欄位
def relatedQueries_proc(target):
    """Flatten a trend's related-queries list into one display string.

    Parameters
    ----------
    target : list of dict
        Related-query entries; each entry must provide a 'query' key.

    Returns
    -------
    str
        '---' when there are no related queries, otherwise every query text
        joined with ' / ' (for example 'a / b / c').
    """
    if not target:
        return '---'

    # Join every entry. The previous loop rebound `target` on each pass and
    # never accumulated into `result`, so only the last query survived.
    return ' / '.join(relatedQuery['query'] for relatedQuery in target)

In [3]:
def trends_crawler(str_date: str, country: str) -> pd.DataFrame:
    """Download one day of Google Trends daily trending searches.

    Parameters
    ----------
    str_date : str
        Day to fetch, formatted YYYYMMDD (sent as the `ed` query parameter).
    country : str
        Region code sent as the `geo` query parameter, e.g. 'TW' or 'JP'.

    Returns
    -------
    pd.DataFrame
        One row per trending search, with the columns
        '關鍵字', '搜尋筆數', '相關查詢', '相關文章', '文章連結' in that order.
        Empty (with the same columns) when the response lists no searches.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an error status.
    requests.Timeout
        If no response arrives within 30 seconds.
    """
    final_columns = ['關鍵字', '搜尋筆數', '相關查詢', '相關文章', '文章連結']

    url = f'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&geo={country}&ns=15&ed={str_date}'
    # Bound the wait and fail loudly on HTTP errors instead of trying to parse
    # an error page as JSON.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # Remove the ")]}'," + newline marker so the remainder parses as JSON.
    payload = json.loads(re.sub(r'\)\]\}\',\n', '', resp.text))
    days = payload['default']['trendingSearchesDays']
    searches = days[0]['trendingSearches'] if days else []
    if not searches:
        # Nothing trending for this day: hand back an empty, well-formed frame.
        return pd.DataFrame(columns=final_columns)

    df = pd.DataFrame(searches)

    # Reduce each nested field to the single value that is kept.
    df = df.drop(columns='shareUrl')
    df['title'] = df['title'].apply(lambda x: x['query'])
    # Keep the headline of the first article; tolerate rows without articles.
    df['articles'] = df['articles'].apply(lambda x: x[0]['title'] if x else '---')
    df['relatedQueries'] = df['relatedQueries'].apply(relatedQueries_proc)
    # Resolve the link row by row so one entry without 'newsUrl' no longer
    # leaves the whole column as raw dicts.
    df['image'] = df['image'].apply(
        lambda x: x.get('newsUrl') if isinstance(x, dict) else None
    )

    # Positional rename: assumes the five remaining fields arrive in the order
    # title, traffic, relatedQueries, image, articles -- TODO confirm against
    # the live payload.
    df.columns = ['關鍵字', '搜尋筆數', '相關查詢', '文章連結', '相關文章']

    # Put the article link last.
    return df[final_columns]

### 輸出檔案

In [4]:
# filename = 'trends_%s.csv' % datetime.now().strftime('%Y-%m-%d')
# with open(filename, 'w', encoding='utf-8-sig') as f:
#     df.to_csv(f, encoding='utf-8')

---
### 迴圈抓近一個月

In [5]:
# import html.parser
# base_url = 'https://trends.google.com.tw/trends/api/dailytrends?hl=zh-TW&tz=-480&geo=TW&ns=15&ed='
# html.unescape('&tz=-480&geo=TW&ns=15&ed=')

In [6]:
# Crawl window: today and the 29 days before it, plus YYYYMMDD string forms.
end_date = datetime.today()
start_date = end_date - timedelta(days=29)
str_start_date, str_end_date = (
    day.strftime('%Y%m%d') for day in (start_date, end_date)
)

### TW

In [7]:
# Fetch every day in the window for Taiwan and save it as ./data_tw/<YYYYMMDD>.pkl.
# Create the output folder first: writing the pickle does not create it, so a
# fresh checkout would otherwise fail on the first day.
os.makedirs('./data_tw', exist_ok=True)
for i in tqdm.tqdm(pd.date_range(start=start_date, end=end_date, freq='1D')):
    str_i_date = i.strftime('%Y%m%d')
    ndf = trends_crawler(str_i_date, 'TW')
    ndf['date'] = str_i_date
    ndf.to_pickle(f'./data_tw/{str_i_date}.pkl')

  0%|          | 0/30 [00:00<?, ?it/s]

### JP

In [8]:
# Fetch every day in the window for Japan and save it as ./data_jp/<YYYYMMDD>.pkl.
# Create the output folder first: writing the pickle does not create it, so a
# fresh checkout would otherwise fail on the first day.
os.makedirs('./data_jp', exist_ok=True)
for i in tqdm.tqdm(pd.date_range(start=start_date, end=end_date, freq='1D')):
    str_i_date = i.strftime('%Y%m%d')
    ndf = trends_crawler(str_i_date, 'JP')
    ndf['date'] = str_i_date
    ndf.to_pickle(f'./data_jp/{str_i_date}.pkl')

  0%|          | 0/30 [00:00<?, ?it/s]

---
### 讀取檔案

### TW

In [11]:
# Load every saved Taiwan daily frame, one DataFrame per .pkl file.
# NOTE: unpickling can execute code; only read files this notebook wrote itself.
df_tw = []
# Sort the names because os.listdir returns them in arbitrary order; files
# written by the crawl cell are named YYYYMMDD.pkl, so this is chronological.
for file in sorted(os.listdir('data_tw')):
    # Match the extension exactly instead of any name that merely contains "pkl".
    if file.endswith('.pkl'):
        df_file = pd.read_pickle('./data_tw/' + file)
        df_tw.append(df_file)

### 更新 TW 字典

In [12]:
# Read the keywords already recorded in the dictionary file (one per line).
with open('./keyword_tw.txt', 'r', encoding='utf8') as f:
    raw_lines = f.readlines()
kw_list = [kw.strip() for kw in raw_lines]
known_kw = set(kw_list)  # set gives constant-time membership checks

# Collect unseen keywords in first-seen order, each exactly once, so the
# appended lines are the same on every run.
new_kw = []
for data in df_tw:
    for kw in data['關鍵字']:
        kw = kw.strip()
        if kw and kw not in known_kw:
            known_kw.add(kw)
            new_kw.append(kw + '\n')

with open('./keyword_tw.txt', 'a+', encoding='utf8') as f:
    # If the file does not end with a newline, add one so the first new
    # keyword is not glued onto the last existing line.
    if new_kw and raw_lines and not raw_lines[-1].endswith('\n'):
        f.write('\n')
    f.writelines(new_kw)

In [13]:
new_kw

['Clubhouse 邀請碼\n',
 '王祖賢\n',
 '鄭家純\n',
 '范綱皓\n',
 '嚴長壽\n',
 '石原聰美\n',
 '防疫照顧假\n',
 '飛機杯\n',
 '趙英俊\n',
 '李子柒\n',
 '元晶\n',
 '鳳山霸凌\n',
 '角頭浪流連\n',
 '蓋亞那\n',
 '李婉鈺\n']

### JP

In [14]:
# Load every saved Japan daily frame, one DataFrame per .pkl file.
# NOTE: unpickling can execute code; only read files this notebook wrote itself.
df_jp = []
# Sort the names because os.listdir returns them in arbitrary order; files
# written by the crawl cell are named YYYYMMDD.pkl, so this is chronological.
for file in sorted(os.listdir('data_jp')):
    # Match the extension exactly instead of any name that merely contains "pkl".
    if file.endswith('.pkl'):
        df_file = pd.read_pickle('./data_jp/' + file)
        df_jp.append(df_file)

### 更新 JP 字典

In [15]:
# Read the keywords already recorded in the dictionary file (one per line).
with open('./keyword_jp.txt', 'r', encoding='utf-8') as f:
    raw_lines = f.readlines()
kw_list = [kw.strip() for kw in raw_lines]
known_kw = set(kw_list)  # set gives constant-time membership checks

# Collect unseen keywords in first-seen order, each exactly once, so the
# appended lines are the same on every run.
new_kw = []
for data in df_jp:
    for kw in data['關鍵字']:
        kw = kw.strip()
        if kw and kw not in known_kw:
            known_kw.add(kw)
            new_kw.append(kw + '\n')

with open('./keyword_jp.txt', 'a+', encoding='utf8') as f:
    # If the file does not end with a newline, add one so the first new
    # keyword is not glued onto the last existing line.
    if new_kw and raw_lines and not raw_lines[-1].endswith('\n'):
        f.write('\n')
    f.writelines(new_kw)

In [16]:
new_kw

['福岡 コロナ\n',
 '吉川愛\n',
 '南野拓実\n',
 '豊田剛一郎\n',
 'たこやきレインボー\n',
 'メドレー\n',
 'COCOA\n',
 '満島真之介\n',
 '満島ひかり\n',
 'コロナワクチン\n',
 'タカノフルーツパーラー\n',
 '榊原ゆい\n',
 'Redmi Note 9T\n',
 '川上洋平\n',
 '崎山蒼志\n']