# 1. 데이터 로드

In [1]:
import os
import sys
import time
import random
import datetime
import requests
import pandas as pd
import numpy as np
import hashlib, hmac, base64
from itertools import combinations, permutations
from dtw import *
import json
import urllib.request
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
import pickle
from pytz import timezone
from difflib import SequenceMatcher

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from collections import defaultdict
from pytrends.request import TrendReq


Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



# 2. API 설정

In [2]:
from api_set import APIClient

# API settings: every connection value is looked up by name through utils.get_secret
from utils import get_secret
BASE_URL = get_secret("BASE_URL")
CUSTOMER_ID = get_secret("CUSTOMER_ID")
API_KEY = get_secret("API_KEY")
SECRET_KEY = get_secret("SECRET_KEY")
URI = get_secret("URI")
METHOD = get_secret("METHOD")
# Create the API client instance from the six values loaded above
api_client = APIClient(BASE_URL, CUSTOMER_ID, API_KEY, SECRET_KEY,URI,METHOD)


# 3. 연관검색어 수집

In [3]:
# Load the keyword data from main_keyword.json
from utils import load_keywords 
keywords_data = load_keywords('main_keyword.json')

from utils import get_today_date
# Get today's date (get_today_date returns two values; the second one, `day`,
# is used below as a folder name)
formatted_today, day = get_today_date()


# Create the folders that hold the results
from utils import make_directory

make_directory('./data')
make_directory('./data/rl_srch')
make_directory(f'./data/rl_srch/{day}')  # stores the related-search-term list for each keyword

In [4]:

# Search-keyword list and the path under which results are saved
srch_keyword = ['keyword_final']  
save_path = './data/rl_srch/'  
# Print the client's base URL as a quick check of the loaded configuration
print(api_client.base_url)

https://api.searchad.naver.com


In [5]:
# Notebook echo: display the current search-keyword list
srch_keyword

['keyword_final']

In [6]:
import os
import csv
import datetime
import asyncio
import pandas as pd

# Apply nest_asyncio where it is needed for async execution
# NOTE(review): presumably required because the notebook already runs an event
# loop and asyncio.run is called below — confirm
import nest_asyncio
nest_asyncio.apply()

from collect_keywords import collect_keywords

async def main(srch_keyword, day):
    """Return today's collected related keywords, using a CSV cache when present.

    The cache lives at ``./data/rl_srch/<yymmdd>/collected_keywords.csv``, where
    ``<yymmdd>`` is derived from the current local date. If that file exists it
    is read and returned; otherwise ``collect_keywords`` is awaited, its result
    is written to that path and then returned.

    Args:
        srch_keyword: Forwarded unchanged to ``collect_keywords``.
        day: Forwarded unchanged to ``collect_keywords``. It is NOT used to
            build the cache path; the path always uses the current date.

    Returns:
        The frame read from the cache file, or the object returned by
        ``collect_keywords`` (which must provide ``to_csv``).
    """
    # Imported locally under a private alias: this file binds the global name
    # ``datetime`` to the module in some cells and to the class in another
    # (``from datetime import datetime``), so the global cannot be relied on.
    import datetime as _datetime

    # Folder named after today's date
    folder_path = './data/rl_srch/' + _datetime.datetime.now().strftime('%y%m%d')
    file_path = f"{folder_path}/collected_keywords.csv"

    # Create the folder if needed; exist_ok avoids a check-then-create race
    os.makedirs(folder_path, exist_ok=True)

    # Cache hit: reuse the data collected earlier today
    if os.path.isfile(file_path):
        return pd.read_csv(file_path)

    # Cache miss: collect the data and store it as CSV for later runs
    collected_keywords_data = await collect_keywords(srch_keyword, day)
    collected_keywords_data.to_csv(file_path, index=False)
    return collected_keywords_data
# Run the coroutine to completion and keep its result for the following cells
collected_keywords_data=asyncio.run(main(srch_keyword, day))

In [7]:
# Show the collected related-keyword table
print(collected_keywords_data)

        연관키워드  월간검색수_합계    검색어
0          주식  528500.0     주식
1         미주부     370.0     주식
2       김현준대표     940.0     주식
3        퀀트투자    6650.0     주식
4        주식투자   12310.0     주식
...       ...       ...    ...
2471    토큰거래소      30.0  디지털자산
2472  가상화폐만들기      60.0  디지털자산
2473   코인제작업체      18.0  디지털자산
2474     코인업체      80.0  디지털자산
2475   가상화폐상장      70.0  디지털자산

[2476 rows x 3 columns]


In [8]:
# Order rows by search term ('검색어'), then by monthly search volume
# ('월간검색수_합계') with the highest first.
# A plain multi-column sort replaces groupby(...).apply(sort_values), which made
# pandas emit the warning echoed in this cell's output (presumably the
# deprecation of apply operating on the grouping columns — confirm).
# Rows with a missing '검색어' are dropped first because groupby skipped them too.
sorted_df = (
    collected_keywords_data
    .dropna(subset=['검색어'])
    .sort_values(['검색어', '월간검색수_합계'], ascending=[True, False])
    .reset_index(drop=True)
)

# Store one DataFrame per '검색어' value in a list
grouped = sorted_df.groupby('검색어')
df_list = [group for _, group in grouped]
print(len(df_list))

4


  sorted_df = collected_keywords_data.groupby('검색어').apply(lambda x: x.sort_values('월간검색수_합계', ascending=False)).reset_index(drop=True)


In [9]:
from utils import merge_and_mark_duplicates_limited
# Combine the per-search-term frames back into a single table.
# NOTE(review): judging by the displayed result, the helper adds a '중복검색어'
# column and limits the rows — confirm against utils.
collected_keywords_data = merge_and_mark_duplicates_limited(df_list)


# Notebook echo: display the merged table
collected_keywords_data



Unnamed: 0,연관키워드,월간검색수_합계,검색어,중복검색어
0,삼성전자주가,4461300.0,금리,"금리,금융상품,주식"
1,미국증시,2568100.0,금리,금리
2,부동산,1856800.0,금리,금리
3,나스닥선물,910200.0,금리,금리
4,나스닥,821300.0,금리,"금리,주식"
...,...,...,...,...
156,코스피시총순위,39250.0,주식,주식
157,현대자동차주식,38320.0,주식,주식
158,증시현황,36760.0,주식,주식
159,배당주,33730.0,주식,주식


In [10]:
import utils

def add_client_info(collected_keywords_data, start_id_index=1, clients=None):
    """Attach API credentials to every row, switching client every 500 rows.

    Row ``i`` gets the client stored under the key
    ``f"id_{((i // 500) + start_id_index) % len(clients)}"``.

    Args:
        collected_keywords_data: Frame to annotate. It is modified in place.
        start_id_index: Index of the client used for the first 500 rows.
            (Earlier versions reset this to 1 inside the body, so the argument
            was silently ignored.)
        clients: Mapping of ``"id_<n>"`` to a dict with ``client_id`` and
            ``client_secret``. When omitted it is loaded with
            ``utils.get_secret("clients")``.

    Returns:
        The same frame object, with ``id`` and ``pw`` columns added.

    Raises:
        KeyError: If the computed ``id_<n>`` key is missing from ``clients``.

    NOTE: the returned frame holds credentials in plain text; avoid displaying
    or persisting it.
    """
    if clients is None:
        clients = utils.get_secret("clients")

    client_count = len(clients)
    ids = []
    pws = []
    for row_number in range(len(collected_keywords_data)):
        # A new client takes over after each block of 500 rows, wrapping around
        client_index = ((row_number // 500) + start_id_index) % client_count
        credentials = clients[f"id_{client_index}"]
        ids.append(credentials['client_id'])
        pws.append(credentials['client_secret'])

    # Add the ID and PW columns
    collected_keywords_data['id'] = ids
    collected_keywords_data['pw'] = pws

    return collected_keywords_data
# Add the 'id' and 'pw' credential columns using the default start index
collected_keywords_data= add_client_info(collected_keywords_data)
# Notebook echo: display the annotated table
# NOTE(review): this prints the credential columns into the cell output
collected_keywords_data

Unnamed: 0,연관키워드,월간검색수_합계,검색어,중복검색어,id,pw
0,삼성전자주가,4461300.0,금리,"금리,금융상품,주식",RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
1,미국증시,2568100.0,금리,금리,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
2,부동산,1856800.0,금리,금리,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
3,나스닥선물,910200.0,금리,금리,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
4,나스닥,821300.0,금리,"금리,주식",RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
...,...,...,...,...,...,...
156,코스피시총순위,39250.0,주식,주식,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
157,현대자동차주식,38320.0,주식,주식,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
158,증시현황,36760.0,주식,주식,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV
159,배당주,33730.0,주식,주식,RRYYzqMgic6itMdMQNdP,6JjVKXRLfV


In [11]:
def groupped_df(name,collected_keywords_data):
    """Split the frame into one sub-frame per distinct value of column ``name``.

    Args:
        name: Column label (or labels) passed to ``groupby``.
        collected_keywords_data: Frame to split.

    Returns:
        List of sub-frames, in the order ``groupby`` yields its groups.
    """
    pieces = []
    for _key, piece in collected_keywords_data.groupby(name):
        pieces.append(piece)
    return pieces
# Split the table by the 'id' column: one frame per assigned client id
df_list=groupped_df('id',collected_keywords_data)
print(len(df_list))

1


In [12]:
import asyncio
import trend  # 가정: trend 모듈에 trend_maincode 함수가 정의되어 있음

# Revised async main function
async def trend_main(df, clients):
    """Request trend data for the keywords in one frame.

    The credentials are taken from the first row's ``id`` and ``pw`` values and
    the keywords from the ``연관키워드`` column; everything is handed to
    ``trend.trend_maincode`` together with ``clients`` and the endpoint URL.

    Args:
        df: Frame with ``연관키워드``, ``id`` and ``pw`` columns.
        clients: Passed through unchanged to ``trend.trend_maincode``.

    Returns:
        Whatever ``trend.trend_maincode`` returns.
    """
    endpoint = "https://openapi.naver.com/v1/datalab/search"

    # Build the request parameters
    keywords = list(df['연관키워드'])
    client_id = df['id'].iloc[0]
    client_secret = df['pw'].iloc[0]
    request_params = dict(
        search_keywords=keywords,
        id=client_id,
        pw=client_secret,
        api_url=endpoint,
        name='연관검색어',
    )

    # Run trend_maincode and hand its result back
    return await trend.trend_maincode(request_params, clients, endpoint)

async def run_all(df_list, clients):
    """Run ``trend_main`` for every frame concurrently and collect the results.

    Args:
        df_list: Iterable of frames, one ``trend_main`` call each.
        clients: Passed through unchanged to every ``trend_main`` call.

    Returns:
        List of results in the same order as ``df_list``.
    """
    pending = []
    for frame in df_list:
        pending.append(trend_main(frame, clients))
    return await asyncio.gather(*pending)

clients = get_secret("clients")  # load the clients information
# Run the event loop: one trend_main call per frame in df_list
results = asyncio.run(run_all(df_list, clients))

In [19]:
from select_keyword import select_keyword, rising_keyword_analysis, monthly_rule

# Time the whole analysis cell; the elapsed seconds are printed at the end
start_time = time.time()
analysis_periods = ['daily', 'weekly', 'month']

formatted_today, today_date = utils.get_today_date()
# Run the keyword analysis for the monthly, weekly and daily periods.
# `results` holds one group per trend_main call; each group is iterated as a
# collection of per-keyword frames.
for period in analysis_periods:
    for keyword_df_group in results:
        for keyword_df in keyword_df_group:
            monthly_start, monthly_end, weekly_start, weekly_end = monthly_rule(keyword_df, today_date, period)
            if monthly_start is not None:
                print(f"Monthly Start for {period}:", monthly_start, "| Type:", type(monthly_start))
            if weekly_start is not None:
                print(f"Weekly Start for {period}:", weekly_start, "| Type:", type(weekly_start))
            # In the given example monthly_start and weekly_start were printed twice, so the duplicated part below was removed.

# Run the rising-keyword analysis for the weekly and monthly periods
rising_analysis_periods = ['weekly', 'month']
for period in rising_analysis_periods:
    for keyword_df_group in results:
        for keyword_df in keyword_df_group:
            rising_start, rising_end, rising_keywords = rising_keyword_analysis(keyword_df, today_date, period)
            if rising_start is not None:
                print(f"Rising Start for {period}:", rising_start, "| Type:", type(rising_start))
            if rising_end is not None:
                print(f"Rising End for {period}:", rising_end, "| Type:", type(rising_end))
            if rising_keywords is not None:
                print(f"Rising Keywords for {period}:", rising_keywords, "| Type:", type(rising_keywords))

# Run keyword selection for the daily, weekly and monthly periods
for period in analysis_periods:
    for keyword_df_group in results:
        for keyword_df in keyword_df_group:
            selected_keywords, selected_start, selected_end = select_keyword(keyword_df, today_date, period)
            if selected_keywords is not None:
                print(f"Selected Keywords for {period}:", selected_keywords, "| Type:", type(selected_keywords))
            if selected_start is not None:
                print(f"Selected Start for {period}:", selected_start, "| Type:", type(selected_start))
            if selected_end is not None:
                print(f"Selected End for {period}:", selected_end, "| Type:", type(selected_end))

end_time = time.time()
print(f"Analysis completed in {end_time - start_time} seconds.")

주별 지속상승 키워드 발견 : 캐피탈
Rising Start for weekly:                   캐피탈
2022-03-16  56.158634
2022-03-23  53.438005
2022-03-30  52.693556
2022-04-06  54.290738
2022-04-13  51.786677
...               ...
2024-02-07  80.062262
2024-02-14  74.309692
2024-02-21  81.469951
2024-02-28  80.008124
2024-03-06  80.387116

[104 rows x 1 columns] | Type: <class 'pandas.core.frame.DataFrame'>
Rising End for weekly:           검색일자        기준일자         유형 연관검색어        검색량
0   2022-03-16  2024-03-07  weekly급상승   캐피탈  56.158634
1   2022-03-23  2024-03-07  weekly급상승   캐피탈  53.438005
2   2022-03-30  2024-03-07  weekly급상승   캐피탈  52.693556
3   2022-04-06  2024-03-07  weekly급상승   캐피탈  54.290738
4   2022-04-13  2024-03-07  weekly급상승   캐피탈  51.786677
..         ...         ...        ...   ...        ...
99  2024-02-07  2024-03-07  weekly급상승   캐피탈  80.062262
100 2024-02-14  2024-03-07  weekly급상승   캐피탈  74.309692
101 2024-02-21  2024-03-07  weekly급상승   캐피탈  81.469951
102 2024-02-28  2024-03-07  weekly급상승   캐피탈  80