In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Data columns kept for every video record; this list is passed to
# YoutubeApi when the client is constructed.
use_columns = [
    'videoId',
    'title',
    'description',
    'tags',
    'viewCount',
    'likeCount',
    'favoriteCount',
    'commentCount',
    'publishedAt',
    'channelId',
    'channelTitle',
    'country',
]


In [3]:
from dotenv import load_dotenv
import os
import sys
import pandas as pd
import numpy as np
from youtubeapi import YoutubeApi
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import matplotlib.font_manager as fm
from utils import utils

# Font setting: use a system font file chosen by platform (Malgun on Windows,
# Apple SD Gothic Neo otherwise) — presumably so that Korean labels render
# correctly in plots.
font_location = 'C:/Windows/Fonts/Malgun.ttf' if sys.platform == 'win32' else '/System/Library/Fonts/AppleSDGothicNeo.ttc'
if os.path.exists(font_location):
  font_name = fm.FontProperties(fname=font_location).get_name()
  matplotlib.rc('font', family=font_name)
else:
  # The hard-coded path only exists on Windows/macOS. On other platforms
  # (e.g. Linux) keep matplotlib's default font instead of failing on the
  # missing file and aborting the rest of this cell.
  font_name = None
  print(f'Font file not found: {font_location}; keeping the default matplotlib font.', file=sys.stderr)

# Load variables from a .env file into the environment, then read the API key.
load_dotenv()
apiKey = os.environ.get('YoutubeApiToken')  # None when the variable is not set
url = 'https://youtube.googleapis.com/youtube/v3/'

# Project-local API client, configured with the key and the columns to keep.
api = YoutubeApi(apiKey, use_columns)

In [4]:
# Load the source table from CSV.
asia_data = pd.read_csv('../data/asia_data_kor.csv')
# Forward-fill missing 'Year' values from the previous row. The result is
# assigned back to the column: calling fillna(..., inplace=True) on a column
# selection is chained assignment, which is deprecated (as is the `method=`
# argument) and does not update the DataFrame under pandas copy-on-write.
asia_data['Year'] = asia_data['Year'].ffill()

In [25]:
# Pre-COVID comparison window: January 2019 through December 2019.
before_start_date, before_end_date = '2019년1월', '2019년12월'

# Post-COVID comparison window: the one-year span ending May 2023
# (noted by the author as the last month for which data was collected).
after_start_date, after_end_date = '2022년6월', '2023년5월'

# Countries to compare.
countries = [
    '일본',
    '베트남',
    '대만',
    '홍콩',
    '필리핀',
    '태국',
]

In [26]:
# Build a combined period label (e.g. '2004년1월') by concatenating the
# 'Year' and 'month' values, and store it in a new 'date' column.
year_part = asia_data['Year']
month_part = asia_data['month']
asia_data['date'] = year_part + month_part
asia_data

Unnamed: 0,Year,month,법무부_명수,법무부_전년대비,일본_명수,일본_전년대비,중국_명수,중국_전년대비,베트남_명수,베트남_전년대비,...,사이프러스_전년대비,부탄_명수,부탄_전년대비,요르단_명수,요르단_전년대비,네팔_명수,네팔_전년대비,예멘_명수,예멘_전년대비,date
0,2004년,1월,793478.0,,164785.0,,186246.0,,,,...,,,,,,,,,,2004년1월
1,2004년,2월,670447.0,,142718.0,,215373.0,,,,...,,,,,,,,,,2004년2월
2,2004년,3월,587629.0,,112516.0,,190382.0,,,,...,,,,,,,,,,2004년3월
3,2004년,4월,642413.0,,120427.0,,223062.0,,,,...,,,,,,,,,,2004년4월
4,2004년,5월,680185.0,,115659.0,,202592.0,,,,...,,,,,,,,,,2004년5월
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,2019년,누계,28714247.0,0.000636,5584597.0,-0.259234,4346567.0,0.036924,4290802.0,0.248994,...,,1408.0,0.410822,20322.0,0.356156,31108,-0.16416787575904135,,,2019년누계
260,2020년,누계,4276006.0,-0.851084,487939.0,-0.912628,0.0,,840041.0,-0.804223,...,,142.0,-0.899148,6825.0,-0.664157,6944,-0.7767776777677768,0,,2020년누계
261,2021년,누계,1222541.0,-0.714093,18947.0,-0.961169,0.0,,32500.0,-0.961311,...,,0.0,,1031.0,-0.848938,0,,0,,2021년누계
262,2022년,누계,6554031.0,4.360991,1012751.0,52.451787,0.0,,769167.0,,...,,0.0,,0.0,,0,,,,2022년누계


In [27]:
# Collect one single-row frame per country and concatenate them once after
# the loop, instead of growing `results` with pd.concat on every iteration.
country_rows = []

# Loop over each country
for country in countries:
  # Pre-COVID window (before_start_date ~ before_end_date)
  before = utils.get_dataframe_by_period2(asia_data, before_start_date, before_end_date, country)
  # Post-COVID window (after_start_date ~ after_end_date)
  after = utils.get_dataframe_by_period2(asia_data, after_start_date, after_end_date, country)

  # Row holding the largest 'count' in the pre-COVID window, as a one-row
  # frame; reset_index moves the row's original index label into a column.
  before_peak = (
    before.loc[before['count'].idxmax()].to_frame().T
    .reset_index(drop=False)
    .rename({'index': 'before_max_index', 'count': 'before_max_count', 'date': 'before_max_date'}, axis=1)
  )

  # Same for the post-COVID window.
  after_peak = (
    after.loc[after['count'].idxmax()].to_frame().T
    .reset_index(drop=False)
    .rename({'index': 'after_max_index', 'count': 'after_max_count', 'date': 'after_max_date'}, axis=1)
  )

  # One-cell frame carrying the country name (its column is labelled 0 until renamed).
  country_label = pd.Series(country).to_frame().T

  # Side-by-side: country | pre-COVID peak | post-COVID peak
  country_rows.append(
    pd.concat([country_label, before_peak, after_peak], axis=1).rename({0: 'country'}, axis=1)
  )

results = pd.concat(country_rows).reset_index(drop=True)
# Post-COVID peak expressed as a percentage of the pre-COVID peak.
results['pct'] = (results['after_max_count'] / results['before_max_count'] * 100)
# utf-8-sig writes a BOM at the start of the file.
results.to_csv('../data/countries_max.csv', encoding='utf-8-sig')

In [28]:
# Display the per-country peak comparison table.
results

Unnamed: 0,country,before_max_index,before_max_count,before_max_date,after_max_index,after_max_count,after_max_date,pct
0,일본,180,779383.0,2019년1월,229,568622.0,2023년2월,72.957968
1,베트남,191,424736.0,2019년12월,229,301343.0,2023년2월,70.948307
2,대만,191,160667.0,2019년12월,229,59195.0,2023년2월,36.843285
3,홍콩,180,161115.0,2019년1월,232,25909.0,2023년5월,16.08106
4,필리핀,191,203965.0,2019년12월,229,127478.0,2023년2월,62.499939
5,태국,180,208075.0,2019년1월,227,130517.0,2022년12월,62.72594
