# 라이브러리 로드

In [10]:
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 데이터 로드

In [2]:
# Load the Naver search-trend tables for boy groups and girl groups.
# Forward slashes are used so the relative paths resolve on Windows, macOS and
# Linux alike (a backslash-separated path only works on Windows); this also
# matches the path style used for the album CSV elsewhere in this notebook.
boygroup_df = pd.read_csv('trend_dataset/naver_trend/boygroup_naver_trend.csv')
girlgroup_df = pd.read_csv('trend_dataset/naver_trend/girlgroup_naver_trend.csv')

# Convert the 'period' column from text to datetime64 so it can be used as a
# time axis when plotting.
boygroup_df['period'] = pd.to_datetime(boygroup_df['period'])
girlgroup_df['period'] = pd.to_datetime(girlgroup_df['period'])

In [4]:
# Print the column summary of both trend tables, separated by blank lines.
# DataFrame.info() writes its report itself and returns None, so wrapping the
# call in print() also emits a trailing "None" line after each report.
for _position, _frame in enumerate((boygroup_df, girlgroup_df)):
    if _position > 0:
        print('\n')
    print(_frame.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008 entries, 0 to 2007
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   period      2008 non-null   datetime64[ns]
 1   BTS         2008 non-null   float64       
 2   NCT DREAM   2008 non-null   float64       
 3   SEVENTEEN   2008 non-null   float64       
 4   Stray Kids  2008 non-null   float64       
 5   TXT         2008 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 94.2 KB
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2008 entries, 0 to 2007
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   period     2008 non-null   datetime64[ns]
 1   AESPA      1791 non-null   float64       
 2   BLACKPINK  2008 non-null   float64       
 3   IVE        2008 non-null   float64       
 4   NEWJEANS   779 non-null    float64       
 5   TWI

In [30]:
# Album information for the top-5 boy/girl group artists by album sales over
# the four years 2020-2023.
# Pipeline: read the CSV (first column becomes the index), discard the genre
# columns, then keep only rows whose event start date and album release date
# are both on or after 2020-01-01.
artist_album_df = (
    pd.read_csv('album_dataset/top5_group_info.csv', index_col=0)
    .drop(columns=['main_genre', 'sub_genre'])
    .query("(event_begin_date >= '2020-01-01') and (release_date >= '2020-01-01')")
)
artist_album_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1844 entries, 29 to 4590
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   artist_name          1844 non-null   object 
 1   artist_id            1844 non-null   object 
 2   debut_date           1844 non-null   object 
 3   artist_types         1844 non-null   object 
 4   member_cnt           1844 non-null   int64  
 5   sub_group_cnt        1844 non-null   int64  
 6   label                1739 non-null   object 
 7   album_id             1844 non-null   object 
 8   album_name           1844 non-null   object 
 9   album_type           1844 non-null   object 
 10  release_date         1844 non-null   object 
 11  track_cnt            1844 non-null   int64  
 12  avg_track_play_time  1820 non-null   float64
 13  participant_type     1844 non-null   object 
 14  event_name           1844 non-null   object 
 15  event_type           1844 non-null   objec

# Simple EDA

## 보이그룹

In [31]:
# Preview the first five rows of the filtered album/event table.
artist_album_df.head(n=5)

Unnamed: 0,artist_name,artist_id,debut_date,artist_types,member_cnt,sub_group_cnt,label,album_id,album_name,album_type,release_date,track_cnt,avg_track_play_time,participant_type,event_name,event_type,event_begin_date,event_end_date
29,SEVENTEEN,e04d239e-9fa8-49b3-b9b7-9e439c3cb1d1,2015-05-26,"['k-pop', 'boy band', 'boy group', '3rd gen k-...",13,5,PLEDIS Entertainment,45dcfc52-110c-4a79-ba59-6b73b5f56502,Face the Sun,Album,2022-05-27,9,194044.111111,main performer,2023 MAMA Awards: Chapter 2,Award ceremony,2023-11-29,2023-11-29
31,SEVENTEEN,e04d239e-9fa8-49b3-b9b7-9e439c3cb1d1,2015-05-26,"['k-pop', 'boy band', 'boy group', '3rd gen k-...",13,5,PLEDIS Entertainment,45dcfc52-110c-4a79-ba59-6b73b5f56502,Face the Sun,Album,2022-05-27,9,194044.111111,main performer,Mnet Asian Music Awards 2020 MAMA,Award ceremony,2020-12-06,2020-12-06
45,SEVENTEEN,e04d239e-9fa8-49b3-b9b7-9e439c3cb1d1,2015-05-26,"['k-pop', 'boy band', 'boy group', '3rd gen k-...",13,5,PLEDIS Entertainment,394eddad-81e6-40e4-930d-0d95d824140c,Always Yours,Album,2023-08-23,9,199947.777778,main performer,2023 MAMA Awards: Chapter 2,Award ceremony,2023-11-29,2023-11-29
47,SEVENTEEN,e04d239e-9fa8-49b3-b9b7-9e439c3cb1d1,2015-05-26,"['k-pop', 'boy band', 'boy group', '3rd gen k-...",13,5,PLEDIS Entertainment,394eddad-81e6-40e4-930d-0d95d824140c,Always Yours,Album,2023-08-23,9,199947.777778,main performer,Mnet Asian Music Awards 2020 MAMA,Award ceremony,2020-12-06,2020-12-06
53,SEVENTEEN,e04d239e-9fa8-49b3-b9b7-9e439c3cb1d1,2015-05-26,"['k-pop', 'boy band', 'boy group', '3rd gen k-...",13,5,PLEDIS Entertainment,adcb9294-e046-437e-9cca-53f3fd2aa48c,17 IS RIGHT HERE,Album,2024-04-29,12,197710.166667,main performer,2023 MAMA Awards: Chapter 2,Award ceremony,2023-11-29,2023-11-29


In [32]:
# Relabel the 'TXT' trend column with the group's full name. The plotting
# function looks up album rows by artist_name using the column label, so
# presumably this makes the label match the album table -- verify against the CSV.
boygroup_df = boygroup_df.rename(
    {'TXT': 'TOMORROW X TOGETHER'},
    axis='columns',
)

In [46]:
# Naver trend visualization

def _add_date_markers(fig, dates, color, row):
    """Draw one dashed vertical line per distinct, non-null date on subplot ``row``."""
    for date in dates.dropna().unique():
        fig.add_vline(
            x=pd.to_datetime(date),
            line=dict(color=color, width=2, dash="dash"),
            row=row,
            col=1,
        )


def show_naver_trend(group_df, album_df=None):
    """Plot each artist's Naver search trend with album/event date markers.

    One subplot is stacked per artist column. On every subplot, dashed red
    lines mark that artist's album release dates and dashed blue lines mark
    the start dates of their events.

    Args:
        group_df: DataFrame with a ``'period'`` datetime column; every other
            column is treated as one artist's trend series, and its label is
            used to look the artist up in ``album_df``.
        album_df: DataFrame with ``artist_name``, ``release_date`` and
            ``event_begin_date`` columns. Defaults to the module-level
            ``artist_album_df``.

    Returns:
        None. The figure is displayed via ``fig.show()``; it is deliberately
        not returned so a notebook cell does not render it a second time.

    Raises:
        KeyError: if ``group_df`` has no ``'period'`` column or ``album_df``
            lacks one of the required columns.
    """
    if album_df is None:
        # Fall back to the album table loaded at module level.
        album_df = artist_album_df

    artists = [column for column in group_df.columns if column != 'period']
    # Size the grid from the data instead of assuming exactly five artists;
    # make_subplots needs at least one row even when there is nothing to plot.
    n_rows = max(len(artists), 1)

    # Create the subplot grid
    fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=False, subplot_titles=artists)

    # Add each artist's series and markers to its own subplot
    for row, artist in enumerate(artists, start=1):
        fig.add_trace(
            go.Scatter(
                x=group_df['period'],
                y=group_df[artist],
                mode='lines',
                name=artist,
            ),
            row=row, col=1,
        )

        # A boolean mask (rather than a formatted query string) keeps the
        # lookup correct for artist names that contain quote characters.
        single_artist_album_df = album_df[album_df['artist_name'] == artist]

        # Album release dates
        _add_date_markers(fig, single_artist_album_df['release_date'], "red", row)

        # Event start dates
        _add_date_markers(fig, single_artist_album_df['event_begin_date'], "blue", row)

    # Layout: 240 px per subplot, i.e. 1200 px for the usual five artists
    fig.update_layout(
        height=240 * n_rows,
        width=1000,
        title_text="Naver Trend Visualization",
        showlegend=False,
    )

    # Display the figure
    fig.show()


# Render the trend figure for the boy groups.
show_naver_trend(group_df=boygroup_df)

## 걸그룹

In [40]:
# Relabel the upper-case trend columns with the groups' stylised names. The
# plotting function looks up album rows by artist_name using the column label,
# so presumably this makes the labels match the album table -- verify against the CSV.
girlgroup_df = girlgroup_df.rename(
    {'AESPA': 'aespa', 'NEWJEANS': 'NewJeans'},
    axis='columns',
)

In [43]:
# Render the trend figure for the girl groups.
show_naver_trend(group_df=girlgroup_df)