In [1]:
from contextlib import contextmanager

from pandas.plotting._core import _get_plot_backend


def table(ax, data, rowLabels=None, colLabels=None, **kwargs):
    """
    Helper function to convert DataFrame and Series to matplotlib.table.
    Parameters
    ----------
    ax : Matplotlib axes object
    data : DataFrame or Series
        Data for table contents.
    **kwargs
        Keyword arguments to be passed to matplotlib.table.table.
        If `rowLabels` or `colLabels` is not specified, data index or column
        name will be used.
    Returns
    -------
    matplotlib table object
    """
    plot_backend = _get_plot_backend("matplotlib")
    return plot_backend.table(
        ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs
    )

In [2]:
def register():
    plot_backend = _get_plot_backend("matplotlib")
    plot_backend.register()

In [3]:
register()

In [4]:
from datetime import timedelta, datetime
import glob
from itertools import chain
import json
import os
import re

import numpy as np
import pandas as pd

from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords
from konlpy.tag import Twitter
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

from pandas.plotting import register_matplotlib_converters
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib import font_manager, rc
font_path = 'C:/Windows/SysWOW64/Fonts/NanumGothic.ttf'
font_name = fm.FontProperties(fname=font_path, size=10).get_name()
plt.rc('font', family=font_name, size=12)
plt.rcParams["figure.figsize"] = (20, 10)
register_matplotlib_converters()



mpl.pyplot.rc('font', family='NanumGothic')

In [5]:
pd.options.mode.chained_assignment = None

# 데이터 불러오기

In [6]:
genre_gn_all = pd.read_json('genre_gn_all.json', typ = 'series')

# 댄스 찾기.
- 대분류 장르코드 숫자 네 자리 중 뒷자리 두 자리가 00인 코드임

In [7]:
# 장르코드 : gnr_code, 장르명 : gnr_name
genre_gn_all = pd.DataFrame(genre_gn_all, columns = ['gnr_name']).reset_index().rename(columns = {'index' : 'gnr_code'})

In [8]:
# 장르코드 뒷자리 두 자리가 00인 코드를 필터링
gnr_code = genre_gn_all[genre_gn_all['gnr_code'].str[-2:] == '00']

In [9]:
gnr_code[gnr_code['gnr_name'] == '댄스']

Unnamed: 0,gnr_code,gnr_name
6,GN0200,댄스


In [10]:
# GN0200 ~ GN0205 까지 댄스 장르임
genre_gn_all[genre_gn_all['gnr_code'].str[-3] == '2' ]

Unnamed: 0,gnr_code,gnr_name
6,GN0200,댄스
7,GN0201,세부장르전체
8,GN0202,'80
9,GN0203,'90
10,GN0204,'00
11,GN0205,'10-
90,GN1200,랩/힙합
91,GN1201,세부장르전체
92,GN1202,팝랩
93,GN1203,얼터너티브힙합


# 곡 별 메타 데이터 가져오기 : song_meta.json

In [11]:
song_meta = pd.read_json('song_meta.json', typ = 'frame')

In [12]:
song_meta

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id
0,[GN0901],20140512,불후의 명곡 - 7080 추억의 얄개시대 팝송베스트,2255639,[2727],Feelings,[GN0900],[Various Artists],0
1,"[GN1601, GN1606]",20080421,"Bach : Partitas Nos. 2, 3 & 4",376431,[29966],"Bach : Partita No. 4 In D Major, BWV 828 - II....",[GN1600],[Murray Perahia],1
2,[GN0901],20180518,Hit,4698747,[3361],Solsbury Hill (Remastered 2002),[GN0900],[Peter Gabriel],2
3,"[GN1102, GN1101]",20151016,Feeling Right (Everything Is Nice) (Feat. Popc...,2644882,[838543],Feeling Right (Everything Is Nice) (Feat. Popc...,[GN1100],[Matoma],3
4,"[GN1802, GN1801]",20110824,그남자 그여자,2008470,[560160],그남자 그여자,[GN1800],[Jude Law],4
...,...,...,...,...,...,...,...,...,...
707984,[GN2001],19991219,The Best Best Of The Black President,65254,[166499],Coffin For Head Of State,[GN2000],[Fela Kuti],707984
707985,[GN0901],19860000,True Colors,44141,[11837],Change Of Heart,[GN0900],[Cyndi Lauper],707985
707986,"[GN0105, GN0101]",20160120,행보 2015 윤종신 / 작사가 윤종신 Live Part.1,2662866,[437],스치듯 안녕,[GN0100],[윤종신],707986
707987,"[GN1807, GN1801]",20131217,명상의 시간을 위한 뉴에이지 음악,2221722,[729868],숲의 빛,[GN1800],[Nature Piano],707987


In [13]:
song_meta[song_meta['song_gn_gnr_basket'] == 'GN0900']

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id


In [14]:
# 곡 아이디(id)와 대분류 장르코드 리스트(song_gn_gnr_basket) 추출
song_gnr_map = song_meta.loc[:, ['id', 'song_gn_gnr_basket']]

# unnest song_gn_gnr_basket
# 3차원을 만들어준다.
song_gnr_map_unnest = np.dstack(
    (   
        # np.repeat 배열의 요소들을 반복함
        np.repeat(song_gnr_map.id.values, list(map(len, song_gnr_map.song_gn_gnr_basket))), 
        # 배열을 재연결 시켜 주어 새로운 배열을 만든다
        np.concatenate(song_gnr_map.song_gn_gnr_basket.values)
    )
)

# unnested 데이터프레임 생성 : song_gnr_map
song_gnr_map = pd.DataFrame(data = song_gnr_map_unnest[0], columns = song_gnr_map.columns)
song_gnr_map['id'] = song_gnr_map['id'].astype(str)
song_gnr_map.rename(columns = {'id' : 'song_id', 'song_gn_gnr_basket' : 'gnr_code'}, inplace = True)

# unnest 객체 제거
del song_gnr_map_unnest

In [15]:
song_gnr_map = song_gnr_map.rename(columns={'song_id': 'id'})
song_gnr_map

Unnamed: 0,id,gnr_code
0,0,GN0900
1,1,GN1600
2,2,GN0900
3,3,GN1100
4,4,GN1800
...,...,...
802854,707984,GN2000
802855,707985,GN0900
802856,707986,GN0100
802857,707987,GN1800


In [16]:
song = song_gnr_map[song_gnr_map['gnr_code'].str.contains('GN02')]
song.rename(columns={'song_id':'id'}, inplace=True)
str(song['id'])

'19            17\n34            30\n65            58\n216          195\n263          235\n           ...  \n802549    707714\n802581    707741\n802662    707809\n802817    707950\n802824    707956\nName: id, Length: 19017, dtype: object'

In [17]:
str(song_meta['id'])

'0              0\n1              1\n2              2\n3              3\n4              4\n           ...  \n707984    707984\n707985    707985\n707986    707986\n707987    707987\n707988    707988\nName: id, Length: 707989, dtype: int64'

In [18]:
song

Unnamed: 0,id,gnr_code
19,17,GN0200
34,30,GN0200
65,58,GN0200
216,195,GN0200
263,235,GN0200
...,...,...
802549,707714,GN0200
802581,707741,GN0200
802662,707809,GN0200
802817,707950,GN0200


In [19]:
song_gnr =pd.concat([song_meta,song_gnr_map])



In [20]:
song_meta

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id
0,[GN0901],20140512,불후의 명곡 - 7080 추억의 얄개시대 팝송베스트,2255639,[2727],Feelings,[GN0900],[Various Artists],0
1,"[GN1601, GN1606]",20080421,"Bach : Partitas Nos. 2, 3 & 4",376431,[29966],"Bach : Partita No. 4 In D Major, BWV 828 - II....",[GN1600],[Murray Perahia],1
2,[GN0901],20180518,Hit,4698747,[3361],Solsbury Hill (Remastered 2002),[GN0900],[Peter Gabriel],2
3,"[GN1102, GN1101]",20151016,Feeling Right (Everything Is Nice) (Feat. Popc...,2644882,[838543],Feeling Right (Everything Is Nice) (Feat. Popc...,[GN1100],[Matoma],3
4,"[GN1802, GN1801]",20110824,그남자 그여자,2008470,[560160],그남자 그여자,[GN1800],[Jude Law],4
...,...,...,...,...,...,...,...,...,...
707984,[GN2001],19991219,The Best Best Of The Black President,65254,[166499],Coffin For Head Of State,[GN2000],[Fela Kuti],707984
707985,[GN0901],19860000,True Colors,44141,[11837],Change Of Heart,[GN0900],[Cyndi Lauper],707985
707986,"[GN0105, GN0101]",20160120,행보 2015 윤종신 / 작사가 윤종신 Live Part.1,2662866,[437],스치듯 안녕,[GN0100],[윤종신],707986
707987,"[GN1807, GN1801]",20131217,명상의 시간을 위한 뉴에이지 음악,2221722,[729868],숲의 빛,[GN1800],[Nature Piano],707987


In [20]:
song_gnr_map['id']= song_gnr_map['id'].astype(int)
song_gnr_map

Unnamed: 0,id,gnr_code
0,0,GN0900
1,1,GN1600
2,2,GN0900
3,3,GN1100
4,4,GN1800
...,...,...
802854,707984,GN2000
802855,707985,GN0900
802856,707986,GN0100
802857,707987,GN1800


In [21]:
song_gnr1 =pd.merge(song_meta,song_gnr_map, how='right',on='id')
song_gnr1


Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id,gnr_code
0,[GN0901],20140512,불후의 명곡 - 7080 추억의 얄개시대 팝송베스트,2255639,[2727],Feelings,[GN0900],[Various Artists],0,GN0900
1,"[GN1601, GN1606]",20080421,"Bach : Partitas Nos. 2, 3 & 4",376431,[29966],"Bach : Partita No. 4 In D Major, BWV 828 - II....",[GN1600],[Murray Perahia],1,GN1600
2,[GN0901],20180518,Hit,4698747,[3361],Solsbury Hill (Remastered 2002),[GN0900],[Peter Gabriel],2,GN0900
3,"[GN1102, GN1101]",20151016,Feeling Right (Everything Is Nice) (Feat. Popc...,2644882,[838543],Feeling Right (Everything Is Nice) (Feat. Popc...,[GN1100],[Matoma],3,GN1100
4,"[GN1802, GN1801]",20110824,그남자 그여자,2008470,[560160],그남자 그여자,[GN1800],[Jude Law],4,GN1800
...,...,...,...,...,...,...,...,...,...,...
802854,[GN2001],19991219,The Best Best Of The Black President,65254,[166499],Coffin For Head Of State,[GN2000],[Fela Kuti],707984,GN2000
802855,[GN0901],19860000,True Colors,44141,[11837],Change Of Heart,[GN0900],[Cyndi Lauper],707985,GN0900
802856,"[GN0105, GN0101]",20160120,행보 2015 윤종신 / 작사가 윤종신 Live Part.1,2662866,[437],스치듯 안녕,[GN0100],[윤종신],707986,GN0100
802857,"[GN1807, GN1801]",20131217,명상의 시간을 위한 뉴에이지 음악,2221722,[729868],숲의 빛,[GN1800],[Nature Piano],707987,GN1800


# 전처리하기

In [22]:
dance_gnr_only = song_gnr1[song_gnr1['gnr_code'].str.contains('GN02')]
dance_gnr_only

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id,gnr_code
19,"[GN2503, GN0205, GN2501, GN2506, GN0201]",20160226,Melting,2669407,[750053],Girl Crush,"[GN2500, GN0200]",[마마무 (Mamamoo)],17,GN0200
34,"[GN2502, GN0205, GN2501, GN2506, GN1501, GN020...",20190519,A-TEEN2 Part.2,10286928,[861436],9-TEEN,"[GN2500, GN1500, GN0200]",[세븐틴],30,GN0200
65,[GN0201],20060407,Warming Up (Digital Single),316690,[160459],국화꽃 향기 (Feat. 김세찬 & DJ Jenifer),[GN0200],[디제이 버디],58,GN0200
216,"[GN0204, GN0201]",20010705,Look,2518,[100786],봐,[GN0200],[To-Ya],195,GN0200
263,"[GN2502, GN0205, GN2501, GN2506, GN0201]",20120323,The Return,2097929,[100078],Move With Me,"[GN2500, GN0200]",[신화],235,GN0200
...,...,...,...,...,...,...,...,...,...,...
802549,"[GN0205, GN0201]",20150413,오늘 예쁘네,2313643,[696519],오늘 예쁘네,[GN0200],[투포케이],707714,GN0200
802581,[GN0201],20120911,사랑이 떠나가요,2149699,[220287],잘부탁해요,[GN0200],[마골피],707741,GN0200
802662,"[GN0203, GN0201]",19990301,Storm,354444,[102753],Never,[GN0200],[루머스],707809,GN0200
802817,"[GN2502, GN0205, GN2501, GN2506, GN0201]",20150409,One Great Step Returns Live,2313220,[476181],Cover Girl (OGS Returns Live Ver.),"[GN2500, GN0200]",[인피니트],707950,GN0200


In [23]:
dance_gnr_only_organaized=dance_gnr_only.drop(columns=['song_gn_dtl_gnr_basket','album_name','album_id','artist_id_basket','song_gn_gnr_basket','gnr_code'])
dance_gnr_only_organaized


Unnamed: 0,issue_date,song_name,artist_name_basket,id
19,20160226,Girl Crush,[마마무 (Mamamoo)],17
34,20190519,9-TEEN,[세븐틴],30
65,20060407,국화꽃 향기 (Feat. 김세찬 & DJ Jenifer),[디제이 버디],58
216,20010705,봐,[To-Ya],195
263,20120323,Move With Me,[신화],235
...,...,...,...,...
802549,20150413,오늘 예쁘네,[투포케이],707714
802581,20120911,잘부탁해요,[마골피],707741
802662,19990301,Never,[루머스],707809
802817,20150409,Cover Girl (OGS Returns Live Ver.),[인피니트],707950


In [24]:
dance_gnr_only_organaized['artist_name_basket']= dance_gnr_only_organaized['artist_name_basket'].astype('str')
dance_gnr_only_organaized

Unnamed: 0,issue_date,song_name,artist_name_basket,id
19,20160226,Girl Crush,['마마무 (Mamamoo)'],17
34,20190519,9-TEEN,['세븐틴'],30
65,20060407,국화꽃 향기 (Feat. 김세찬 & DJ Jenifer),['디제이 버디'],58
216,20010705,봐,['To-Ya'],195
263,20120323,Move With Me,['신화'],235
...,...,...,...,...
802549,20150413,오늘 예쁘네,['투포케이'],707714
802581,20120911,잘부탁해요,['마골피'],707741
802662,19990301,Never,['루머스'],707809
802817,20150409,Cover Girl (OGS Returns Live Ver.),['인피니트'],707950


In [25]:
dance_gnr_only_organaized['artist_name_basket'].tolist()[0]

"['마마무 (Mamamoo)']"

In [26]:
a = dance_gnr_only_organaized['artist_name_basket'].str.replace("[", '')
b = a.str.replace("]", '')
b.str.replace("'", '')


  a = dance_gnr_only_organaized['artist_name_basket'].str.replace("[", '')
  b = a.str.replace("]", '')


19        마마무 (Mamamoo)
34                  세븐틴
65               디제이 버디
216               To-Ya
263                  신화
              ...      
802549             투포케이
802581              마골피
802662              루머스
802817             인피니트
802824               신화
Name: artist_name_basket, Length: 19017, dtype: object

In [27]:
dance_gnr_only_organaized['artist_name_basket'] = dance_gnr_only_organaized['artist_name_basket'].str.replace("[", '').str.replace("]", '').str.replace("'", '')
dance_gnr_only_organaized['artist_name_basket']



  dance_gnr_only_organaized['artist_name_basket'] = dance_gnr_only_organaized['artist_name_basket'].str.replace("[", '').str.replace("]", '').str.replace("'", '')


19        마마무 (Mamamoo)
34                  세븐틴
65               디제이 버디
216               To-Ya
263                  신화
              ...      
802549             투포케이
802581              마골피
802662              루머스
802817             인피니트
802824               신화
Name: artist_name_basket, Length: 19017, dtype: object

In [28]:
dance_gnr_only_organaized['artist_name_basket'].astype(str)

19        마마무 (Mamamoo)
34                  세븐틴
65               디제이 버디
216               To-Ya
263                  신화
              ...      
802549             투포케이
802581              마골피
802662              루머스
802817             인피니트
802824               신화
Name: artist_name_basket, Length: 19017, dtype: object

In [33]:
dance_gnr_only_organaized= dance_gnr_only_organaized.drop(columns='id')

# 멜론에 있는 노래 중 댄스 장르만 가져옴

In [34]:
# 댄스장르 멜론 차트에서 긁어옴


dance_gnr_only_organaized = dance_gnr_only_organaized.rename(columns={'issue_date':'발매일', 'song_name':'제목','artist_name_basket':'가수'})
dance_gnr_only_organaized

Unnamed: 0,발매일,제목,가수
19,20160226,Girl Crush,마마무 (Mamamoo)
34,20190519,9-TEEN,세븐틴
65,20060407,국화꽃 향기 (Feat. 김세찬 & DJ Jenifer),디제이 버디
216,20010705,봐,To-Ya
263,20120323,Move With Me,신화
...,...,...,...
802549,20150413,오늘 예쁘네,투포케이
802581,20120911,잘부탁해요,마골피
802662,19990301,Never,루머스
802817,20150409,Cover Girl (OGS Returns Live Ver.),인피니트
