# 데이터 수집(web scraping)
- 뮤직카우(Musicow) 사이트에서 가격이 높은 순으로 정렬된 음원 리스트를 추출한다.

In [1]:
# 필요한 라이브러리 import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# seaborn lib add
import seaborn as sns

from glob import glob

%matplotlib inline

import json

import warnings
# Ignore every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

from datetime import date, datetime, timedelta
from dateutil.parser import parse

# Record the library versions this run used.
print(f"numpy version: {np.__version__}")
print(f"pandas version: {pd.__version__}")

# Work around broken Korean glyphs in matplotlib by choosing an OS-specific font.
import platform

from matplotlib import font_manager, rc

os_name = platform.system()
if os_name == 'Windows':
    # Resolve the font family name from the Malgun Gothic font file.
    path = "c:/Windows/Fonts/malgun.ttf"
    font_name = font_manager.FontProperties(fname=path).get_name()
    plt.rc('font', family=font_name)
elif os_name == 'Darwin':
    plt.rc('font', family='AppleGothic')
else:
    # No font is configured on other platforms; only a notice is printed.
    print('Unknown system... sorry~~~~')


# Keep the minus sign on chart axes renderable with the font chosen above.
import matplotlib
matplotlib.rcParams.update({'axes.unicode_minus': False})

numpy version: 1.20.1
pandas version: 1.2.4


In [2]:
from bs4 import BeautifulSoup
from urllib.request import urlopen, urlretrieve
from urllib.error   import HTTPError
from urllib.error   import URLError
import requests
import re
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys

In [3]:
import os

# Location of the ChromeDriver executable. Set the CHROMEDRIVER_PATH environment
# variable to override it; the fallback is the path used in the original run.
path = os.environ.get(
    'CHROMEDRIVER_PATH',
    'C:/Users/whgud/driver/chromedriver_win32/chromedriver.exe',
)
driver = webdriver.Chrome(path)

# Open the market list page; the query string requests the
# `amt_market_latest_desc` sort order.
driver.get('https://www.musicow.com/market?tab=list&sortorder=amt_market_latest_desc')

### 1. 앨범재킷사진 가져오기

In [41]:
# Collect the `src` attribute of every album-cover <img> matched on the current page.
images = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.thmb > img')
img_url = [image.get_attribute('src') for image in images]
img_url

['https://d1z4u1c8ot3who.cloudfront.net/song/210303/73eca192.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/f0465186.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/e415012e.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/d401946e.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/6c34a7fb.png',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/13e7890e.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/f8d04f19.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/3dffd915.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/1f2abe5e.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/2ae9d8e9.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/c1ffc507.png',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/70eb4baf.png',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/3aaf868e.jpg',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210105/df4b6ae3.png',
 'https://d1z4u1c8ot3who.cloudfront.net/song/210323/6f3a7eab.j

### 2. 곡명 가져오기

In [42]:
# Collect the text of every song-title element matched on the current page.
titles = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > strong')
title_list = [title.text for title in titles]
title_list


["롤린 (Rollin')",
 '미리 메리 크리스마스 (Feat. 천둥 Of MBLAQ)',
 '축가',
 '저녁하늘',
 '인오늘 그녀가',
 '눈물 자국',
 '향수',
 'Fly',
 '안녕',
 '하이힐',
 '눈물아 안녕',
 '이름',
 '내입술...따뜻한 커피처럼',
 '그 중에 그대를 만나',
 'LOVE DAY',
 '인맨발의 청춘',
 '여자라서',
 'Give it 2 Me',
 '바다 끝',
 '인젊은 날의 초상']

- 곡명 앞에 "인"이라는 글자가 붙어서 나오는 경우가 있으므로, 이후 전처리 단계에서 이를 제거해 줄 필요가 있음!

### 3. 아티스트명 가져오기

In [43]:

# Collect the text of every artist-name element matched on the current page.
artists = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > span')
artist_list = [artist.text for artist in artists]
artist_list

['브레이브걸스',
 '아이유',
 '전우성 (노을)',
 '에일리(AILEE)',
 '루그',
 '김종국',
 '케이윌, 전우성 (노을)',
 'SUPER JUNIOR-K.R.Y. (슈퍼주니어-K.R.Y.)',
 '박혜경',
 '브레이브걸스',
 '아이비',
 '임재범',
 '샵',
 '이선희',
 '양요섭, 정은지',
 '벅',
 '아이유',
 '신화',
 '최백호',
 '주영훈']

### 4. 1주일 거래가 가져오기

In [44]:
# Collect the text of every transaction-price element matched on the current page.
transaction_prices = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dt > strong')
transaction_prices_list = [transaction_price.text for transaction_price in transaction_prices]
transaction_prices_list

['860,200',
 '847,600',
 '576,300',
 '418,000',
 '350,000',
 '350,000',
 '330,000',
 '330,000',
 '300,600',
 '278,000',
 '270,000',
 '249,000',
 '229,700',
 '200,000',
 '184,400',
 '174,200',
 '170,000',
 '170,000',
 '160,000',
 '150,000']

### 5. 저작권료 가져오기(1주)

In [45]:
# Collect the text of every copyright-fee element matched on the current page.
copyright_fees = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd.bg > span')
copyright_fee_list = [copyright_fee.text for copyright_fee in copyright_fees]
copyright_fee_list

['27,044 (3.1%)',
 '2,350 (0.3%)',
 '17,608 (3.1%)',
 '2,313 (0.6%)',
 '14,626 (4.2%)',
 '905 (0.3%)',
 '4,468 (1.4%)',
 '939 (0.3%)',
 '8,655 (2.9%)',
 '5,361 (1.9%)',
 '1,075 (0.4%)',
 '1,343 (0.5%)',
 '2,452 (1.1%)',
 '1,629 (0.8%)',
 '6,325 (3.4%)',
 '5,152 (3.0%)',
 '1,217 (0.7%)',
 '351 (0.2%)',
 '5,019 (3.1%)',
 '3,315 (2.2%)']

### 6. 전일비 가져오기

In [18]:
# Collect the day-over-day change text of every matched element on the current page.
diff = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd:nth-child(2) > strong')

diff_list = []

for d in diff:
    day_change = d.text
    if day_change == '-':
        # str.replace returns a new string and never modifies in place; the
        # previous code discarded that result, so '-' was stored unchanged.
        day_change = '0 (0%)'
    diff_list.append(day_change)

diff_list

['13,800 (1.6%)',
 '3,000 (-0.4%)',
 '26,200 (-4.5%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '7,500 (2.7%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '7,000 (-3.5%)',
 '0 (0%)',
 '0 (0%)',
 '0 (0%)',
 '15,900 (-8.6%)',
 '0 (0%)',
 '0 (0%)']

In [21]:
# Combined crawl: start from six fresh, independent accumulator lists
# (one per scraped column) that the page loops below append to.
(
    img_list,
    title_list,
    artist_list,
    transaction_prices_list,
    copyright_fee_list,
    diff_list,
) = ([] for _ in range(6))


- 1~5페이지와 6~48페이지는 페이지 버튼의 XPath 경로 패턴이 다르기 때문에(아래 셀의 XPath 메모 참고) 나누어서 크롤링을 진행한다!

In [None]:
# Reference notes only (kept as comments so this cell does not raise a
# SyntaxError when the notebook is run top to bottom).
# XPaths of the pagination links:
#   //*[@id="market_list"]/div[3]/a[1]   <- page 2: link index starts at 1
#   //*[@id="market_list"]/div[3]/a[2]
#   //*[@id="market_list"]/div[3]/a[3]
#   //*[@id="market_list"]/div[3]/a[4]
#   //*[@id="nextPage"]
#   //*[@id="market_list"]/div[3]/a[2]   <- page 7: link index starts at 2
#   //*[@id="market_list"]/div[3]/a[3]

In [22]:
# Crawl pages 1-5. Each iteration scrapes the page currently shown, then
# activates pagination link a[i+1] (presumably the link to the next page).
for i in range(5):
    # Located before scraping; activated at the end of the iteration.
    page_btn = driver.find_element_by_xpath('//*[@id="market_list"]/div[3]/a['+str(i+1)+']')

    # Album-cover image URLs.
    images = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.thmb > img')
    for image in images:
        url = image.get_attribute('src')
        img_list.append(url)
        print(url)

    # Song titles.
    titles = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > strong')
    for title in titles:
        title_name = title.text
        title_list.append(title_name)
        print(title_name)

    # Artist names.
    artists = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > span')
    for artist in artists:
        artist_name = artist.text
        artist_list.append(artist_name)
        print(artist_name)

    # Transaction prices.
    transaction_prices = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dt > strong')
    for transaction_price in transaction_prices:
        price = transaction_price.text
        transaction_prices_list.append(price)
        print(price)
    time.sleep(1)

    # Copyright fees.
    copyright_fees = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd.bg > span')
    for copyright_fee in copyright_fees:
        fee = copyright_fee.text
        copyright_fee_list.append(fee)
        print(fee)

    # Day-over-day change.
    diff = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd:nth-child(2) > strong')
    for d in diff:
        day_change = d.text
        if day_change == '-':
            # str.replace returns a new string; the previous code discarded
            # it, so '-' was appended unchanged. Store the normalized value.
            day_change = '0 (0%)'
        diff_list.append(day_change)

    time.sleep(2)

    # Move on to the next page and wait before scraping again.
    page_btn.send_keys(Keys.ENTER)

    time.sleep(2)

https://d1z4u1c8ot3who.cloudfront.net/song/210303/73eca192.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/f0465186.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/e415012e.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/d401946e.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/6c34a7fb.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/13e7890e.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/f8d04f19.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/3dffd915.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/1f2abe5e.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/2ae9d8e9.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/c1ffc507.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/70eb4baf.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/3aaf868e.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/df4b6ae3.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/84eb2682.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/092c0

진심
I Like That
친구라도 될 걸 그랬어
Again
인죄
Tic Toc
Loving U (러빙유)
추억은 눈꽃처럼
넌 살아있다
모든 밤 너에게 (연애혁명 X 민현 (뉴이스트))
인너를 사랑하고도
O Sole Mio
나를 사랑했던 사람아
그대 고운 내사랑
밤 끝없는 밤
거짓말
If You Love Me (Feat. 박재범)
여름여름해 (Sunny Summer)
인처음 보는 나 (하트시그널 삽입곡)
사랑비
이예준
씨스타
거미
스페이스 에이
루그
매드클라운
씨스타
강수지
버즈
민현 (뉴이스트)
전유나
IZ*ONE (아이즈원)
허각
어반자카파
AKMU (악뮤)
BIGBANG
NS 윤지
여자친구 (GFRIEND)
Steady (스테디)
김태우
72,800
72,000
71,100
70,800
70,500
70,100
70,000
69,800
69,700
69,300
69,100
69,100
69,100
69,100
69,000
69,000
69,000
68,800
68,600
68,300
3,729 (5.1%)
1,055 (1.5%)
1,031 (1.5%)
963 (1.4%)
3,972 (5.6%)
308 (0.4%)
2,210 (3.2%)
314 (0.4%)
542 (0.8%)
6,108 (8.8%)
2,806 (4.1%)
9,929 (14.4%)
2,903 (4.2%)
1,914 (2.8%)
1,352 (2.0%)
1,208 (1.8%)
1,035 (1.5%)
3,597 (5.2%)
4,115 (6.0%)
2,088 (3.1%)


In [23]:
# Print the length of each accumulator list; the columns are aligned only if all six match.
print(*(len(column) for column in (
    img_list, title_list, artist_list,
    transaction_prices_list, copyright_fee_list, diff_list,
)))

100 100 100 100 100 100


In [24]:
# Crawl pages 6-25: 4 groups of 5 pages. From page 6 onward the pagination
# links are addressed starting at a[2] instead of a[1].
for _ in range(4):
    for i in range(5):
        # Located before scraping; activated at the end of the iteration.
        page_btn = driver.find_element_by_xpath('//*[@id="market_list"]/div[3]/a['+str(i+2)+']')

        # Album-cover image URLs.
        images = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.thmb > img')
        for image in images:
            url = image.get_attribute('src')
            img_list.append(url)
            print(url)
        time.sleep(1)

        # Song titles.
        titles = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > strong')
        for title in titles:
            title_name = title.text
            title_list.append(title_name)
            print(title_name)
        time.sleep(1)

        # Artist names.
        artists = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > span')
        for artist in artists:
            artist_name = artist.text
            artist_list.append(artist_name)
            print(artist_name)
        time.sleep(1)

        # Transaction prices.
        transaction_prices = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dt > strong')
        for transaction_price in transaction_prices:
            price = transaction_price.text
            transaction_prices_list.append(price)
            print(price)
        time.sleep(1)

        # Copyright fees.
        copyright_fees = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd.bg > span')
        for copyright_fee in copyright_fees:
            fee = copyright_fee.text
            copyright_fee_list.append(fee)
            print(fee)

        # Day-over-day change.
        diff = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd:nth-child(2) > strong')
        for d in diff:
            day_change = d.text
            if day_change == '-':
                # str.replace returns a new string; the previous code discarded
                # it, so '-' was appended unchanged. Store the normalized value.
                day_change = '0 (0%)'
            diff_list.append(day_change)

        time.sleep(2)

        # Move on to the next page and wait before scraping again.
        page_btn.send_keys(Keys.ENTER)

        time.sleep(2)

https://d1z4u1c8ot3who.cloudfront.net/song/210105/fc3c20da.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/3abbf900.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/92b624d8.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/6c52ca00.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210830/94082fd0.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/e4e4c066.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/780a83fe.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/7760db08.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/7159ec45.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/57291954.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/18ffb115.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/a8f5ff2c.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/3267667c.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/38d1ff84.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/cfcd3035.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/e3208

https://d1z4u1c8ot3who.cloudfront.net/song/210128/9f784b0c.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/d3ad4da4.png
인한번만...
INTRO. 권지용 (Middle Fingers-Up)
인Insomnia (불면증)
사랑은 아프려고 하는 거죠
인Endless
인떠나가요, 떠나지마요
White Day
인하얀 바람
인Dream (Prod. by 박근태)
나를 잊지 말아요
어젠 (DUET.환희)
인HAVE A GOODNIGHT (취향저격 그녀 X 셔누 (몬스타엑스), 민혁 (몬스타엑스))
instagram
IF... (너만 있으면)
OOH-AHH하게
고백
I Swear
인Too Love
인백설공주를 사랑한 난장이
랄랄라 (Feat. 소야)
혜령
G-DRAGON
휘성 (Realslow)
엠씨더맥스 (M.C the MAX)
플라워
더 크로스
걸스데이
소방차
수지 (SUZY), 백현 (BAEKHYUN)
허각
송지은
셔누 (몬스타엑스), 민혁 (몬스타엑스)
DEAN
B1A4
TWICE (트와이스)
장나라
씨스타
XIA (준수)
더 크로스
마이티 마우스
50,300
50,000
49,800
49,800
49,700
49,600
49,000
48,900
48,700
48,500
48,400
48,400
48,300
48,200
48,000
47,600
47,600
47,500
47,500
47,300
2,417 (4.8%)
969 (1.9%)
2,304 (4.6%)
967 (1.9%)
1,362 (2.7%)
2,474 (5.0%)
382 (0.8%)
1,693 (3.5%)
2,393 (4.9%)
1,370 (2.8%)
817 (1.7%)
10,994 (22.7%)
1,078 (2.2%)
186 (0.4%)
869 (1.8%)
1,138 (2.4%)
2,740 (5.8%)
2,427 (5.1%)
2,279 (4.8%)
679 (1.4%)
https://d1z4u1c8ot3

늪
지우개
때려박는 랩 (Bonus Track) (Battlecry)
인당신을 위하여
SHAKE IT
안되나요
홀로 된다는 것
다시는 사랑하지 않고, 이별에 아파하기 싫어
인좋다
추억속의 그대
Hello
두근두근
다시 쓰고 싶어
Starry Night
난 예술이야
꽃 (Feat. Justhis) (Flowers)
인들었다 놨다
인나를 슬프게 하는 사람들
D.I.S.C.O (Feat. TOP)
인미스터리 (Feat. San E)
조관우
알리 (ALi)
매드클라운
더 크로스
씨스타
휘성 (Realslow)
변진섭,김영호
백지영
데이브레이크 (DAYBREAK)
이승기
소향
벤
에일리(AILEE)
모모랜드 (MOMOLAND)
헬로비너스
매드클라운
데이브레이크 (DAYBREAK)
김경호
엄정화
박지윤
41,900
41,600
41,600
41,600
41,500
41,500
41,300
41,200
41,000
41,000
41,000
40,900
40,900
40,800
40,600
40,500
40,400
40,300
40,300
40,300
1,701 (4.1%)
731 (1.8%)
524 (1.3%)
1,867 (4.5%)
934 (2.3%)
1,020 (2.5%)
1,709 (4.1%)
3,260 (7.9%)
1,753 (4.3%)
1,994 (4.9%)
1,257 (3.1%)
1,181 (2.9%)
771 (1.9%)
2,253 (5.5%)
2,318 (5.7%)
821 (2.0%)
1,378 (3.4%)
1,353 (3.4%)
1,268 (3.1%)
2,222 (5.5%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/387d1183.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/7796a53b.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210523/d9b62b7b.jpg
https://d1z4u1c8ot3who.cloudf

장나라
규현 (KYUHYUN)
에일리(AILEE)
박지윤
김경호
멜로망스
김수희
NATURE (네이처)
Ecobridge
MC몽
몬스타엑스
걸스데이
윙크
김재중
범키
빅마마
V.O.S
다이나믹 듀오
김정민
SS501
37,100
37,000
37,000
37,000
37,000
37,000
37,000
36,900
36,900
36,700
36,600
36,600
36,500
36,500
36,500
36,500
36,300
36,200
36,200
36,100
1,078 (2.9%)
268 (0.7%)
946 (2.6%)
991 (2.7%)
2,122 (5.7%)
2,208 (6.0%)
1,052 (2.8%)
1,813 (4.9%)
752 (2.0%)
2,796 (7.6%)
1,910 (5.2%)
1,674 (4.6%)
1,339 (3.7%)
1,727 (4.7%)
865 (2.4%)
1,086 (3.0%)
1,688 (4.7%)
898 (2.5%)
899 (2.5%)
2,006 (5.6%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/82147abe.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/50d31918.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/fe5bedc5.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/9a1f8432.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/c4f1f857.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/a1e6b428.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/6ed13270.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/74

32,500
32,500
32,400
32,400
32,400
32,300
32,300
32,200
32,200
32,100
32,100
32,000
32,000
32,000
32,000
32,000
32,000
31,900
31,900
31,800
682 (2.1%)
644 (2.0%)
1,250 (3.9%)
1,289 (4.0%)
1,131 (3.5%)
863 (2.7%)
1,766 (5.5%)
450 (1.4%)
267 (0.8%)
836 (2.6%)
1,073 (3.3%)
1,453 (4.5%)
1,448 (4.5%)
1,236 (3.9%)
2,005 (6.3%)
1,304 (4.1%)
950 (3.0%)
1,328 (4.2%)
1,484 (4.7%)
3,220 (10.1%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/31c4d028.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/ea835705.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/4b2ea440.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/154a2c75.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210915/7890a529.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/ea456d89.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210712/ac401af4.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/bf1b21a5.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/fd699774.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210

In [25]:
# Print the number of elements in each accumulator list; all six should match.
print(*(len(column) for column in (
    img_list, title_list, artist_list,
    transaction_prices_list, copyright_fee_list, diff_list,
)))

500 500 500 500 500 499


In [27]:
# The day-over-day value of the song "목격자" was '-' and was not picked up by
# the crawl, leaving diff_list one element short; look up that song's position.
# Raises ValueError if the title is not in title_list.
title_list.index("인목격자")

343

In [28]:
# Fill in the missing day-over-day value at the position of the song found above.
# The index is derived from title_list instead of being hard-coded, and the
# insert only happens while diff_list is actually shorter than title_list, so
# re-running this cell cannot insert a second value and misalign the columns.
missing_idx = title_list.index("인목격자")
if len(diff_list) < len(title_list):
    diff_list.insert(missing_idx, '0 (0%)')
else:
    print('diff_list is not shorter than title_list; nothing inserted')

In [29]:
# Print the number of elements in each accumulator list; all six should match.
print(*(len(column) for column in (
    img_list, title_list, artist_list,
    transaction_prices_list, copyright_fee_list, diff_list,
)))

500 500 500 500 500 500


In [30]:
# Crawl pages 26-45: 4 groups of 5 pages, with pagination links addressed
# starting at a[2].
for _ in range(4):
    for i in range(5):
        # Located before scraping; activated at the end of the iteration.
        page_btn = driver.find_element_by_xpath('//*[@id="market_list"]/div[3]/a['+str(i+2)+']')

        # Album-cover image URLs.
        images = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.thmb > img')
        for image in images:
            url = image.get_attribute('src')
            img_list.append(url)
            print(url)
        time.sleep(1)

        # Song titles.
        titles = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > strong')
        for title in titles:
            title_name = title.text
            title_list.append(title_name)
            print(title_name)
        time.sleep(1)

        # Artist names.
        artists = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > span')
        for artist in artists:
            artist_name = artist.text
            artist_list.append(artist_name)
            print(artist_name)
        time.sleep(1)

        # Transaction prices.
        transaction_prices = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dt > strong')
        for transaction_price in transaction_prices:
            price = transaction_price.text
            transaction_prices_list.append(price)
            print(price)
        time.sleep(1)

        # Copyright fees.
        copyright_fees = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd.bg > span')
        for copyright_fee in copyright_fees:
            fee = copyright_fee.text
            copyright_fee_list.append(fee)
            print(fee)

        # Day-over-day change.
        diff = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd:nth-child(2) > strong')
        for d in diff:
            day_change = d.text
            if day_change == '-':
                # str.replace returns a new string; the previous code discarded
                # it, so '-' was appended unchanged. Store the normalized value.
                day_change = '0 (0%)'
            diff_list.append(day_change)

        time.sleep(2)

        # Move on to the next page and wait before scraping again.
        page_btn.send_keys(Keys.ENTER)

        time.sleep(2)

https://d1z4u1c8ot3who.cloudfront.net/song/210823/83205307.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/ddf302bc.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/41aa706d.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/177363dd.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/4fbf1fa9.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210607/5c3bf189.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/676f762a.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/2b916c7b.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210411/d68b34e0.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/4378c57d.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210930/639c9713.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/e714947c.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210614/748aadf4.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210621/5463ecd9.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/b0a7aa8b.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/9162e

https://d1z4u1c8ot3who.cloudfront.net/song/210105/14dfeeea.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/ae120199.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/92a9c66d.jpg
인사랑한다 말할까
미친거니
인우리 어머니
DAY BY DAY
인Rainbow Falling
넘어와 (Feat. 백예린)
핑크빛 로맨스 (Pink Romance)
다시 만난 날
내가 야! 하면 넌 예! (Duet With LYN)
누나 (NUNA)
꿈처럼 내린
인질투가 좋아
SEDANSOGU (세상에 단 하나뿐인 소중한 그대)
Really Like You
맘마미아
망설이다
여기요
WE LIKE 2 PARTY
그래도 남자니까
아파 (Slow)
소유 (SOYOU)
바이브
최향
티아라
차은우 (ASTRO)
DEAN
케이윌, 씨스타, 보이프랜드
휘성 (Realslow)
김태우
김재환
다비치
멜로망스
수호 (SUHO)
IZ*ONE (아이즈원)
카라
GOT7 (갓세븐)
홍자 (미스트롯)
BIGBANG
MC몽
2NE1
26,500
26,500
26,500
26,500
26,400
26,400
26,200
26,200
26,100
26,100
26,100
26,100
26,000
26,000
26,000
26,000
26,000
26,000
26,000
26,000
936 (3.5%)
813 (3.1%)
3,539 (13.4%)
589 (2.2%)
1,575 (6.0%)
461 (1.7%)
537 (2.0%)
1,083 (4.1%)
1,285 (4.9%)
1,239 (4.7%)
1,370 (5.2%)
1,490 (5.7%)
1,561 (6.0%)
2,221 (8.5%)
530 (2.0%)
481 (1.9%)
1,522 (5.9%)
1,275 (4.9%)
709 (2.7%)
1,072 (4.1%)
https://d1z4u1c8ot3who.clou

인다 생각나서
Ice Cream (Feat. Maboos)
SO CURIOUS
노는 게 제일 좋아 (OH YA YA YA)
180˚
그렇게 사랑하고 그렇게 웃었습니다
너, 한눈 팔지마!
인열기구
사랑은 이제 그만
눈물이 툭 (Feat. 박경 Of 블락비)
그럴걸
나쁜놈 (Feat. 소야)
EVERYDAY I LOVE YOU (Feat. 하슬)
널 너무 사랑해서 (Feat. 정은지 of 에이핑크)
아름다운 밤이야
진절머리 (Feat. Okasian & Dok2)
울고, 불고...
죽을 것만 같아
넌 너무 야해 (Feat. 긱스) (The Way You Make Me Melt)
Wake Up
김민석 (멜로망스)
현아
IZ*ONE (아이즈원)
루나솔라 (LUNARSOLAR)
MC몽
버블 시스터즈
걸스데이
SURL (설)
버블 시스터즈
지아
김나영
마이티 마우스
이달의소녀
MC몽
비스트
빈지노 (Beenzino)
지아, 포맨
환희
씨스타
일레인
24,400
24,400
24,300
24,300
24,300
24,300
24,300
24,200
24,200
24,200
24,200
24,100
24,100
24,100
24,100
24,000
24,000
24,000
23,900
23,900
1,016 (4.2%)
885 (3.6%)
1,172 (4.8%)
715 (2.9%)
778 (3.2%)
1,050 (4.3%)
441 (1.8%)
1,208 (5.0%)
417 (1.7%)
671 (2.8%)
902 (3.7%)
708 (2.9%)
890 (3.7%)
623 (2.6%)
804 (3.3%)
967 (4.0%)
730 (3.0%)
393 (1.6%)
658 (2.8%)
1,618 (6.8%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/489a484f.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210929/503fa455.jpg
https://d1z4u1c8ot3who.cloudf

인Upgrader
사랑 바보
온도 (Feat. 수란)
인너를 보네 (Feat. 권정열 Of 10cm)
빗물이 내려서
나를 잊지마요
Indian Boy (Feat. 장근이, B.I)
너를 보내며
밤이 두려워진 건
인십년이 지나도
Stay Together
오직 그대만
사랑해서…(이영현 솔로)
매일
New York (Feat. 백지영)
인우리 잠깐 쉬어갈래요
금토일
하늘, 바다, 나무, 별의 이야기
너를 원해 (Feat. Beenzino)
행복해 (Feat. 키겐 of 팬텀)
Lim Kim
양수경
MC몽
소란 (SORAN)
김태우
걸스데이
MC몽
젝스키스
정동하
송승헌
2NE1
바비 킴
빅마마
JBJ
MC몽
멜로망스
달샤벳
조관우
정기고
애즈원
22,000
21,900
21,900
21,900
21,900
21,900
21,800
21,800
21,800
21,800
21,700
21,700
21,600
21,600
21,500
21,500
21,500
21,500
21,500
21,500
574 (2.6%)
519 (2.4%)
1,188 (5.4%)
1,062 (4.8%)
292 (1.3%)
717 (3.3%)
1,268 (5.8%)
593 (2.7%)
926 (4.2%)
946 (4.3%)
703 (3.2%)
795 (3.7%)
487 (2.3%)
747 (3.5%)
623 (2.9%)
556 (2.6%)
365 (1.7%)
841 (3.9%)
856 (4.0%)
835 (3.9%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/0ebc83ce.png
https://d1z4u1c8ot3who.cloudfront.net/song/210308/c1eac001.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/9945b6ba.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/a3024377.jpg
https://d1z4u1c8ot3w

라이딩 (Riding) (Feat. 개코)
Say
Goodbye
정미애 (미스트롯)
유주 (여자친구)
Ashily
허각
Dok2
선우정아
라디 (Ra. D)
지선
Apink (에이핑크)
김범수
GOT7 (갓세븐)
B.A.P
Wanna One (워너원)
MC몽
정기고
유노윤호 (U-KNOW)
BIGBANG
하성운
젝스키스
웬디 (WENDY)
19,500
19,500
19,500
19,500
19,500
19,500
19,500
19,500
19,500
19,500
19,400
19,400
19,400
19,300
19,300
19,300
19,300
19,300
19,300
19,300
1,049 (5.4%)
850 (4.4%)
1,030 (5.3%)
362 (1.9%)
937 (4.8%)
728 (3.7%)
534 (2.7%)
1,024 (5.3%)
243 (1.2%)
372 (1.9%)
1,038 (5.4%)
511 (2.6%)
377 (1.9%)
841 (4.4%)
527 (2.7%)
883 (4.6%)
736 (3.8%)
1,264 (6.5%)
344 (1.8%)
1,086 (5.6%)
https://d1z4u1c8ot3who.cloudfront.net/song/210105/88dd2207.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210409/cb3fe1ef.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210809/ba85c1d7.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210407/57a9cbed.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/39402e27.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/29d7d8f0.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/b9254982.

In [31]:
# Print the number of elements in each accumulator list; all six should match.
print(*(len(column) for column in (
    img_list, title_list, artist_list,
    transaction_prices_list, copyright_fee_list, diff_list,
)))

900 900 900 900 900 900


In [32]:
# Crawl pages 46-47, with pagination links addressed starting at a[2].
for i in range(2):
    # Located before scraping; activated at the end of the iteration.
    page_btn = driver.find_element_by_xpath('//*[@id="market_list"]/div[3]/a['+str(i+2)+']')

    # Album-cover image URLs.
    images = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.thmb > img')
    for image in images:
        url = image.get_attribute('src')
        img_list.append(url)
        print(url)

    # Song titles.
    titles = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > strong')
    for title in titles:
        title_name = title.text
        title_list.append(title_name)
        print(title_name)

    # Artist names.
    artists = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > div.song > span')
    for artist in artists:
        artist_name = artist.text
        artist_list.append(artist_name)
        print(artist_name)
    time.sleep(1)

    # Transaction prices.
    transaction_prices = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dt > strong')
    for transaction_price in transaction_prices:
        price = transaction_price.text
        transaction_prices_list.append(price)
        print(price)
    time.sleep(1)

    # Copyright fees.
    copyright_fees = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd.bg > span')
    for copyright_fee in copyright_fees:
        fee = copyright_fee.text
        copyright_fee_list.append(fee)
        print(fee)

    # Day-over-day change.
    diff = driver.find_elements_by_css_selector('#market_list > div.lst_market_song > a > dl > dd:nth-child(2) > strong')
    for d in diff:
        day_change = d.text
        if day_change == '-':
            # str.replace returns a new string; the previous code discarded
            # it, so '-' was appended unchanged. Store the normalized value.
            day_change = '0 (0%)'
        diff_list.append(day_change)

    time.sleep(2)

    # Move on to the next page and wait before scraping again.
    page_btn.send_keys(Keys.ENTER)

    time.sleep(2)

https://d1z4u1c8ot3who.cloudfront.net/song/210105/72f91733.png
https://d1z4u1c8ot3who.cloudfront.net/song/210105/671d0b50.png
https://d1z4u1c8ot3who.cloudfront.net/song/210803/69a73893.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210514/6f4aaba1.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/0579a18a.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210422/049a8be3.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210215/5ed6c6ce.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210927/8b2ca64f.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/77ee3ec2.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210108/30ff5531.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/7796f980.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/386645e2.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210723/ac948f44.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210105/ad490c31.png
https://d1z4u1c8ot3who.cloudfront.net/song/210412/0f391a22.jpg
https://d1z4u1c8ot3who.cloudfront.net/song/210607/183dc

In [34]:
# Print the number of elements in each accumulator list; all six should match.
print(*(len(column) for column in (
    img_list, title_list, artist_list,
    transaction_prices_list, copyright_fee_list, diff_list,
)))

940 940 940 940 940 940


In [73]:
# Assemble the scraped columns into one DataFrame; column order follows the pairs below.
column_values = (
    ('음원이미지', img_list),
    ('곡명', title_list),
    ('아티스트명', artist_list),
    ('거래가', transaction_prices_list),
    ('저작권료(1주)', copyright_fee_list),
    ('전일비', diff_list),
)
music_frm = pd.DataFrame(dict(column_values))
music_frm

Unnamed: 0,음원이미지,곡명,아티스트명,거래가,저작권료(1주),전일비
0,https://d1z4u1c8ot3who.cloudfront.net/song/210...,롤린 (Rollin'),브레이브걸스,860000,"27,044 (3.1%)","13,800 (1.6%)"
1,https://d1z4u1c8ot3who.cloudfront.net/song/210...,미리 메리 크리스마스 (Feat. 천둥 Of MBLAQ),아이유,845000,"2,350 (0.3%)","3,000 (-0.4%)"
2,https://d1z4u1c8ot3who.cloudfront.net/song/210...,축가,전우성 (노을),550100,"17,608 (3.2%)","26,200 (-4.5%)"
3,https://d1z4u1c8ot3who.cloudfront.net/song/210...,저녁하늘,에일리(AILEE),418000,"2,313 (0.6%)",0 (0%)
4,https://d1z4u1c8ot3who.cloudfront.net/song/210...,인오늘 그녀가,루그,350000,"14,626 (4.2%)",0 (0%)
...,...,...,...,...,...,...
935,https://d1z4u1c8ot3who.cloudfront.net/song/210...,인난 좋아,정준일,16000,460 (2.9%),0 (0%)
936,https://d1z4u1c8ot3who.cloudfront.net/song/210...,Rap Star,Dok2,16000,653 (4.1%),400 (2.6%)
937,https://d1z4u1c8ot3who.cloudfront.net/song/210...,"옆에도 눈이 달렸어 (Side, Eye, Moon)",윤지성,15900,430 (2.7%),0 (0%)
938,https://d1z4u1c8ot3who.cloudfront.net/song/210...,인Tonight,권순관,15900,450 (2.8%),400 (-2.5%)


In [74]:
# Summarise the frame: row count, column names, non-null counts and dtypes.
music_frm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   음원이미지     940 non-null    object
 1   곡명        940 non-null    object
 2   아티스트명     940 non-null    object
 3   거래가       940 non-null    object
 4   저작권료(1주)  940 non-null    object
 5   전일비       940 non-null    object
dtypes: object(6)
memory usage: 44.2+ KB


In [75]:
# Append the songs of the very last page, entered by hand.
# Each row follows the frame's column order:
# image URL, title, artist, transaction price, weekly copyright fee, day-over-day change.
last_page_rows = [
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210205/bf97b280.jpg", "문득", "하성운", '15,700', '386 (2.5%)', '1,800 (-10.3%)'],
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210105/ec527428.jpg", "L4L (Lookin' For Luv) (Feat. Dok2 & The Quiett)", "BOBBY (FEAT. DOK2, THE QUIETT)", '15,700', '613 (3.9%)', '100 (0.6%)'],
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210406/f0243342.jpg", "광해, 왕이 된 남자 OST", "Various Artists", '15,700', '331 (2.1%)', '0 (0%)'],
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210503/cb5fa0fd.jpg", "아퍼 (Feat. Kid Milli, Lil tachi, 김승민, NO:EL, C JAMM)", "기리보이", '15,700', '616 (3.9%)', '800 (-4.8%)'],
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210105/85c8058e.jpg", "약속", "제로", '13,600', '442 (3.3%)', '0 (0%)'],
    # The fee was previously typed as '1,572(15.7%)'; the space is restored so it
    # matches the 'N (P%)' format of every scraped value.
    ["https://d1z4u1c8ot3who.cloudfront.net/song/210105/16392a05.jpg", "마녀가 된 이유", "NS 윤지", '10,000', '1,572 (15.7%)', '0 (0%)'],
]

# NOTE(review): re-running this cell appends the same rows again.
for row in last_page_rows:
    # Assigning at label len(music_frm) adds one row after the current last one.
    music_frm.loc[len(music_frm)] = row


In [76]:
# Display the frame to check the rows appended above.
music_frm

Unnamed: 0,음원이미지,곡명,아티스트명,거래가,저작권료(1주),전일비
0,https://d1z4u1c8ot3who.cloudfront.net/song/210...,롤린 (Rollin'),브레이브걸스,860000,"27,044 (3.1%)","13,800 (1.6%)"
1,https://d1z4u1c8ot3who.cloudfront.net/song/210...,미리 메리 크리스마스 (Feat. 천둥 Of MBLAQ),아이유,845000,"2,350 (0.3%)","3,000 (-0.4%)"
2,https://d1z4u1c8ot3who.cloudfront.net/song/210...,축가,전우성 (노을),550100,"17,608 (3.2%)","26,200 (-4.5%)"
3,https://d1z4u1c8ot3who.cloudfront.net/song/210...,저녁하늘,에일리(AILEE),418000,"2,313 (0.6%)",0 (0%)
4,https://d1z4u1c8ot3who.cloudfront.net/song/210...,인오늘 그녀가,루그,350000,"14,626 (4.2%)",0 (0%)
...,...,...,...,...,...,...
941,https://d1z4u1c8ot3who.cloudfront.net/song/210...,L4L (Lookin' For Luv) (Feat. Dok2 & The Quiett),"BOBBY (FEAT. DOK2, THE QUIETT)",15700,613 (3.9%),100 (0.6%)
942,https://d1z4u1c8ot3who.cloudfront.net/song/210...,"광해, 왕이 된 남자 OST",Various Artists,15700,331 (2.1%),0 (0%)
943,https://d1z4u1c8ot3who.cloudfront.net/song/210...,"아퍼 (Feat. Kid Milli, Lil tachi, 김승민, NO:EL, C ...",기리보이,15700,616 (3.9%),800 (-4.8%)
944,https://d1z4u1c8ot3who.cloudfront.net/song/210...,약속,제로,13600,442 (3.3%),0 (0%)


In [77]:
# Remove the stray '인' character that the scrape leaves at the start of some titles.
# `regex=True` is passed explicitly: the pattern is a regular expression, and
# newer pandas versions treat the pattern as a literal string by default, which
# would leave every title unchanged.
# NOTE(review): this also strips the first character of any title that genuinely
# begins with '인' — confirm none exist in the data.
music_frm['곡명'] = music_frm["곡명"].str.replace('^[인]', '', regex=True)

In [78]:
# Display the frame to check the cleaned titles.
music_frm

Unnamed: 0,음원이미지,곡명,아티스트명,거래가,저작권료(1주),전일비
0,https://d1z4u1c8ot3who.cloudfront.net/song/210...,롤린 (Rollin'),브레이브걸스,860000,"27,044 (3.1%)","13,800 (1.6%)"
1,https://d1z4u1c8ot3who.cloudfront.net/song/210...,미리 메리 크리스마스 (Feat. 천둥 Of MBLAQ),아이유,845000,"2,350 (0.3%)","3,000 (-0.4%)"
2,https://d1z4u1c8ot3who.cloudfront.net/song/210...,축가,전우성 (노을),550100,"17,608 (3.2%)","26,200 (-4.5%)"
3,https://d1z4u1c8ot3who.cloudfront.net/song/210...,저녁하늘,에일리(AILEE),418000,"2,313 (0.6%)",0 (0%)
4,https://d1z4u1c8ot3who.cloudfront.net/song/210...,오늘 그녀가,루그,350000,"14,626 (4.2%)",0 (0%)
...,...,...,...,...,...,...
941,https://d1z4u1c8ot3who.cloudfront.net/song/210...,L4L (Lookin' For Luv) (Feat. Dok2 & The Quiett),"BOBBY (FEAT. DOK2, THE QUIETT)",15700,613 (3.9%),100 (0.6%)
942,https://d1z4u1c8ot3who.cloudfront.net/song/210...,"광해, 왕이 된 남자 OST",Various Artists,15700,331 (2.1%),0 (0%)
943,https://d1z4u1c8ot3who.cloudfront.net/song/210...,"아퍼 (Feat. Kid Milli, Lil tachi, 김승민, NO:EL, C ...",기리보이,15700,616 (3.9%),800 (-4.8%)
944,https://d1z4u1c8ot3who.cloudfront.net/song/210...,약속,제로,13600,442 (3.3%),0 (0%)


In [49]:
# Save the frame as an Excel file in the current working directory.
# NOTE(review): unlike the CSV export, no index=False is passed here, so the
# row index is written as an extra first column — confirm this is intended.
music_frm.to_excel('뮤직카우_가격높은순.xlsx')

In [79]:
# Save the frame as a CSV file without the row index, encoded as UTF-8 with a BOM ("utf-8-sig").
music_frm.to_csv('뮤직카우_가격높은순.csv',index=False, encoding="utf-8-sig")