In [1]:
import requests
import bs4
import pandas as pd

In [2]:
class Chart():
    """Scrape the song charts of Bugs, Melon and Genie into one DataFrame.

    Attributes:
        df: Accumulated chart rows with the columns 서비스 (service),
            순위 (rank), 타이틀 (title) and 가수 (artist). Every fetch appends
            to this frame, so repeated calls on one instance add more rows.
        services: Per-platform scraping configuration. Each entry holds the
            CSS selectors for titles and artists, the number of pages to
            fetch, and either a fixed "url" or a "url_template" containing
            a ``{page}`` placeholder.
    """

    # Column order of ``df``.
    _COLUMNS = ["서비스", "순위", "타이틀", "가수"]

    # Melon and Genie need a browser-like User-Agent header (original author's note).
    _HEADERS = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36'}

    # Seconds to wait for a response; without a timeout a stalled server
    # would block the kernel indefinitely.
    _TIMEOUT = 10

    # Maximum number of entries read from a single page.
    _PAGE_LIMIT = 100

    def __init__(self):
        self.df = pd.DataFrame(columns=self._COLUMNS)
        self.services = {
            "Bugs": {
                "url": "https://music.bugs.co.kr/chart",
                "titles": 'p.title a',
                "artists": 'p.artist',
                "pages": 1
            },
            "Melon": {
                "url": "https://www.melon.com/chart/index.htm",
                "titles": 'div.ellipsis.rank01 a',
                "artists": 'div.ellipsis.rank02',
                "pages": 1
            },
            "Genie": {
                # NOTE(review): the date (ymd) and hour (hh) are pinned in the
                # query string - confirm whether rtm=Y makes the site ignore
                # them, otherwise this always requests the same snapshot.
                "url_template": "https://www.genie.co.kr/chart/top200?ditc=D&ymd=20250123&hh=14&rtm=Y&pg={page}",
                "titles": 'td.info .title.ellipsis',
                "artists": 'td.info .artist.ellipsis',
                "pages": 2
            }
        }

    @staticmethod
    def _artist_name(platform, artist):
        """Return the stripped artist name for one artist element.

        For Genie the element's own text is used. For the other platforms
        the text of the first ``<a>`` inside the element is used; when the
        element has no anchor, its own text is the fallback instead of
        raising IndexError.
        """
        if platform != "Genie":
            link = artist.find('a')
            if link is not None:
                return link.text.strip()
        return artist.text.strip()

    def fetch_chart(self, platform, service):
        """Download one platform's chart pages and append the rows to ``self.df``.

        Ranks start at 1 and keep counting across pages. Rows are added only
        after every page was fetched, so a failing page leaves ``self.df``
        untouched.

        Args:
            platform: Service name written to the 서비스 column; "Genie"
                selects the plain-text artist extraction.
            service: Config dict with "titles", "artists", "pages" and
                either "url" or "url_template".

        Raises:
            requests.HTTPError: If a page answers with an HTTP error status.
        """
        rows = []
        for page in range(1, service["pages"] + 1):
            if "url_template" in service:
                url = service["url_template"].format(page=page)
            else:
                url = service["url"]

            res = requests.get(url, headers=self._HEADERS, timeout=self._TIMEOUT)
            # Fail loudly rather than parsing an error page into zero rows.
            res.raise_for_status()

            soup = bs4.BeautifulSoup(res.content, 'html.parser')
            titles = soup.select(service["titles"])[:self._PAGE_LIMIT]
            artists = soup.select(service["artists"])[:self._PAGE_LIMIT]
            # zip() stops at the shorter list, so a page on which the two
            # selectors match different numbers of elements yields fewer rows.
            for title, artist in zip(titles, artists):
                rows.append({
                    "서비스" : platform,
                    "순위" : len(rows) + 1,
                    "타이틀" : title.text.strip(),
                    "가수" : self._artist_name(platform, artist),
                })

        if not rows:
            return

        # Build the new rows in one go; growing a frame row by row copies it
        # on every insert.
        fetched = pd.DataFrame(rows, columns=self._COLUMNS)
        if self.df.empty:
            self.df = fetched
        else:
            self.df = pd.concat([self.df, fetched], ignore_index=True)

    def get_chart(self, platform=None):
        """Fetch chart data and return the accumulated DataFrame.

        Args:
            platform: Key of ``self.services`` ("Bugs", "Melon" or "Genie").
                When omitted, every configured platform is fetched. A name
                that is not configured fetches nothing.

        Returns:
            ``self.df`` including the rows added by this call.
        """
        if platform:
            service = self.services.get(platform)
            if service:
                self.fetch_chart(platform, service)
        else:
            for name, service in self.services.items():
                self.fetch_chart(name, service)
        return self.df

In [3]:
# Calling get_chart() without an argument fetches every configured platform
# (Bugs, Melon and Genie) into a single DataFrame, which is then written to
# Chart.xlsx without the index column and displayed as the cell output.
chart = Chart()
final_df = chart.get_chart()
final_df.to_excel("Chart.xlsx", index=False)
final_df

Unnamed: 0,서비스,순위,타이틀,가수
0,Bugs,1,REBEL HEART,IVE (아이브)
1,Bugs,2,"HOME SWEET HOME (feat. 태양, 대성)",G-DRAGON
2,Bugs,3,Whiplash,aespa
3,Bugs,4,toxic till the end,로제(ROSÉ)
4,Bugs,5,Love Hangover (feat. Dominic Fike),제니 (JENNIE)
...,...,...,...,...
295,Genie,96,Stay,The Kid LAROI & Justin Bieber
296,Genie,97,2002,Anne-Marie
297,Genie,98,숲,최유리
298,Genie,99,"무제(無題) (Untitled, 2014)",G-DRAGON


In [4]:
# Passing a platform name to get_chart() fetches only that platform's chart
# (here Genie); a fresh Chart instance is used so the result starts empty.
chart2 = Chart()
genie_df = chart2.get_chart("Genie")
genie_df

Unnamed: 0,서비스,순위,타이틀,가수
0,Genie,1,HOME SWEET HOME (Feat. 태양 & 대성),G-DRAGON
1,Genie,2,REBEL HEART,IVE (아이브)
2,Genie,3,나는 반딧불,황가람
3,Genie,4,Whiplash,aespa
4,Genie,5,APT.,로제 (ROSÉ) & Bruno Mars
...,...,...,...,...
95,Genie,96,Stay,The Kid LAROI & Justin Bieber
96,Genie,97,2002,Anne-Marie
97,Genie,98,숲,최유리
98,Genie,99,"무제(無題) (Untitled, 2014)",G-DRAGON
