In [4]:
import datetime
import random
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Timestamp captured once, when this cell runs. The crawl calls at the bottom
# of the notebook format it into a YYMMDD prefix for the CSV file names.
date = datetime.datetime.now()

# 무신사 인기검색어 300 크롤링

In [5]:
# Maximum number of keyword rows kept from the ranking page.
_HOT_KEYWORD_LIMIT = 300


def _parse_keyword_row(rank_text, change_text):
    """Convert the raw text of one keyword-ranking row into a record dict.

    Args:
        rank_text: Text of the ``p.p_srank`` element, shaped like
            ``"<rank>.<keyword>"`` (spaces are ignored).
        change_text: Text of the matching ``p.p_srank_last`` element. After
            removing spaces, its second line holds the direction marker and
            its third line holds the number of positions moved.

    Returns:
        A dict with the string values ``rank``, ``name``, ``increase`` and
        ``increase_cnt``.
    """
    # Split on the first dot only, so a keyword that itself contains a dot
    # is kept whole instead of being cut at its first dot.
    rank, _, name = rank_text.replace(" ", "").partition(".")

    fields = change_text.replace(" ", "").split("\n")
    marker, moved = fields[1], fields[2]

    # Encode the direction: "▲" becomes "1", "▼" becomes "0"; any other
    # marker (for example "-") is kept as it appears on the page.
    if "▲" in marker:
        increase = marker.replace("▲", "1")
    elif "▼" in marker:
        increase = marker.replace("▼", "0")
    else:
        increase = marker

    return {
        "rank": rank,
        "name": name,
        "increase": increase,
        # An empty count means the keyword did not move.
        "increase_cnt": moved or "0",
    }


def hot300(date: str) -> pd.DataFrame:
    """Crawl the Musinsa popular-search-keyword ranking (up to 300 rows).

    The result is also written to ``{date}_musinsa_top300.csv`` in the
    current working directory.

    Args:
        date: Prefix used for the CSV file name.

    Returns:
        DataFrame with the columns ``rank`` (int), ``name``, ``increase``
        and ``increase_cnt`` (int).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = "https://www.musinsa.com/ranking/keyword"

    # Fetch the page; fail fast instead of hanging or parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Parse the HTML.
    html = BeautifulSoup(response.text, "html.parser")

    # Run each CSS query once rather than once per field per row, and cap
    # the result at the ranking size.
    rank_nodes = html.select("p.p_srank")[:_HOT_KEYWORD_LIMIT]
    change_nodes = html.select("p.p_srank_last")[:_HOT_KEYWORD_LIMIT]

    # zip() stops at the shorter list, so a page with fewer rows than the
    # limit yields a shorter table instead of an IndexError.
    rank_list = [
        _parse_keyword_row(rank_node.text, change_node.text)
        for rank_node, change_node in zip(rank_nodes, change_nodes)
    ]

    # Build the DataFrame.
    ranking = pd.DataFrame(
        rank_list, columns=["rank", "name", "increase", "increase_cnt"]
    )

    # Convert the rank and the moved-positions count to integers.
    ranking["rank"] = ranking["rank"].astype("int")
    ranking["increase_cnt"] = ranking["increase_cnt"].astype("int")

    # Export to CSV.
    ranking.to_csv(f"{date}_musinsa_top300.csv", index=False)

    return ranking

# 무신사 브랜드 TOP 100 크롤링

In [6]:
# The brand ranking page only lists a top 100.
_BRAND_LIMIT = 100


def brand100(date: str) -> pd.DataFrame:
    """Crawl the Musinsa brand ranking (up to 100 brands).

    The result is also written to ``{date}_musinsa_brand100.csv`` in the
    current working directory.

    Args:
        date: Prefix used for the CSV file name.

    Returns:
        DataFrame with the columns ``rank``, ``name``, ``en_name`` and
        ``url``. ``rank`` is the row position produced by ``reset_index``,
        so it starts at 0.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = "https://www.musinsa.com/ranking/brand"

    # Fetch the page; fail fast instead of hanging or parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Parse the HTML.
    html = BeautifulSoup(response.text, "html.parser")

    # Run each CSS query once and pair the results; zip() stops at the
    # shorter list, so a short page no longer raises IndexError.
    name_nodes = html.select("p.brand_name")[:_BRAND_LIMIT]
    en_name_nodes = html.select("p.brand_name_en")[:_BRAND_LIMIT]
    brand_list = [
        {"name": name_node.text.strip(), "en_name": en_node.text.strip()}
        for name_node, en_node in zip(name_nodes, en_name_nodes)
    ]

    # Brand page links, taken from every logo anchor on the page.
    url_list = [
        {"url": anchor.attrs["href"]} for anchor in html.select("a.brandLogo")
    ]

    # Explicit column names keep both frames well-formed even when a query
    # matched nothing, so the column assignment below cannot fail.
    brand_df = pd.DataFrame(brand_list, columns=["name", "en_name"])
    url_df = pd.DataFrame(url_list, columns=["url"])

    # Place names and links side by side, then expose the row position as
    # the first column.
    # NOTE(review): this makes "rank" 0-based, unlike the 1-based ranks the
    # other crawlers in this file produce - confirm whether that is intended.
    total_brand = pd.concat([brand_df, url_df], axis=1).reset_index()
    total_brand.columns = ["rank", "name", "en_name", "url"]

    total_brand.to_csv(f"{date}_musinsa_brand100.csv", index=False)

    return total_brand

# 무신사 Best item Top 100 크롤링

In [7]:
# Ranking pages that are crawled, and the number of items read from each.
_BEST_PAGES = range(1, 6)
_BEST_ITEMS_PER_PAGE = 90


def _parse_best_item(title_text, info_text, price_text, rank_text):
    """Convert the raw text of one best-item card into a record dict.

    Args:
        title_text: Text of the ``p.item_title`` element (brand name).
        info_text: Text of the ``p.list_info`` element. When it holds two
            whitespace-separated tokens after removing spaces, the first is
            a delivery note and the second the product name.
        price_text: Text of the ``p.price`` element; two tokens mean a
            list price followed by a sale price.
        rank_text: Text of the rank label; three tokens mean rank,
            direction marker and number of positions moved.

    Returns:
        A dict with the keys ``rank``, ``brand``, ``product``, ``price``,
        ``sale_price``, ``increase``, ``increase_cnt`` and ``no_deli``.
    """
    # Product name, optionally preceded by a delivery note.
    info = info_text.replace(" ", "").split()
    if len(info) == 2:
        product = info[1]
        no_deli = info[0] + " " + "예정"
    else:
        product = info[0]
        no_deli = "-"

    # Price, optionally followed by a sale price.
    prices = price_text.split()
    price = prices[0]
    sale_price = prices[1] if len(prices) == 2 else "-"

    # Rank, optionally followed by a direction marker and a count.
    label = rank_text.replace(" ", "").split()
    rank = label[0]
    # Default to "-" so a label with an unrecognised marker can neither
    # leave `increase` unbound nor inherit the previous item's value.
    increase = "-"
    increase_cnt = "-"
    if len(label) == 3:
        if "▲" in label[1]:
            increase = 1
        elif "▼" in label[1]:
            increase = 0
        increase_cnt = label[2]

    return {
        "rank": rank,
        "brand": title_text.strip(),
        "product": product,
        "price": price,
        "sale_price": sale_price,
        "increase": increase,
        "increase_cnt": increase_cnt,
        "no_deli": no_deli,
    }


def best_list(date: str) -> pd.DataFrame:
    """Crawl the Musinsa best-item ranking, pages 1 to 5.

    Up to 90 items are read from each page. The result is also written to
    ``{date}_musinsa_best.csv`` in the current working directory.

    Args:
        date: Prefix used for the CSV file name.

    Returns:
        DataFrame with one row per item and the columns ``rank``,
        ``brand``, ``product``, ``price``, ``sale_price``, ``increase``,
        ``increase_cnt`` and ``no_deli``.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an error status.
    """
    rows = []

    for page in _BEST_PAGES:
        url = f"https://www.musinsa.com/ranking/best?period=now&age=ALL&mainCategory=&subCategory=&leafCategory=&price=&golf=false&kids=false&newProduct=false&exclusive=false&discount=false&soldOut=false&page={page}&viewType=small&priceMin=&priceMax="

        # Fetch the page; fail fast instead of hanging or parsing an error
        # page.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Parse the HTML.
        html = BeautifulSoup(response.text, "html.parser")

        # Pause for a random 0-4 seconds between pages. Uses the stdlib RNG
        # directly, so no global NumPy random state is re-seeded.
        time.sleep(random.randrange(5))

        # Run each CSS query once per page rather than once per field per
        # item; zip() stops at the shortest list, so a short page no longer
        # raises IndexError.
        cards = zip(
            html.select("p.item_title")[:_BEST_ITEMS_PER_PAGE],
            html.select("p.list_info")[:_BEST_ITEMS_PER_PAGE],
            html.select("p.price")[:_BEST_ITEMS_PER_PAGE],
            html.select("p.n-label.label-default.txt_num_rank")[:_BEST_ITEMS_PER_PAGE],
        )
        rows.extend(
            _parse_best_item(title.text, info.text, price.text, rank.text)
            for title, info, price, rank in cards
        )

    best_df = pd.DataFrame(
        rows,
        columns=[
            "rank",
            "brand",
            "product",
            "price",
            "sale_price",
            "increase",
            "increase_cnt",
            "no_deli",
        ],
    )
    best_df.to_csv(f"{date}_musinsa_best.csv", index=False)

    return best_df

In [8]:
# Run the keyword crawl; the CSV prefix is the captured date as YYMMDD.
hot300(date.strftime("%y%m%d"))

Unnamed: 0,rank,name,increase,increase_cnt
0,1,후드티,-,0
1,2,맨투맨,-,0
2,3,후드집업,1,1
3,4,백팩,0,1
4,5,니트,1,1
...,...,...,...,...
295,296,노매뉴얼,-,0
296,297,슬링백,1,13
297,298,칼하트후드집업,1,14
298,299,나이키바지,0,23


In [8]:
# Run the best-item crawl; the CSV prefix is the captured date as YYMMDD.
best_list(date.strftime("%y%m%d"))

Unnamed: 0,rank,brand,product,price,sale_price,increase,increase_cnt,no_deli
0,1위,노스페이스,NM2DM51A_빅샷,"140,000원","126,000원",0,1,2/22배송 예정
1,2위,반스,올드스쿨(캔버스)-블랙:트루화이트/VN000ZDF1WX1,"79,000원",-,1,1,-
2,3위,마뗑킴,MATINKIMLOGOCOATINGJUMPER,"143,000원","128,700원",-,-,4/22배송 예정
3,4위,뉴발란스,NBPDBF754G/ML2002RA(GREY),"169,000원",-,-,-,-
4,5위,세터,테오프리미엄옥스퍼드프랜츠워크자켓바닐라크림,"249,000원","224,100원",0,8,3/23배송 예정
...,...,...,...,...,...,...,...,...
445,446위,엠엠지엘,비건레더메신저백(블랙),"109,000원",-,0,999,2/25배송 예정
446,447위,세터,오슬로코듀로이셔츠_오이스터베이지,"129,000원",-,0,999,-
447,448위,니티드,벌키브러쉬아가일가디건DARKBLACK,"99,000원","69,300원",0,200,-
448,449위,리,트위치로고코듀라메신저백블랙,"59,000원","47,000원",1,55,-
