In [None]:
# Step 0: mount Google Drive so the notebook can read and write files on it.
from google.colab import drive
drive.mount('/content/drive')

# Set up the working directory for this project on the mounted drive.
import os
project_path = '/content/drive/MyDrive/aladin-project'

# Create the project folder and the raw-data folder if they do not exist yet.
os.makedirs(project_path, exist_ok=True)
os.makedirs(f'{project_path}/data/raw', exist_ok=True)

# IPython line magic (not plain Python): switch the notebook's working
# directory to the project folder so relative paths resolve under it.
%cd {project_path}
print(f"현재 작업 디렉토리: {os.getcwd()}")

# Required libraries
import re
import time, datetime, ssl
import pandas as pd
import urllib.request
from itertools import count
from bs4 import BeautifulSoup
from random import uniform

class AladinBestSeller():
    """Download one bestseller page and save scraped rows as CSV.

    Creating an instance immediately requests ``url``. The decoded response
    body is stored in ``self.soup``; it is ``None`` when the request failed.
    """

    # Encoding used to decode the HTTP response body.
    myencoding = 'utf-8'

    def __init__(self, siteName, url):
        """Store the site label and URL, then fetch the page right away.

        Args:
            siteName: Label used as the base name of the output CSV file.
            url: Address of the page to download.
        """
        self.siteName = siteName
        self.url = url
        # Column order expected for every row passed to save2Csv().
        self.mycolumns = ['year', 'month', 'rank', 'category', 'title', 'price', 'star_score', 'item_id']
        self.soup = self.get_request_url()

    def getSoup(self):
        """Return the downloaded page parsed with BeautifulSoup.

        Returns:
            A ``BeautifulSoup`` object built with ``html.parser``, or ``None``
            when the download failed.
        """
        if self.soup is None:
            return None
        return BeautifulSoup(self.soup, 'html.parser')

    def get_request_url(self):
        """Request ``self.url`` and return the decoded response body.

        Errors are printed rather than raised so that a crawl loop can keep
        going after a failed page.

        Returns:
            The body decoded with ``myencoding``, or ``None`` when the status
            code is not 200 or any exception occurs.
        """
        request = urllib.request.Request(self.url)
        try:
            # NOTE(security): this context skips TLS certificate verification,
            # so the server's identity is not checked. Kept as in the original
            # behavior; prefer ssl.create_default_context() if the environment
            # has working CA certificates.
            context = ssl._create_unverified_context()
            # The with-block closes the connection even if read/decode fails.
            with urllib.request.urlopen(request, context=context) as response:
                status = response.getcode()
                if status == 200:
                    return response.read().decode(self.myencoding)
            # Report unexpected statuses instead of returning None silently.
            now = datetime.datetime.now()
            print('[%s] unexpected status %s for url %s' % (now, status, self.url))
            return None
        except Exception as err:
            print(err)
            now = datetime.datetime.now()
            msg = '[%s] error for url %s' % (now, self.url)
            print(msg)
            return None

    def save2Csv(self, result):
        """Write the collected rows to ``data/raw/<siteName>.csv``.

        The path is relative to the current working directory. The file is
        written as UTF-8 with a BOM (``utf-8-sig``) and includes the index
        column.

        Args:
            result: Sequence of rows, each ordered like ``self.mycolumns``.
        """
        data = pd.DataFrame(result, columns=self.mycolumns)
        save_path = f'data/raw/{self.siteName}.csv'
        data.to_csv(save_path, encoding='utf-8-sig', index=True)
        print(f"✅ 파일 저장 완료: {save_path}")

# ---------------- crawl target ----------------
# Short label for the site; the CSV output file is named after it.
siteName = "aladin"
# Bestseller listing endpoint; query parameters are appended per request.
base_url = "https://www.aladin.co.kr/shop/common/wbest.aspx"
# ----------------------------------------------

# Compiled once: pulls the numeric ItemId out of a product link.
_ITEM_ID_PATTERN = re.compile(r'ItemId=(\d+)')


def _parse_book_item(item):
    """Extract (category, title, price, star_score, item_id) from one book box.

    Missing tags fall back to "N/A" / 0 / 0.0. Text that is present but not
    numeric makes ``int``/``float`` raise ``ValueError``, which the caller
    handles.
    """
    # NOTE(review): the selector is spelled "tit_catrgory"; kept as-is —
    # confirm against the page markup before "fixing" it.
    category_tag = item.select_one("span.tit_catrgory")
    category = category_tag.get_text(strip=True).strip('[]') if category_tag else "N/A"

    title_tag = item.select_one("a.bo3")
    title = title_tag.get_text(strip=True) if title_tag else "N/A"

    item_id = "N/A"
    if title_tag and title_tag.has_attr('href'):
        match = _ITEM_ID_PATTERN.search(title_tag['href'])
        if match:
            item_id = match.group(1)

    # Keep only the text before the first "원" and drop thousands separators.
    price_tag = item.select_one("span.ss_p2")
    price_text = price_tag.get_text(strip=True).split('원')[0] if price_tag else "0"
    price = int(price_text.replace(",", ""))

    star_score_tag = item.select_one("span.star_score")
    star_score = float(star_score_tag.get_text(strip=True)) if star_score_tag else 0.0

    return category, title, price, star_score, item_id


def getData(start_year=2020, end_year=2025, end_month=9):
    """Crawl the monthly bestseller pages and save every row to one CSV.

    One page is requested per (year, month). A month whose page cannot be
    fetched is skipped without abandoning the remaining months of that year.
    A book entry that fails to parse is reported and skipped.

    Args:
        start_year: First year to crawl (inclusive).
        end_year: Last year to crawl (inclusive).
        end_month: Last month to crawl in ``end_year``; earlier years always
            cover months 1-12.

    Returns:
        The number of rows collected.
    """
    savedData = []
    crawler = None  # last crawler instance; reused at the end to write the CSV

    for year in range(start_year, end_year + 1):
        last_month = end_month if year == end_year else 12
        for month in range(1, last_month + 1):
            url = (
                f'{base_url}?BranchType=1&CID=0&Year={year}&Month={month}'
                '&Week=1&BestType=MonthlyBest&SearchSubBarcode='
            )
            print(url)

            crawler = AladinBestSeller(siteName, url)
            soup = crawler.getSoup()

            if soup is None:
                # Skip only this month; later months may still succeed.
                print(f'[skip] {year}-{month:02d}: page could not be fetched')
            else:
                for rank, item in enumerate(soup.select("div.ss_book_box"), start=1):
                    try:
                        category, title, price, star_score, item_id = _parse_book_item(item)
                    except Exception as err:
                        # Best effort: report the bad entry and move on.
                        print(f'[skip] {year}-{month:02d} rank {rank}: {err}')
                        continue
                    savedData.append([year, month, rank, category, title, price, star_score, item_id])

            # Random pause between requests to avoid hammering the server.
            time.sleep(uniform(1, 2))

    if crawler is None:
        # Empty year/month range: nothing was requested, so nothing to save.
        print('No pages were requested; nothing saved.')
        return 0

    crawler.save2Csv(savedData)
    print('=' * 50)
    print(f"총 {len(savedData)}개 데이터 수집 완료")
    return len(savedData)

# ---------------- run the crawl ----------------
print(f'{siteName} 베스트셀러 크롤링 시작')
total_count = getData()
print(f'{siteName} 베스트셀러 크롤링 끝')
# Build the file name from siteName so this message always matches the
# path the crawler writes to (data/raw/<siteName>.csv).
print(f"data/raw/{siteName}.csv 파일이 구글 드라이브에 저장되었습니다.")