In [1]:
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
# Create the browser driver.
# No driver path is passed: the former positional 'chromedriver' argument only
# repeated the default PATH lookup, and Selenium >= 4.10 no longer accepts a
# driver path as the first positional argument.
driver = webdriver.Chrome()

In [3]:
# Load the listing page that the scraping functions below operate on.
SISE_LIST_URL = 'https://www.meatbox.co.kr/fo/sise/siseListPage.do'
driver.get(SISE_LIST_URL)

In [4]:
# Data collection helper
def dataCollect(data):
    """Return the ``text`` attribute of every item in *data* as a list.

    The order of *data* is preserved; an empty iterable yields an empty list.
    """
    return [element.text for element in data]

In [5]:
# Collect product info (code, name, grade, storage state) from one page
def codeCollect(page):
    """Scrape one listing page and return its product columns.

    Uses the module-level ``driver``, which must already have the listing
    page loaded.

    Args:
        page: Page number handed to the site's ``MainMgr.getSiseList`` script.

    Returns:
        A 4-tuple ``(codes, names, grades, keepings)`` of lists of strings.
    """
    driver.execute_script('MainMgr.getSiseList({})'.format(page))
    # Fixed pause — presumably to let the script finish updating the table
    # before the elements are read.
    time.sleep(1)

    # Codes: taken from each row's link target as the seven characters that
    # precede the final character of the href.
    # NOTE(review): this slice assumes a fixed href layout — confirm against
    # the live page.
    codes = []
    # find_element(s)(By...) is used because the find_*_by_* shortcuts were
    # removed in Selenium 4.3; this form works on older releases as well.
    for cell in driver.find_elements(By.CSS_SELECTOR, 'td.align'):
        href = cell.find_element(By.TAG_NAME, 'a').get_attribute('href')
        codes.append(href[-8:-1])

    # Remaining columns: the visible text of the matching elements.
    names = driver.find_elements(By.ID, 'prd_name')
    keepings = driver.find_elements(By.ID, 'prd_keeping')
    grades = driver.find_elements(By.ID, 'prd_grade')

    prd_names = dataCollect(names)
    prd_keepings = dataCollect(keepings)
    prd_grades = dataCollect(grades)

    return codes, prd_names, prd_grades, prd_keepings

In [6]:
# Collect product info from every page
def infoCollect(pages=25):
    """Scrape pages ``1..pages`` and return the products as one DataFrame.

    Args:
        pages: Number of listing pages to visit (defaults to 25, the value
            that was previously hard-coded).

    Returns:
        A DataFrame with columns ``Code``, ``Name``, ``Grade`` and
        ``Keeping`` and a fresh 0..n-1 index. It is empty (columns only)
        when no page is visited.
    """
    columns = ['Code', 'Name', 'Grade', 'Keeping']
    frames = []
    for page in range(1, pages + 1):
        codes, names, grades, keepings = codeCollect(page)
        # One frame per page, so a page whose columns differ in length is
        # reported immediately instead of silently misaligning later rows.
        frames.append(pd.DataFrame({
            'Code': codes,
            'Name': names,
            'Grade': grades,
            'Keeping': keepings,
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    # A single concat replaces repeated DataFrame.append, which was removed
    # in pandas 2.0 and re-copied the accumulated frame on every page.
    return pd.concat(frames, ignore_index=True)

In [7]:
# Collect the price series for one product code
def priceCollect(code):
    """Return the chart data points for *code* as ``[code, date, price]`` rows.

    Reloads the listing page with the module-level ``driver``, opens the
    product's detail view, draws the ``"year"`` chart and reads every
    ``<circle>`` element's ``aria-label``.

    Args:
        code: Product code passed to the site's ``MainMgr.goViewPage``.

    Returns:
        A list of lists. Each row starts with *code*, followed by the
        whitespace-separated tokens of the label; the token at index 2 is
        converted to ``int`` after removing thousands separators.
        (Assumes labels look like ``"<date> <price>"`` — TODO confirm.)

    Raises:
        IndexError, ValueError: If a non-empty label does not have a numeric
            second token.
    """
    driver.get('https://www.meatbox.co.kr/fo/sise/siseListPage.do')
    # str.format also accepts a non-string code (e.g. an int read back from
    # a spreadsheet), which plain string concatenation rejected.
    driver.execute_script('MainMgr.goViewPage({})'.format(code))
    time.sleep(5)
    driver.execute_script('MainMgr.showChart()')
    driver.execute_script('MainMgr.drawChart("year")')
    time.sleep(5)

    prices = []
    # find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name,
    # which was removed in Selenium 4.3.
    for point in driver.find_elements(By.TAG_NAME, 'circle'):
        label = point.get_attribute('aria-label')
        if not label:
            # get_attribute returns None when the attribute is absent; such
            # circles carry nothing to parse and used to crash on .split().
            continue
        row = [code] + label.split()
        row[2] = int(row[2].replace(',', ''))
        prices.append(row)
    return prices

In [8]:
# Collect prices for every product
def priceCollectAll(info):
    """Collect the price rows of every code in ``info['Code']``.

    Args:
        info: Table-like object whose ``'Code'`` entry iterates over product
            codes (the DataFrame returned by ``infoCollect``).

    Returns:
        A DataFrame with columns ``Code``, ``Date`` and ``Price`` and a
        0..n-1 index; empty (columns only) when there are no codes or no
        price rows.
    """
    rows = []
    for code in info['Code']:
        rows.extend(priceCollect(code))
    # Build the frame once from all rows (DataFrame.append was removed in
    # pandas 2.0) and return it: the result was previously discarded, so
    # callers received None.
    return pd.DataFrame(rows, columns=['Code', 'Date', 'Price'])

In [None]:
# Run the collection: product info first, then the prices for each
# collected product code. Both results are written to Excel in the next cell.
info = infoCollect()
prices = priceCollectAll(info)

In [90]:
# Save the results: one sheet for product info, one for prices.
# The context manager writes and closes the workbook on exit. The former
# explicit save() + close() pair finalised the file twice and fails on
# pandas >= 2.0, where ExcelWriter.save() was removed.
with pd.ExcelWriter('미트박스.xlsx', engine='xlsxwriter') as writer:
    info.to_excel(writer, sheet_name='상품정보', index=False)
    prices.to_excel(writer, sheet_name='가격정보', index=False)