# In [1]:
from concurrent.futures import ThreadPoolExecutor
import requests
from bs4 import BeautifulSoup
import json
from html import unescape
from time import sleep
import random
from datetime import datetime

# 전체 페이지 수 가져오기
def get_page_num(*, timeout=30):
    """Return the listing-page numbers of the recipe category.

    Fetches the first category page and reads the highest page number from
    the pagination bar (``div.nav-links``).

    Args:
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        ``[1, 2, ..., last_page]``. If the pagination bar is missing or its
        page number cannot be parsed, ``[1]`` is returned so that at least
        the first listing page is crawled.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://lovingitvegan.com/category/recipe/'
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    nav = soup.find('div', {'class': 'nav-links'})
    try:
        # The third child from the end is read as the last page number.
        # NOTE(review): presumably the children after it are whitespace and
        # the "next" link - confirm against the live markup.
        max_num = int(nav.contents[-3].text)
    except (AttributeError, IndexError, ValueError):
        print('# pagination not found on', url, '- crawling page 1 only')
        return [1]

    return list(range(1, max_num + 1))


# 입력한 페이지의 전체 레시피 링크 가져오기
def get_links(i, *, timeout=30):
    """Return the recipe links found on listing page ``i``.

    Args:
        i: Page number appended to the category listing URL.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A list of ``href`` strings, one per ``li.listing-item`` whose second
        child carries an ``href`` attribute. Items without such a child are
        skipped. An empty list is returned when the page cannot be fetched.
    """
    url = 'https://lovingitvegan.com/category/recipe/page/' + str(i)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Best effort: one unreachable listing page should not stop the crawl.
        print('# failed to fetch listing page', url, '-', err)
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    link_list = []
    for article in soup.find_all('li', {'class': 'listing-item'}):
        try:
            href = article.contents[1].get('href')
        except (IndexError, AttributeError):
            # Fewer than two children, or the second child is a text node.
            continue
        # A missing href would otherwise be stored as None and later be
        # handed to requests.get() as a URL.
        if href:
            link_list.append(href)

    return link_list

# 입력한 링크의 제목, 댓글, 레시피, 재료, 조리시간, 분량, 영양정보, 이미지 가져오기
def _nutrition_value(soup, nutrient):
    """Return the last space-separated token of a nutrition label, or ''."""
    label = soup.find('span', {
        'class': 'wprm-nutrition-label-text-nutrition-container '
                 'wprm-nutrition-label-text-nutrition-container-' + nutrient})
    return label.text.split(' ')[-1] if label else ''


def _collect_comments(soup):
    """Return the comment texts found in every ``ol.comment-list``."""
    com_list = []
    for comment in soup.find_all('ol', {'class': 'comment-list'}):
        children = comment.contents
        # Every third child starting at index 1 is probed. The loop is bounded
        # by the real child count instead of a fixed 10000 so that missing
        # indexes no longer raise (and swallow) thousands of exceptions.
        for c_num in range(1, len(children), 3):
            try:
                text = children[c_num].contents[1].contents[3].text
            except (IndexError, AttributeError):
                # Child is a text node or lacks the expected nesting.
                continue
            com_list.append(text.strip('\n'))
    return com_list


def get_contents(url, *, timeout=30):
    """Scrape one recipe page.

    Collects the title, comments, instruction steps, ingredients, cooking
    time, servings, nutrition values and image URL of the page at ``url``.

    Args:
        url: Address of the recipe page.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A dict with the keys ``site``, ``title``, ``ingredients``, ``time``,
        ``serving``, ``recipe``, ``calories``, ``carbs``, ``protein``,
        ``total_fat``, ``comments`` and ``image`` when the page has at least
        one instruction step and its comment counter is greater than 4.
        Otherwise (including when the page cannot be fetched) an empty dict.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Best effort: a single unreachable recipe must not abort the crawl.
        print('# failed to fetch', url, '-', err)
        return {}
    soup = BeautifulSoup(response.text, 'html.parser')

    # Title (None when the heading is absent).
    heading = soup.find('h1', {'class': 'entry-title'})
    title = heading.text if heading else None

    # Comments.
    com_list = _collect_comments(soup)
    # NOTE(review): the counter is one more than the number of collected
    # comments; kept as in the original - confirm the +1 is intended.
    comments_num = len(com_list) + 1

    # Instruction steps, prefixed with a 1-based step number.
    recipes_list = []
    instructions = soup.find_all('li', {'class': 'wprm-recipe-instruction'})
    for position, recipe in enumerate(instructions):
        try:
            # The step number is the last dash-separated part of the id, + 1.
            step = int(recipe.get('id').split('-')[-1]) + 1
        except (AttributeError, ValueError):
            # No id, or a non-numeric suffix: fall back to document order.
            step = position + 1
        recipes_list.append(str(step) + '. ' + recipe.text)

    # Ingredients (the aria-label of each checkbox).
    ingredients_list = [
        ingredient.get('aria-label')
        for ingredient in soup.find_all('input', {'class': 'wprm-checkbox'})
    ]

    # Cooking time: the last matching time span on the page.
    cooktimes = soup.find_all(
        'span', {'class': 'wprm-recipe-time wprm-block-text-normal'})
    cooktime = cooktimes[-1].text if cooktimes else ''

    # Servings. Matching on the single class token works for every recipe;
    # the former full class string embedded one specific recipe id (41441)
    # and therefore matched only that recipe.
    servings_tag = soup.find('span', {'class': 'wprm-recipe-servings'})
    servings = servings_tag.text if servings_tag else ''

    # Nutrition values.
    calories = _nutrition_value(soup, 'calories')
    carbs = _nutrition_value(soup, 'carbohydrates')
    protein = _nutrition_value(soup, 'protein')
    total_fat = _nutrition_value(soup, 'fat')

    # Image: src of the first <img> inside the full-size figure, if any.
    image = None
    figure = soup.find('figure', {'class': 'wp-block-image size-full'})
    if figure:
        img = figure.find('img')
        image = img.get('src') if img else None

    print('#', comments_num, '-url: ', url)

    # Keep only pages that have instruction steps and enough comments.
    if not recipes_list or comments_num <= 4:
        return {}

    return {
        'site': url,
        'title': title,
        'ingredients': ingredients_list,
        'time': cooktime,
        'serving': servings,
        'recipe': recipes_list,
        'calories': calories,
        'carbs': carbs,
        'protein': protein,
        'total_fat': total_fat,
        'comments': com_list,
        'image': image,
    }

# 전체 페이지 레시피 댓글 가져오기
def get_all_page_comment(nums):
    """Crawl every recipe linked from the given listing pages.

    Args:
        nums: Iterable of listing-page numbers, as accepted by ``get_links``.

    Returns:
        A dict with two keys: ``date`` (today's date formatted ``YYYYMMDD``)
        and ``lovingitvegan`` (the ``get_contents`` results in the order the
        links were discovered). Pages whose scraping raised an exception are
        reported on stdout and left out.
    """
    pending = []

    with ThreadPoolExecutor(max_workers=10) as executor:
        for num in nums:
            for link in get_links(num):
                # Only submit here. Calling .result() right after submit()
                # would block until the page is done and make the pool
                # pointless.
                pending.append((link, executor.submit(get_contents, link)))
                # Random 1-10 s pause between submissions throttles the
                # request rate towards the site.
                sleep(random.randrange(1, 11))

    # Leaving the with-block waits for all workers, so every future is done.
    title_comments = []
    for link, future in pending:
        try:
            title_comments.append(future.result())
        except Exception as err:
            # Crawl boundary: report the failed page and keep the rest of
            # the results instead of losing the whole run.
            print('# failed to scrape', link, '-', err)

    total = {
        'date': datetime.today().strftime("%Y%m%d"),
        'lovingitvegan': title_comments,
    }
    return total


# 메인에서 실행
if __name__ == '__main__':
    import os

    out_dir = 'D:\\fruit_hada\\crawling\\crawling_result'
    # Create the output folder before crawling: a missing directory would
    # otherwise only be noticed after the long crawl, and the collected data
    # would be lost when open() fails.
    os.makedirs(out_dir, exist_ok=True)

    nums = get_page_num()
    total = get_all_page_comment(nums)

    date = datetime.today().strftime("%Y%m%d")

    # utf-8-sig writes a BOM at the start of the file.
    with open(f'{out_dir}\\{date}_lovingitvegan_all.json', 'w', encoding='utf-8-sig') as file:
        json.dump(total, file, indent="\t")
        

# 1 -url:  https://lovingitvegan.com/vegan-brown-sugar-cookies/
# 4 -url:  https://lovingitvegan.com/vegan-oatmeal-cranberry-cookies/
# 2 -url:  https://lovingitvegan.com/vegan-artichoke-dip/
# 6 -url:  https://lovingitvegan.com/vegan-gingerbread-loaf/
# 4 -url:  https://lovingitvegan.com/vegan-pumpkin-pasta/
# 13 -url:  https://lovingitvegan.com/vegan-feta/
# 3 -url:  https://lovingitvegan.com/vegan-pumpkin-scones/
# 2 -url:  https://lovingitvegan.com/vegan-stuffing/
# 3 -url:  https://lovingitvegan.com/jackfruit-curry/
# 4 -url:  https://lovingitvegan.com/vegan-pumpkin-oatmeal-cookies/
# 1 -url:  https://lovingitvegan.com/vegan-pumpkin-waffles/
# 7 -url:  https://lovingitvegan.com/vegan-cowboy-cookies/
# 4 -url:  https://lovingitvegan.com/chocolate-sheet-cake-with-chocolate-fudge-frosting/
# 8 -url:  https://lovingitvegan.com/easiest-baba-ganoush/
# 2 -url:  https://lovingitvegan.com/vegan-peanut-butter-brownies/
# 9 -url:  https://lovingitvegan.com/thai-red-curry/
# 2 -url:  https:/

# 3 -url:  https://lovingitvegan.com/vegan-buffalo-sauce/
# 14 -url:  https://lovingitvegan.com/vegan-bolognese/
# 12 -url:  https://lovingitvegan.com/vegan-cauliflower-cheese/
# 6 -url:  https://lovingitvegan.com/green-juice/
# 7 -url:  https://lovingitvegan.com/vegan-ham/
# 7 -url:  https://lovingitvegan.com/vegan-spinach-dip/
# 11 -url:  https://lovingitvegan.com/cherry-smoothie/
# 11 -url:  https://lovingitvegan.com/gluten-free-vegan-vanilla-cupcakes/
# 6 -url:  https://lovingitvegan.com/vegan-vanilla-pudding/
# 10 -url:  https://lovingitvegan.com/vegan-katsu-curry/
# 16 -url:  https://lovingitvegan.com/vegan-biscuits/
# 4 -url:  https://lovingitvegan.com/vegan-cannelloni/
# 7 -url:  https://lovingitvegan.com/tofu-ricotta/
# 11 -url:  https://lovingitvegan.com/vegan-anzac-biscuits/
# 5 -url:  https://lovingitvegan.com/vegan-mince/
# 12 -url:  https://lovingitvegan.com/vegan-zucchini-bread/
# 8 -url:  https://lovingitvegan.com/green-tea-cupcakes/
# 4 -url:  https://lovingitvegan.com

# 13 -url:  https://lovingitvegan.com/white-bread/
# 12 -url:  https://lovingitvegan.com/oreo-cupcakes/
# 4 -url:  https://lovingitvegan.com/vegan-taco-meat/
# 2 -url:  https://lovingitvegan.com/spicy-hummus/
# 10 -url:  https://lovingitvegan.com/vegan-banana-muffins/
# 6 -url:  https://lovingitvegan.com/vegan-lentil-soup/
# 12 -url:  https://lovingitvegan.com/fried-tofu/
# 9 -url:  https://lovingitvegan.com/vegan-coconut-pancakes/
# 3 -url:  https://lovingitvegan.com/vegan-potato-salad/
# 16 -url:  https://lovingitvegan.com/vegan-macaroons/
# 8 -url:  https://lovingitvegan.com/vegan-pasta-salad/
# 14 -url:  https://lovingitvegan.com/marinated-tofu/
# 5 -url:  https://lovingitvegan.com/vegan-mushroom-gravy/
# 11 -url:  https://lovingitvegan.com/vegan-scalloped-potatoes/
# 8 -url:  https://lovingitvegan.com/vegan-lasagna/
# 7 -url:  https://lovingitvegan.com/vegan-snickers-bars-gluten-free/
# 3 -url:  https://lovingitvegan.com/vegan-ramen/
# 15 -url:  https://lovingitvegan.com/cheesy-ga

# 4 -url:  https://lovingitvegan.com/vegan-pizza/
# 9 -url:  https://lovingitvegan.com/vegan-pea-soup/
# 16 -url:  https://lovingitvegan.com/vegan-cinnamon-rolls/
# 4 -url:  https://lovingitvegan.com/vegan-minestrone-soup/
# 12 -url:  https://lovingitvegan.com/vegan-avocado-ice-cream/
# 13 -url:  https://lovingitvegan.com/baked-vegan-mac-and-cheese/
# 16 -url:  https://lovingitvegan.com/vegan-cream-cheese/
# 3 -url:  https://lovingitvegan.com/vegan-lentil-curry/
# 3 -url:  https://lovingitvegan.com/vegan-chocolate-brownies/
# 14 -url:  https://lovingitvegan.com/vegan-pesto-pasta-pan-roasted-cherry-tomatoes/
# 16 -url:  https://lovingitvegan.com/vegan-banana-oatmeal-cookies/
# 4 -url:  https://lovingitvegan.com/vegan-bruschetta/
# 4 -url:  https://lovingitvegan.com/vegan-zucchini-soup/
# 2 -url:  https://lovingitvegan.com/vegan-oreo-cake/
# 16 -url:  https://lovingitvegan.com/vegan-zucchini-fritters/
# 11 -url:  https://lovingitvegan.com/vegan-chocolate-cupcakes/
# 9 -url:  https://lovi