In [1]:
# Book data scraped from the Bukvoed online bookstore (bookvoed.ru)

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Settings
# Catalog listing page that get_page() downloads.
BASE_URL = 'https://www.bookvoed.ru/catalog/books-18030?sort=relevance'
# Browser-like request headers; get_page() replaces 'User-Agent' with a
# randomly chosen one on every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Referer': 'https://www.bookvoed.ru/'
}
DELAY_RANGE = (2, 5)  # (min, max) seconds of random delay slept before each request

# Accumulator that parse_page() appends one dict per book card to.
all_books = []
# Single HTTP session used by get_page() for its requests.
session = requests.Session()

def get_page(timeout=30):
    """Download the catalog page at BASE_URL and return its HTML.

    Sleeps for a random interval drawn from DELAY_RANGE before sending the
    request, and sends HEADERS with a randomly chosen User-Agent.

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``session.get``; without it a stalled connection
            would block the scraper indefinitely.

    Returns:
        The response body as text, or None when the request fails, the
        server answers with an error status, or the page contains the
        site's "access temporarily blocked" notice.
    """
    try:
        # Random pause so requests are not sent at a fixed cadence.
        time.sleep(random.uniform(*DELAY_RANGE))
        response = session.get(
            BASE_URL,
            headers={**HEADERS, 'User-Agent': get_random_user_agent()},
            timeout=timeout,
        )
        response.raise_for_status()

        # The block notice is detected by its text in the page body.
        if 'Доступ временно заблокирован' in response.text:
            print('Обнаружена блокировка')
            return None
        return response.text
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller treat None as "no page".
        print(f'Ошибка запроса: {e}')
        return None

def get_random_user_agent():
    """Return one User-Agent string picked at random from a fixed pool of three."""
    user_agent_pool = (
        # Chrome on Windows
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        # Safari on macOS
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
        # Firefox on Linux
        'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0',
    )
    return random.choice(user_agent_pool)

def parse_page(html):
    """Extract book records from a catalog page and append them to all_books.

    Every ``div.product-card`` element in ``html`` yields one dict with the
    keys 'Название', 'Автор', 'Цена', 'Старая цена' and 'Рейтинг'. All values
    are kept as stripped strings; missing elements fall back to placeholder
    values (None for the old price).

    Args:
        html: HTML source of a catalog page.

    Returns:
        None. Results are accumulated in the module-level ``all_books`` list.
    """
    soup = BeautifulSoup(html, 'html.parser')

    for book in soup.select('div.product-card'):
        try:
            # Title
            title_elem = book.select_one('a.product-description__link')
            title = title_elem.text.strip() if title_elem else 'Без названия'

            # Authors: a card may list several, joined into one string.
            authors = [
                link.text.strip()
                for link in book.select('a.ui-comma-separated-links__author')
            ]
            authors_str = ', '.join(authors) if authors else 'Автор не указан'

            # Prices: the rouble sign is dropped, the rest is left as text.
            price_elem = book.select_one('span.price-info__price')
            old_price_elem = book.select_one('span.price-info__old-price')
            current_price = (
                price_elem.text.replace('₽', '').strip() if price_elem else '0'
            )
            old_price = (
                old_price_elem.text.replace('₽', '').strip()
                if old_price_elem
                else None
            )

            # Rating
            rating_elem = book.select_one('div.product-rating-circle')
            rating = rating_elem.text.strip() if rating_elem else 'Нет рейтинга'

            all_books.append({
                'Название': title,
                'Автор': authors_str,
                'Цена': current_price,
                'Старая цена': old_price,
                'Рейтинг': rating,
            })
        except Exception as e:
            # Best effort: one malformed card must not abort the whole page.
            print(f'Ошибка парсинга: {e}')

# Run the scraper: fetch the catalog page and parse it if the download succeeded.
html = get_page()
if html:
    parse_page(html)

# Save the results, or report that nothing was collected.
if not all_books:
    print('Данные не найдены')
else:
    df = pd.DataFrame(all_books)
    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(
        'bookvoed_data.csv',
        index=False,
        encoding='utf-8-sig',
    )
    print(f'Сохранено {len(df)} записей')

Сохранено 60 записей
