In [1]:
import scrapy

In [2]:
class BlogSpider(scrapy.Spider):
    """Crawl the paginated product-list JSON API and yield one item per product.

    The crawl opens the storefront homepage first and then requests every URL
    in ``start_urls``. Each API response is decoded as JSON, its ``results``
    entries are flattened into plain dicts, and the next page is requested
    until ``currentPage`` reaches ``pageCount``.

    Items pass through the ``SaveTitle`` pipeline and are exported to
    ``titles.csv`` (overwritten on each run), as configured in
    ``custom_settings``.
    """

    name = 'blogspider'

    # Template for one page of the product list; only the page number varies.
    _API_PAGE_URL = 'https://online.auchan.hu/api/v2/products?page={}&itemsPerPage=12&listId=10177&hl=hu'

    start_urls = [_API_PAGE_URL.format(1)]

    custom_settings = {
        'ITEM_PIPELINES': {
            # The pipeline class is defined in the notebook itself, hence ``__main__``.
            '__main__.SaveTitle': 1
        },
        'FEEDS': {
            'titles.csv': {
                'format': 'csv',
                'overwrite': True
            }
        }
    }

    def start_requests(self):
        """Request the storefront homepage before touching the API.

        The homepage is HTML, so it must not be routed to ``parse`` (which
        decodes JSON); it gets its own callback that then schedules the API
        requests.
        """
        # NOTE(review): presumably the homepage visit is meant to establish
        # session cookies before the API is called - confirm it is required.
        yield scrapy.Request(
            url='https://online.auchan.hu/',
            callback=self.parse_homepage,
        )

    def parse_homepage(self, response):
        """Schedule the API requests listed in ``start_urls``.

        The homepage body itself is not inspected.
        """
        for url in self.start_urls:
            # dont_filter mirrors how Scrapy schedules its default start requests.
            yield scrapy.Request(url, callback=self.parse, dont_filter=True)

    def parse(self, response):
        """Yield one flat dict per product and follow to the next API page.

        Expects ``response`` to be a JSON document with the keys ``results``,
        ``currentPage`` and ``pageCount``; a missing key raises ``KeyError``.
        """
        # Decode the JSON body once and reuse it for items and pagination.
        payload = response.json()

        for product in payload['results']:
            variant = product['defaultVariant']
            yield {
                'id': product['id'],
                'defaultVariant': variant["id"],
                'productId': variant["productId"],
                'price': variant["price"]["gross"],
                'name': variant["name"],
            }

        # Pagination is driven by the page counters in the payload; a JSON
        # response has no HTML "next" link to follow.
        current_page = payload['currentPage']
        if current_page < payload['pageCount']:
            next_url = self._API_PAGE_URL.format(current_page + 1)
            yield response.follow(next_url, callback=self.parse)

In [3]:
class SaveTitle(object):
    """Item pipeline that narrows each scraped item to a fixed set of fields."""

    def process_item(self, item, spider):
        """Return a new dict holding only the exported fields of ``item``.

        The fields are copied in a fixed order; any other keys on ``item``
        are dropped. A missing field raises ``KeyError``. ``spider`` is
        accepted for the pipeline signature but not used.
        """
        exported_fields = ('id', 'defaultVariant', 'productId', 'price', 'name')
        return {field: item[field] for field in exported_fields}

In [4]:
from scrapy.crawler import CrawlerProcess

# Run the spider inside this kernel: create a crawler process with no extra
# settings, schedule BlogSpider on it, then start the crawl.
# NOTE(review): a Twisted reactor normally cannot be restarted once stopped, so
# re-running this cell in the same kernel is expected to fail - restart the
# kernel before crawling again.
process = CrawlerProcess()
# `res` keeps the value returned by crawl(); nothing in the visible cells reads it.
res = process.crawl(BlogSpider)
process.start()

2022-12-16 19:39:54 [scrapy.utils.log] INFO: Scrapy 2.7.1 started (bot: scrapybot)
2022-12-16 19:39:54 [scrapy.utils.log] INFO: Versions: lxml 4.9.2.0, libxml2 2.9.14, cssselect 1.2.0, parsel 1.7.0, w3lib 2.1.1, Twisted 22.10.0, Python 3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0], pyOpenSSL 22.1.0 (OpenSSL 3.0.7 1 Nov 2022), cryptography 38.0.4, Platform Linux-5.15.0-56-generic-x86_64-with-glibc2.35
2022-12-16 19:39:54 [scrapy.crawler] INFO: Overridden settings:
{}


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2022-12-16 19:39:54 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.epollreactor.EPollReactor
2022-12-16 19:39:54 [scrapy.extensions.telnet] INFO: Telnet Password: f7b87168bde85ecd
2022-12-16 19:39:54 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.Me