# LEGO Nowości Scraper 2026

Nowoczesny scraper nowości LEGO z użyciem Playwright.
Działa na Google Colab i lokalnie.

In [None]:
# Installation - run once.
# Installs the Python packages used by the cells below, then downloads
# Playwright's Chromium build and the system libraries Chromium depends on.
!pip install -q playwright pandas nest_asyncio matplotlib
!playwright install chromium
!playwright install-deps chromium
print("Instalacja zakonczona!")

In [None]:
import asyncio
import pandas as pd
import re
from datetime import datetime
from playwright.async_api import async_playwright

# Default listing page: the Polish "new sets and products" category.
_DEFAULT_URL = 'https://www.lego.com/pl-pl/categories/new-sets-and-products'

# Per-field read timeout. Playwright's default auto-wait is 30 s, which is far
# too long to spend on one optional label of one product card.
_FIELD_TIMEOUT_MS = 2000

# Polish diacritics -> ASCII, so keyword checks written in ASCII also match
# accented page text ("wkrótce", "niedostępny", ...).
_PL_TO_ASCII = str.maketrans('ąćęłńóśźż', 'acelnoszz')

# An integer with optional thousands groups separated by a space, a
# non-breaking space or a narrow non-breaking space ("1 299").
_GROUPED_INT = r'\d+(?:[ \u00a0\u202f]\d{3}(?!\d))*'
_PRICE_RE = re.compile(_GROUPED_INT + r'(?:[.,]\d+)?')
_COUNT_RE = re.compile(_GROUPED_INT)
_RATING_RE = re.compile(r'\d+(?:[.,]\d+)?')

# Checked in order; the first rule with a matching keyword wins.
_AVAILABILITY_RULES = (
    (('koszyk', 'dodaj'), 'Dostepny'),
    (('niedostep',), 'Niedostepny'),
    (('wkrotce',), 'Wkrotce'),
    (('przedsprz',), 'Przedsprzedaz'),
)


def _parse_price(text):
    """Return the first price found in *text* as a float, or None."""
    found = _PRICE_RE.search(text or '')
    if found is None:
        return None
    # str.split() with no argument also splits on non-breaking spaces, which
    # float() would otherwise reject inside the number.
    digits = ''.join(found.group().split())
    return float(digits.replace(',', '.'))


def _parse_count(text):
    """Return the first integer in *text* (thousands groups joined), or None."""
    found = _COUNT_RE.search(text or '')
    if found is None:
        return None
    return int(''.join(found.group().split()))


def _parse_rating(text):
    """Return the first decimal number in *text* as a float, or None."""
    found = _RATING_RE.search(text or '')
    if found is None:
        return None
    return float(found.group().replace(',', '.'))


def _classify_availability(button_text):
    """Map a card button label to one of the availability labels."""
    folded = (button_text or '').lower().translate(_PL_TO_ASCII)
    for keywords, label in _AVAILABILITY_RULES:
        if any(keyword in folded for keyword in keywords):
            return label
    return 'Nieznana'


async def _click_if_present(page, selector):
    """Click the first element matching *selector*, if the page has one."""
    try:
        target = page.locator(selector)
        if await target.count() > 0:
            await target.first.click()
            await page.wait_for_timeout(1000)
    except Exception as exc:
        # Popups are optional; failing to dismiss one must not stop the run.
        print(f"  Nie udalo sie kliknac popupu: {exc}")


async def _read_first(scope, selector, attribute=None):
    """Return text (or *attribute*) of the first match in *scope*, '' if none."""
    try:
        matches = scope.locator(selector)
        # count() does not wait, so an absent optional field costs nothing.
        # Calling inner_text() directly would auto-wait for the full timeout.
        if await matches.count() == 0:
            return ''
        first = matches.first
        if attribute is None:
            return await first.inner_text(timeout=_FIELD_TIMEOUT_MS)
        return await first.get_attribute(attribute, timeout=_FIELD_TIMEOUT_MS) or ''
    except Exception:
        # Best effort: an unreadable field is reported as missing rather than
        # discarding the whole product.
        return ''


async def _extract_product(card):
    """Build the product dict for one card; None when it has no name."""
    nazwa = await _read_first(card, '[data-test="product-leaf-title"], h3, span[class*="Title"]')
    if not nazwa.strip():
        # Fall back to the text of the product link.
        nazwa = await _read_first(card, 'a[href*="/product/"]')
    if not nazwa.strip():
        return None

    cena = await _read_first(
        card,
        '[data-test="product-leaf-price"], span:has-text("zl"), span:has-text("zł")',
    )
    pieces_text = await _read_first(
        card, '[data-test="product-leaf-piece-count-label"], span:has-text("elem")'
    )
    wiek = await _read_first(
        card, '[data-test="product-leaf-age-range-label"], span:has-text("+")'
    )
    rating_text = await _read_first(
        card, '[data-test="product-leaf-rating"] span, span[class*="Rating"]'
    )

    url = await _read_first(card, 'a[href*="/product/"]', attribute='href')
    if url and not url.startswith('http'):
        url = 'https://www.lego.com' + url

    button_text = await _read_first(card, 'button')

    return {
        'nazwa': nazwa.strip(),
        'cena': cena.strip(),
        'cena_zl': _parse_price(cena),
        'elementy': _parse_count(pieces_text),
        'wiek': wiek.strip(),
        'ocena': _parse_rating(rating_text),
        'dostepnosc': _classify_availability(button_text),
        'url': url,
    }


async def scrape_lego_nowosci(url: str = _DEFAULT_URL, scrolls: int = 8) -> list:
    """Scrape a LEGO product listing page with headless Chromium.

    Opens *url*, dismisses the age and cookie popups when present, scrolls to
    the bottom *scrolls* times to trigger lazy loading, then reads every
    product card found on the page.

    Args:
        url: Listing page to open. Defaults to the pl-PL new-products page.
        scrolls: Number of scroll-to-bottom rounds before reading the cards.

    Returns:
        A list of dicts with the keys ``nazwa``, ``cena`` (raw price text),
        ``cena_zl`` (float or None), ``elementy`` (int or None), ``wiek``,
        ``ocena`` (float or None), ``dostepnosc`` and ``url``. Cards without
        a name are skipped.
    """
    products = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                locale='pl-PL',
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            )
            page = await context.new_page()

            print("Otwieram strone LEGO...")
            await page.goto(url, wait_until='domcontentloaded')
            await page.wait_for_timeout(3000)

            # Close the age-gate popup, if shown.
            await _click_if_present(
                page,
                'button:has-text("18+"), button:has-text("dorosly"), button:has-text("dorosły")',
            )
            # Accept cookies, if asked.
            await _click_if_present(
                page,
                'button:has-text("Akceptuj"), button:has-text("Accept"), [data-test="cookie-accept-all"]',
            )

            # Scroll to the bottom repeatedly so lazily loaded products appear.
            print("Laduje produkty...")
            for i in range(scrolls):
                await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                await page.wait_for_timeout(1500)
                print(f"  Scroll {i+1}/{scrolls}")

            # Back to the top.
            await page.evaluate('window.scrollTo(0, 0)')
            await page.wait_for_timeout(1000)

            # Locate product cards; several selectors cover markup variants.
            product_cards = page.locator('[data-test="product-item"], [data-test="product-leaf"], article[class*="Product"]')
            count = await product_cards.count()
            print(f"Znaleziono {count} produktow")

            if count == 0:
                # Fallback: any list item that contains a product link.
                product_cards = page.locator('li:has(a[href*="/product/"])')
                count = await product_cards.count()
                print(f"Metoda alternatywna: {count} produktow")

            for i in range(count):
                try:
                    product = await _extract_product(product_cards.nth(i))
                except Exception as exc:
                    # One broken card must not abort the run, but say so.
                    print(f"  Pominieto produkt {i+1}: {exc}")
                    continue

                if product is not None:
                    products.append(product)

                if (i + 1) % 10 == 0:
                    print(f"  Przetworzono {i+1}/{count}")
        finally:
            # Release the browser even when navigation or scraping fails.
            await browser.close()

    return products

print("Funkcja gotowa!")

In [None]:
# Run the scraper and report how many products were collected.
import nest_asyncio

banner_rule = "=" * 50
print(banner_rule, "LEGO NOWOSCI SCRAPER 2026", banner_rule, sep="\n")

# Needed in Jupyter/Colab: nest_asyncio patches asyncio so that
# run_until_complete() can be used from a notebook cell.
nest_asyncio.apply()
event_loop = asyncio.get_event_loop()
produkty = event_loop.run_until_complete(scrape_lego_nowosci())

print("\n" + banner_rule)
print("POBRANO: " + str(len(produkty)) + " produktow")
print(banner_rule)

In [None]:
# Build the DataFrame.
# Naming the columns explicitly keeps them present even when the scraper
# returned nothing, so the cells below do not fail with KeyError.
df = pd.DataFrame(
    produkty,
    columns=['nazwa', 'cena', 'cena_zl', 'elementy', 'wiek', 'ocena', 'dostepnosc', 'url'],
)

# A column holding only None stays dtype=object, which nlargest() rejects;
# coerce the numeric fields so missing values become NaN / <NA>.
df['cena_zl'] = pd.to_numeric(df['cena_zl'], errors='coerce')
df['ocena'] = pd.to_numeric(df['ocena'], errors='coerce')
# Nullable integer dtype: piece counts stay whole numbers despite gaps.
df['elementy'] = pd.to_numeric(df['elementy'], errors='coerce').astype('Int64')

# Timestamp of this scrape, identical for every row.
df['data_pobrania'] = datetime.now().strftime('%Y-%m-%d %H:%M')

print(f"Produktow: {len(df)}")
df

In [None]:
# STATISTICS
print("\n" + "="*40)
print("STATYSTYKI")
print("="*40)


def _numeric_column(frame, name):
    """Return the non-missing numeric values of *name* (empty if absent)."""
    # An empty scrape yields a frame without columns; treat that as "no data".
    if name not in frame:
        return pd.Series(dtype=float)
    return pd.to_numeric(frame[name], errors='coerce').dropna()


ceny = _numeric_column(df, 'cena_zl')
if not ceny.empty:
    print("\nCENY:")
    print(f"  Najtanszy: {ceny.min():.2f} zl")
    print(f"  Najdrozszy: {ceny.max():.2f} zl")
    print(f"  Srednia: {ceny.mean():.2f} zl")

liczby_elementow = _numeric_column(df, 'elementy')
if not liczby_elementow.empty:
    print("\nELEMENTY:")
    # Missing counts turn the column into floats; show whole numbers.
    print(f"  Min: {int(liczby_elementow.min())}")
    print(f"  Max: {int(liczby_elementow.max())}")

print("\nDOSTEPNOSC:")
if 'dostepnosc' in df:
    print(df['dostepnosc'].value_counts().to_string())
else:
    print("  (brak danych)")

In [None]:
# SAVE TO CSV
plik = f"lego_nowosci_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
# utf-8-sig prefixes the file with a BOM (presumably so spreadsheet apps
# detect the encoding - confirm if the consumer differs).
df.to_csv(plik, index=False, encoding='utf-8-sig')
print(f"Zapisano: {plik}")

# Offer the file as a browser download when running on Colab.
try:
    from google.colab import files
except ImportError:
    # Not on Colab: the CSV simply stays in the working directory.
    files = None

if files is not None:
    try:
        files.download(plik)
    except Exception as exc:
        # The file is already saved; report the failed download instead of
        # hiding it.
        print(f"Nie udalo sie pobrac pliku: {exc}")

In [None]:
# TOP 10 MOST EXPENSIVE
print("\nTOP 10 NAJDROZSZYCH:")
print("-"*40)

# Rank on a numeric copy of the price column. When no price was parsed the
# column is dtype=object (or absent after an empty scrape) and
# DataFrame.nlargest would raise.
if 'cena_zl' in df:
    ceny = pd.to_numeric(df['cena_zl'], errors='coerce').dropna()
else:
    ceny = pd.Series(dtype=float)

# reindex() tolerates missing columns; only rows that have a price are ranked.
top10 = df.reindex(columns=['nazwa', 'cena', 'elementy']).loc[ceny.nlargest(10).index]

if top10.empty:
    print("Brak produktow z cena.")

for _, row in top10.iterrows():
    # Piece counts may be NaN/<NA> or floats; print a whole number or a dash.
    elementy = '-' if pd.isna(row['elementy']) else int(row['elementy'])
    print(f"{row['nazwa'][:40]:40} | {row['cena']:>12} | {elementy} elem.")

In [None]:
# PRICE CHART
import matplotlib.pyplot as plt

# One fixed colour per availability label. value_counts() orders labels by
# frequency, so slicing a colour list by position would give the same status
# a different colour from run to run.
# NOTE(review): the label-to-colour pairing follows the order of the original
# palette (green, red, orange, blue, grey) - confirm it matches the intent.
STATUS_COLORS = {
    'Dostepny': '#4CAF50',
    'Niedostepny': '#f44336',
    'Wkrotce': '#FF9800',
    'Przedsprzedaz': '#2196F3',
    'Nieznana': '#9E9E9E',
}

# Skip the figure when no price was parsed (or the column is absent after an
# empty scrape).
if 'cena_zl' in df and df['cena_zl'].notna().any():
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Price histogram
    axes[0].hist(df['cena_zl'].dropna(), bins=25, color='#FFD700', edgecolor='black')
    axes[0].set_xlabel('Cena (zl)')
    axes[0].set_ylabel('Liczba produktow')
    axes[0].set_title('Rozklad cen nowosci LEGO')
    axes[0].grid(True, alpha=0.3)

    # Availability pie chart
    status_counts = df['dostepnosc'].value_counts()
    status_counts.plot(
        kind='pie',
        ax=axes[1],
        autopct='%1.1f%%',
        colors=[STATUS_COLORS.get(status, '#9E9E9E') for status in status_counts.index],
    )
    axes[1].set_title('Dostepnosc produktow')
    axes[1].set_ylabel('')

    plt.tight_layout()
    plt.show()