In [1]:
# ============ 1. –ù–ê–°–¢–†–û–ô–ö–ê –û–ö–†–£–ñ–ï–ù–ò–Ø ============
import nest_asyncio
nest_asyncio.apply()

import asyncio
import aiohttp
import json
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Dict, Optional, Any, Union
import logging
import hashlib
import re
import os
import warnings
warnings.filterwarnings('ignore')

# –ù–∞—Å—Ç—Ä–æ–π–∫–∞ –ª–æ–≥–∏—Ä–æ–≤–∞–Ω–∏—è
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print(" –ë–∏–±–ª–∏–æ—Ç–µ–∫–∏ –∑–∞–≥—Ä—É–∂–µ–Ω—ã –∏ –æ–∫—Ä—É–∂–µ–Ω–∏–µ –Ω–∞—Å—Ç—Ä–æ–µ–Ω–æ!")

# ============ 2. –ú–û–î–ï–õ–ò PYDANTIC ============
from pydantic import BaseModel, Field, validator
from enum import Enum
from sentence_transformers import SentenceTransformer

class EventCategory(str, Enum):
    """15+ –∫–∞—Ç–µ–≥–æ—Ä–∏–π —Å–æ–±—ã—Ç–∏–π"""
    CONCERT = "concert"
    THEATER = "theater"
    EXHIBITION = "exhibition"
    FESTIVAL = "festival"
    EDUCATION = "education"
    PARTY = "party"
    SPORT = "sport"
    QUEST = "quest"
    EXCURSION = "excursion"
    SHOW = "show"
    STANDUP = "standup"
    KIDS = "kids"
    FASHION = "fashion"
    GASTRONOMY = "gastronomy"
    CINEMA = "cinema"
    LECTURE = "lecture"
    MASTERCLASS = "masterclass"
    TOUR = "tour"
    OTHER = "other"

class PlaceModel(BaseModel):
    """–ü–æ–ª–Ω–∞—è –º–æ–¥–µ–ª—å –º–µ—Å—Ç–∞"""
    id: Optional[int] = None
    title: str = Field(..., description="–ù–∞–∑–≤–∞–Ω–∏–µ –º–µ—Å—Ç–∞")
    address: Optional[str] = None
    subway: Optional[str] = None
    coords: Optional[Dict[str, float]] = None
    phone: Optional[str] = None
    site_url: Optional[str] = None
    city: Optional[str] = None
    is_closed: bool = False
    working_hours: Optional[Dict] = None
    
    @property
    def full_address(self) -> str:
        parts = []
        if self.title:
            parts.append(self.title)
        if self.address:
            parts.append(self.address)
        if self.subway:
            parts.append(f"–º. {self.subway}")
        return ", ".join(parts)

class DateModel(BaseModel):
    """–ú–æ–¥–µ–ª—å –¥–∞—Ç—ã —Å–æ–±—ã—Ç–∏—è"""
    start: int  # timestamp
    end: Optional[int] = None
    is_continuous: bool = False
    
    @property
    def start_dt(self) -> datetime:
        return datetime.fromtimestamp(self.start)
    
    @property
    def end_dt(self) -> Optional[datetime]:
        return datetime.fromtimestamp(self.end) if self.end else None
    
    @property
    def formatted(self) -> str:
        start_str = self.start_dt.strftime("%d.%m.%Y %H:%M")
        if self.end:
            end_str = self.end_dt.strftime("%d.%m.%Y %H:%M")
            if start_str[:10] == end_str[:10]:
                return f"{start_str} - {end_str[11:]}"
            return f"{start_str} - {end_str}"
        return start_str

class PriceModel(BaseModel):
    """–ú–æ–¥–µ–ª—å —Ü–µ–Ω—ã"""
    is_free: bool = False
    min: Optional[float] = None
    max: Optional[float] = None
    currency: str = "RUB"
    description: Optional[str] = None
    
    @property
    def display(self) -> str:
        if self.is_free:
            return "–ë–µ—Å–ø–ª–∞—Ç–Ω–æ"
        elif self.min and self.max:
            return f"{self.min:.0f} - {self.max:.0f} ‚ÇΩ"
        elif self.min:
            return f"–æ—Ç {self.min:.0f} ‚ÇΩ"
        return self.description or "–¶–µ–Ω–∞ –Ω–µ —É–∫–∞–∑–∞–Ω–∞"

class ImageModel(BaseModel):
    """–ú–æ–¥–µ–ª—å –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è"""
    url: str
    thumbnail: Optional[str] = None
    source: Optional[Dict] = None

class TagModel(BaseModel):
    """–ú–æ–¥–µ–ª—å —Ç–µ–≥–∞"""
    name: str
    slug: Optional[str] = None
    id: Optional[int] = None

class ParticipantModel(BaseModel):
    """–ú–æ–¥–µ–ª—å —É—á–∞—Å—Ç–Ω–∏–∫–∞"""
    name: str
    role: Optional[str] = None
    images: Optional[List[Dict]] = None

class EventModel(BaseModel):
    """–ü–û–õ–ù–ê–Ø –ú–û–î–ï–õ–¨ –°–û–ë–´–¢–ò–Ø"""
    id: int
    title: str
    short_title: Optional[str] = None
    description: Optional[str] = None
    slug: Optional[str] = None
    category: EventCategory
    tags: List[TagModel] = []
    dates: List[DateModel] = []
    publication_date: Optional[int] = None
    age_restriction: Optional[str] = None
    place: Optional[PlaceModel] = None
    place_id: Optional[int] = None
    price: Optional[PriceModel] = None
    images: List[ImageModel] = []
    video: Optional[Dict] = None
    participants: List[ParticipantModel] = []
    url: str
    site_url: Optional[str] = None
    buy_url: Optional[str] = None
    favorites_count: int = 0
    comments_count: int = 0
    external_id: Optional[str] = None
    location: str = "msk"
    is_free: bool = False
    is_exclusive: bool = False
    is_editors_choice: bool = False
    is_approved: bool = True
    parsed_at: datetime = Field(default_factory=datetime.now)
    
    @property
    def embedding_text(self) -> str:
        parts = [self.title]
        if self.description:
            parts.append(self.description[:500])
        if self.tags:
            parts.extend([tag.name for tag in self.tags[:5]])
        if self.place:
            if self.place.title:
                parts.append(self.place.title)
            if self.place.address:
                parts.append(self.place.address)
        parts.append(self.category.value)
        return " ".join(parts)
    
    @property
    def date_range_text(self) -> str:
        if not self.dates:
            return "–î–∞—Ç–∞ –Ω–µ —É–∫–∞–∑–∞–Ω–∞"
        if len(self.dates) == 1:
            return self.dates[0].formatted
        dates_str = [date.formatted for date in self.dates[:3]]
        if len(self.dates) > 3:
            dates_str.append(f"... –µ—â–µ {len(self.dates)-3}")
        return "; ".join(dates_str)
    
    class Config:
        json_encoders = {
            datetime: lambda dt: dt.isoformat(),
        }

print(" Pydantic –º–æ–¥–µ–ª–∏ —Å–æ–∑–¥–∞–Ω—ã!")

# ============ 3. –ë–ê–ó–ê –î–ê–ù–ù–´–• SQLite –° –í–ï–ö–¢–û–†–ù–´–ú–ò –≠–ú–ë–ï–î–î–ò–ù–ì–ê–ú–ò ============
class EventVectorDatabaseSQLite:
    """–í–µ–∫—Ç–æ—Ä–Ω–∞—è –±–∞–∑–∞ –¥–∞–Ω–Ω—ã—Ö –Ω–∞ –æ—Å–Ω–æ–≤–µ SQLite —Å —ç–º–±–µ–¥–¥–∏–Ω–≥–∞–º–∏"""
    
    def __init__(self, db_path: str = "events_vector.db", embedding_model: str = "all-MiniLM-L6-v2"):
        self.db_path = db_path
        try:
            # –û—Ç–∫–ª—é—á–∞–µ–º –ª–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ –¥–ª—è —É—Å–∫–æ—Ä–µ–Ω–∏—è
            import logging
            logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
            logging.getLogger("transformers").setLevel(logging.WARNING)
            
            self.model = SentenceTransformer(embedding_model)
            self.embedding_dim = self.model.get_sentence_embedding_dimension()
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –º–æ–¥–µ–ª–∏: {e}")
            logger.info("–ò—Å–ø–æ–ª—å–∑—É—é –∑–∞–≥–ª—É—à–∫—É –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è...")
            # –°–æ–∑–¥–∞–µ–º –∑–∞–≥–ª—É—à–∫—É –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è
            self.embedding_dim = 384
            self.model = None
        self.conn = None
        self._init_db()
        print(f" –í–µ–∫—Ç–æ—Ä–Ω–∞—è –ë–î SQLite —Å–æ–∑–¥–∞–Ω–∞: {db_path}")
        print(f" –†–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤: {self.embedding_dim}")
    
    def _init_db(self):
        """–ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö"""
        self.conn = sqlite3.connect(self.db_path)
        cursor = self.conn.cursor()
        
        # –û—Å–Ω–æ–≤–Ω–∞—è —Ç–∞–±–ª–∏—Ü–∞ —Å–æ–±—ã—Ç–∏–π
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS events (
            id INTEGER PRIMARY KEY,
            title TEXT NOT NULL,
            description TEXT,
            category TEXT,
            dates_text TEXT,
            price_text TEXT,
            place_text TEXT,
            url TEXT,
            city TEXT,
            city_name TEXT,
            parsed_at TEXT,
            source TEXT DEFAULT 'kudago',
            image_count INTEGER DEFAULT 0,
            embedding_text TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        ''')
        
        # –¢–∞–±–ª–∏—Ü–∞ –¥–ª—è –≤–µ–∫—Ç–æ—Ä–Ω—ã—Ö —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS event_embeddings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            event_id INTEGER NOT NULL,
            embedding BLOB NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (event_id) REFERENCES events (id)
        )
        ''')
        
        # –¢–∞–±–ª–∏—Ü–∞ –¥–ª—è –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS event_details (
            id INTEGER PRIMARY KEY,
            event_id INTEGER,
            dates_json TEXT,
            price_json TEXT,
            place_json TEXT,
            tags_json TEXT,
            images_json TEXT,
            participants_json TEXT,
            age_restriction TEXT,
            FOREIGN KEY (event_id) REFERENCES events (id)
        )
        ''')
        
        # –ò–Ω–¥–µ–∫—Å—ã –¥–ª—è –±—ã—Å—Ç—Ä–æ–≥–æ –ø–æ–∏—Å–∫–∞
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_city ON events(city)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_category ON events(category)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_events_date ON events(parsed_at)')
        
        self.conn.commit()
    
    def _blob_to_array(self, blob):
        """–ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è BLOB –≤ numpy array"""
        return np.frombuffer(blob, dtype=np.float32)
    
    def _array_to_blob(self, array):
        """–ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è numpy array –≤ BLOB"""
        return array.astype(np.float32).tobytes()
    
    def create_embedding(self, text: str):
        """–°–æ–∑–¥–∞–Ω–∏–µ –≤–µ–∫—Ç–æ—Ä–Ω–æ–≥–æ —ç–º–±–µ–¥–¥–∏–Ω–≥–∞ –¥–ª—è —Ç–µ–∫—Å—Ç–∞"""
        try:
            if not text or not text.strip():
                # –í–æ–∑–≤—Ä–∞—â–∞–µ–º –Ω—É–ª–µ–≤–æ–π –≤–µ–∫—Ç–æ—Ä –µ—Å–ª–∏ —Ç–µ–∫—Å—Ç –ø—É—Å—Ç–æ–π
                return np.zeros(self.embedding_dim, dtype=np.float32)
            
            if self.model is None:
                # –ó–∞–≥–ª—É—à–∫–∞ –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è - —Å–ª—É—á–∞–π–Ω—ã–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–∏
                logger.debug("–ò—Å–ø–æ–ª—å–∑—É—é –∑–∞–≥–ª—É—à–∫—É –¥–ª—è —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤")
                return np.random.randn(self.embedding_dim).astype(np.float32)
            
            # –°–æ–∑–¥–∞–µ–º —ç–º–±–µ–¥–¥–∏–Ω–≥ –±–µ–∑ –ø—Ä–æ–≥—Ä–µ—Å—Å-–±–∞—Ä–∞
            embedding = self.model.encode(text, show_progress_bar=False)
            
            # –ï—Å–ª–∏ embedding —É–∂–µ numpy array, –∫–æ–Ω–≤–µ—Ä—Ç–∏—Ä—É–µ–º –≤ –Ω—É–∂–Ω—ã–π —Ñ–æ—Ä–º–∞—Ç
            if isinstance(embedding, np.ndarray):
                return embedding.astype(np.float32)
            else:
                return np.array(embedding, dtype=np.float32)
                
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ —Å–æ–∑–¥–∞–Ω–∏—è —ç–º–±–µ–¥–¥–∏–Ω–≥–∞: {e}")
            return np.zeros(self.embedding_dim, dtype=np.float32)
    
    def save_event_with_embedding(self, event_data: Dict) -> bool:
        """–°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Å–æ–±—ã—Ç–∏—è —Å –≤–µ–∫—Ç–æ—Ä–Ω—ã–º —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–º"""
        try:
            event_id = event_data.get('id')
            if not event_id:
                logger.warning(f"–°–æ–±—ã—Ç–∏–µ –±–µ–∑ ID: {event_data.get('title', '–ë–µ–∑ –Ω–∞–∑–≤–∞–Ω–∏—è')}")
                return False
            
            cursor = self.conn.cursor()
            
            # –ü—Ä–æ–≤–µ—Ä—è–µ–º —Å—É—â–µ—Å—Ç–≤–æ–≤–∞–Ω–∏–µ —Å–æ–±—ã—Ç–∏—è
            cursor.execute("SELECT id FROM events WHERE id = ?", (event_id,))
            exists = cursor.fetchone()
            
            # –ü–æ–¥–≥–æ—Ç–∞–≤–ª–∏–≤–∞–µ–º —Ç–µ–∫—Å—Ç –¥–ª—è —ç–º–±–µ–¥–¥–∏–Ω–≥–∞
            embedding_text = ""
            parts = []
            
            if event_data.get('title'):
                parts.append(str(event_data['title']))
            if event_data.get('description'):
                parts.append(str(event_data['description'])[:500])
            if event_data.get('tags'):
                tags = event_data['tags']
                if isinstance(tags, list):
                    tag_names = []
                    for tag in tags[:5]:
                        if isinstance(tag, dict):
                            tag_names.append(tag.get('name', ''))
                        elif isinstance(tag, str):
                            tag_names.append(tag)
                    if tag_names:
                        parts.append(' '.join(tag_names))
            if event_data.get('category'):
                parts.append(str(event_data['category']))
            
            embedding_text = " ".join(parts)
            
            # –û—Å–Ω–æ–≤–Ω—ã–µ –ø–æ–ª—è
            title = str(event_data.get('title', ''))[:200]
            description = str(event_data.get('description', ''))[:1000]
            category = str(event_data.get('category', ''))[:50]
            
            # –î–∞—Ç—ã
            dates_text_list = event_data.get('dates_text', [])
            if isinstance(dates_text_list, list):
                dates_text = ','.join([str(d) for d in dates_text_list])[:500]
            else:
                dates_text = str(dates_text_list)[:500]
            
            price_text = str(event_data.get('price_text', ''))[:200]
            place_text = str(event_data.get('place_text', ''))[:500]
            url = str(event_data.get('url', ''))[:500]
            city = str(event_data.get('city', ''))[:10]
            city_name = str(event_data.get('city_name', ''))[:50]
            parsed_at = str(event_data.get('parsed_at', datetime.now().isoformat()))
            image_count = event_data.get('image_count', 0)
            
            if exists:
                # –û–±–Ω–æ–≤–ª—è–µ–º —Å—É—â–µ—Å—Ç–≤—É—é—â–µ–µ —Å–æ–±—ã—Ç–∏–µ
                cursor.execute('''
                UPDATE events SET
                    title=?, description=?, category=?, dates_text=?,
                    price_text=?, place_text=?, url=?, city=?,
                    city_name=?, parsed_at=?, image_count=?, embedding_text=?
                WHERE id=?
                ''', (
                    title, description, category, dates_text,
                    price_text, place_text, url, city,
                    city_name, parsed_at, image_count, embedding_text,
                    event_id
                ))
            else:
                # –í—Å—Ç–∞–≤–ª—è–µ–º –Ω–æ–≤–æ–µ —Å–æ–±—ã—Ç–∏–µ
                cursor.execute('''
                INSERT INTO events (
                    id, title, description, category, dates_text,
                    price_text, place_text, url, city,
                    city_name, parsed_at, image_count, embedding_text
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    event_id, title, description, category, dates_text,
                    price_text, place_text, url, city,
                    city_name, parsed_at, image_count, embedding_text
                ))
            
            # –°–æ–∑–¥–∞–µ–º –∏ —Å–æ—Ö—Ä–∞–Ω—è–µ–º —ç–º–±–µ–¥–¥–∏–Ω–≥
            embedding = self.create_embedding(embedding_text)
            embedding_blob = self._array_to_blob(embedding)
            
            # –°–æ—Ö—Ä–∞–Ω—è–µ–º —ç–º–±–µ–¥–¥–∏–Ω–≥
            cursor.execute('''
            INSERT OR REPLACE INTO event_embeddings (event_id, embedding)
            VALUES (?, ?)
            ''', (event_id, embedding_blob))
            
            # –°–æ—Ö—Ä–∞–Ω—è–µ–º –¥–µ—Ç–∞–ª–∏
            self._save_event_details(event_id, event_data)
            
            self.conn.commit()
            return True
            
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è —Å–æ–±—ã—Ç–∏—è {event_data.get('id')}: {e}")
            if self.conn:
                self.conn.rollback()
            return False
    
    def _save_event_details(self, event_id: int, event_data: Dict):
        """–°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –¥–æ–ø–æ–ª–Ω–∏—Ç–µ–ª—å–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö —Å–æ–±—ã—Ç–∏—è"""
        try:
            cursor = self.conn.cursor()
            
            def safe_to_json(data, default=None):
                try:
                    if data is None:
                        return '{}'
                    return json.dumps(data, ensure_ascii=False)
                except:
                    return json.dumps(default or {}, ensure_ascii=False)
            
            dates_json = safe_to_json(event_data.get('dates'))
            price_json = safe_to_json(event_data.get('price'))
            place_json = safe_to_json(event_data.get('place'))
            tags_json = safe_to_json(event_data.get('tags'), [])
            images_json = safe_to_json(event_data.get('images'), [])
            participants_json = safe_to_json(event_data.get('participants'), [])
            age_restriction = str(event_data.get('age_restriction', ''))[:50]
            
            cursor.execute("SELECT id FROM event_details WHERE event_id = ?", (event_id,))
            exists = cursor.fetchone()
            
            if exists:
                cursor.execute('''
                UPDATE event_details SET
                    dates_json=?, price_json=?, place_json=?, tags_json=?,
                    images_json=?, participants_json=?, age_restriction=?
                WHERE event_id=?
                ''', (
                    dates_json, price_json, place_json, tags_json,
                    images_json, participants_json, age_restriction,
                    event_id
                ))
            else:
                cursor.execute('''
                INSERT INTO event_details (
                    event_id, dates_json, price_json, place_json, tags_json,
                    images_json, participants_json, age_restriction
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    event_id, dates_json, price_json, place_json, tags_json,
                    images_json, participants_json, age_restriction
                ))
                
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è –¥–µ—Ç–∞–ª–µ–π {event_id}: {e}")
    
    def save_events_batch(self, events: List[Dict]) -> Dict[str, int]:
        """–ü–∞–∫–µ—Ç–Ω–æ–µ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Å–æ–±—ã—Ç–∏–π"""
        success = 0
        failed = 0
        
        print("  –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Å–æ–±—ã—Ç–∏–π –≤ –±–∞–∑—É –¥–∞–Ω–Ω—ã—Ö...")
        for event in events:
            if self.save_event_with_embedding(event):
                success += 1
            else:
                failed += 1
        
        return {"success": success, "failed": failed, "total": len(events)}
    
    def search_similar(self, query: str, n_results: int = 10, city: str = None, category: str = None):
        """–ü–æ–∏—Å–∫ –ø–æ—Ö–æ–∂–∏—Ö —Å–æ–±—ã—Ç–∏–π –ø–æ –≤–µ–∫—Ç–æ—Ä–Ω–æ–º—É —Å—Ö–æ–¥—Å—Ç–≤—É"""
        try:
            # –°–æ–∑–¥–∞–µ–º —ç–º–±–µ–¥–¥–∏–Ω–≥ –¥–ª—è –∑–∞–ø—Ä–æ—Å–∞
            query_embedding = self.create_embedding(query)
            
            cursor = self.conn.cursor()
            
            # –ü–æ–ª—É—á–∞–µ–º –≤—Å–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–∏
            if city and category:
                cursor.execute('''
                SELECT e.id, e.title, e.category, e.city, ed.embedding
                FROM events e
                JOIN event_embeddings ed ON e.id = ed.event_id
                WHERE e.city = ? AND e.category = ?
                ''', (city, category))
            elif city:
                cursor.execute('''
                SELECT e.id, e.title, e.category, e.city, ed.embedding
                FROM events e
                JOIN event_embeddings ed ON e.id = ed.event_id
                WHERE e.city = ?
                ''', (city,))
            elif category:
                cursor.execute('''
                SELECT e.id, e.title, e.category, e.city, ed.embedding
                FROM events e
                JOIN event_embeddings ed ON e.id = ed.event_id
                WHERE e.category = ?
                ''', (category,))
            else:
                cursor.execute('''
                SELECT e.id, e.title, e.category, e.city, ed.embedding
                FROM events e
                JOIN event_embeddings ed ON e.id = ed.event_id
                ''')
            
            rows = cursor.fetchall()
            
            # –í—ã—á–∏—Å–ª—è–µ–º –∫–æ—Å–∏–Ω—É—Å–Ω–æ–µ —Å—Ö–æ–¥—Å—Ç–≤–æ
            results = []
            for event_id, title, category, city, embedding_blob in rows:
                # –ö–æ–Ω–≤–µ—Ä—Ç–∏—Ä—É–µ–º BLOB –≤ numpy array
                event_embedding = self._blob_to_array(embedding_blob)
                
                # –í—ã—á–∏—Å–ª—è–µ–º –∫–æ—Å–∏–Ω—É—Å–Ω–æ–µ —Å—Ö–æ–¥—Å—Ç–≤–æ
                similarity = np.dot(query_embedding, event_embedding) / (
                    np.linalg.norm(query_embedding) * np.linalg.norm(event_embedding) + 1e-10
                )
                
                results.append({
                    'event_id': event_id,
                    'title': title,
                    'category': category,
                    'city': city,
                    'similarity': float(similarity)
                })
            
            # –°–æ—Ä—Ç–∏—Ä—É–µ–º –ø–æ —Å—Ö–æ–¥—Å—Ç–≤—É
            results.sort(key=lambda x: x['similarity'], reverse=True)
            
            # –ë–µ—Ä–µ–º —Ç–æ–ø-N —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤
            top_results = results[:n_results]
            
            # –ü–æ–ª—É—á–∞–µ–º –ø–æ–ª–Ω—É—é –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ —Å–æ–±—ã—Ç–∏—è—Ö
            for result in top_results:
                cursor.execute('''
                SELECT description, dates_text, price_text, place_text, url
                FROM events WHERE id = ?
                ''', (result['event_id'],))
                
                event_row = cursor.fetchone()
                if event_row:
                    result.update({
                        'description': event_row[0],
                        'dates_text': event_row[1],
                        'price_text': event_row[2],
                        'place_text': event_row[3],
                        'url': event_row[4]
                    })
            
            return top_results
            
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ –ø–æ–∏—Å–∫–∞: {e}")
            return []
    
    def get_stats(self) -> Dict:
        """–ü–æ–ª—É—á–µ–Ω–∏–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏"""
        try:
            cursor = self.conn.cursor()
            
            cursor.execute("SELECT COUNT(*) FROM events")
            total_events = cursor.fetchone()[0] or 0
            
            cursor.execute("SELECT COUNT(*) FROM event_embeddings")
            total_embeddings = cursor.fetchone()[0] or 0
            
            cursor.execute("SELECT COUNT(DISTINCT city) FROM events")
            cities_count = cursor.fetchone()[0] or 0
            
            cursor.execute("SELECT COUNT(DISTINCT category) FROM events")
            categories_count = cursor.fetchone()[0] or 0
            
            cursor.execute("SELECT city, COUNT(*) FROM events GROUP BY city")
            by_city = {row[0]: row[1] for row in cursor.fetchall()}
            
            return {
                "total_events": total_events,
                "total_embeddings": total_embeddings,
                "cities_count": cities_count,
                "categories_count": categories_count,
                "events_by_city": by_city,
                "db_path": self.db_path
            }
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ –ø–æ–ª—É—á–µ–Ω–∏—è —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏: {e}")
            return {}
    
    def close(self):
        """–ó–∞–∫—Ä—ã—Ç–∏–µ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è"""
        if self.conn:
            self.conn.close()

print(" –í–µ–∫—Ç–æ—Ä–Ω–∞—è –ë–î SQLite —Å–æ–∑–¥–∞–Ω–∞!")

# ============ 4. –£–õ–£–ß–®–ï–ù–ù–´–ô –ü–ê–†–°–ï–† KUDAGO ============
class EnhancedKudaGoParser:
    """–£–ª—É—á—à–µ–Ω–Ω—ã–π –ø–∞—Ä—Å–µ—Ä —Å –ø–æ–¥–¥–µ—Ä–∂–∫–æ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–π"""
    
    def __init__(self, city: str = "msk"):
        self.base_url = "https://kudago.com/public-api/v1.4"
        self.city = city
        self.city_name = "–ú–æ—Å–∫–≤–∞" if city == "msk" else "–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥"
        
        # –ü—Ä–æ–≤–µ—Ä–µ–Ω–Ω—ã–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –¥–ª—è –ú–æ—Å–∫–≤—ã (–æ—Å–Ω–æ–≤–Ω—ã–µ, –∫–æ—Ç–æ—Ä—ã–µ —Ç–æ—á–Ω–æ —Ä–∞–±–æ—Ç–∞—é—Ç)
        self.working_categories_msk = [
            ("concert", "–ö–æ–Ω—Ü–µ—Ä—Ç—ã"),
            ("theater", "–¢–µ–∞—Ç—Ä"), 
            ("exhibition", "–í—ã—Å—Ç–∞–≤–∫–∏"),
            ("festival", "–§–µ—Å—Ç–∏–≤–∞–ª–∏"),
            ("education", "–û–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ"),
            ("party", "–í–µ—á–µ—Ä–∏–Ω–∫–∏"),
            ("quest", "–ö–≤–µ—Å—Ç—ã"),
            ("kids", "–î–µ—Ç—Å–∫–∏–µ"),
            ("fashion", "–ú–æ–¥–∞"),
            ("cinema", "–ö–∏–Ω–æ"),
        ]
        
        # –ü—Ä–æ–≤–µ—Ä–µ–Ω–Ω—ã–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –¥–ª—è –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥–∞
        self.working_categories_spb = [
            ("concert", "–ö–æ–Ω—Ü–µ—Ä—Ç—ã"),
            ("theater", "–¢–µ–∞—Ç—Ä"), 
            ("exhibition", "–í—ã—Å—Ç–∞–≤–∫–∏"),
            ("festival", "–§–µ—Å—Ç–∏–≤–∞–ª–∏"),
            ("education", "–û–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ"),
            ("party", "–í–µ—á–µ—Ä–∏–Ω–∫–∏"),
            ("kids", "–î–µ—Ç—Å–∫–∏–µ"),
            ("cinema", "–ö–∏–Ω–æ"),
        ]
        
        # –í—Å–µ –≤–æ–∑–º–æ–∂–Ω—ã–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è
        self.all_categories = [
            ("concert", "–ö–æ–Ω—Ü–µ—Ä—Ç—ã"), ("theater", "–¢–µ–∞—Ç—Ä"), ("exhibition", "–í—ã—Å—Ç–∞–≤–∫–∏"),
            ("festival", "–§–µ—Å—Ç–∏–≤–∞–ª–∏"), ("education", "–û–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ"), ("party", "–í–µ—á–µ—Ä–∏–Ω–∫–∏"),
            ("sport", "–°–ø–æ—Ä—Ç"), ("quest", "–ö–≤–µ—Å—Ç—ã"), ("excursion", "–≠–∫—Å–∫—É—Ä—Å–∏–∏"),
            ("show", "–®–æ—É"), ("standup", "–°—Ç–µ–Ω–¥–∞–ø"), ("kids", "–î–µ—Ç—Å–∫–∏–µ"),
            ("fashion", "–ú–æ–¥–∞"), ("gastronomy", "–ì–∞—Å—Ç—Ä–æ–Ω–æ–º–∏—è"), ("cinema", "–ö–∏–Ω–æ"),
        ]
        
        # –ò—Å–ø–æ–ª—å–∑—É–µ–º —Ä–∞–±–æ—á–∏–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –≤ –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –æ—Ç –≥–æ—Ä–æ–¥–∞
        if city == "msk":
            self.categories = self.working_categories_msk
        else:
            self.categories = self.working_categories_spb
            
        self.max_events_per_category = 30  # –£–º–µ–Ω—å—à–∞–µ–º –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è
        self.page_size = 20  # –£–º–µ–Ω—å—à–∞–µ–º —Ä–∞–∑–º–µ—Ä —Å—Ç—Ä–∞–Ω–∏—Ü—ã
    
    async def fetch_page(self, session, category: str, page: int) -> Optional[Dict]:
        """–ó–∞–≥—Ä—É–∑–∫–∞ –æ–¥–Ω–æ–π —Å—Ç—Ä–∞–Ω–∏—Ü—ã —Å–æ–±—ã—Ç–∏–π —Å —É–ª—É—á—à–µ–Ω–Ω—ã–º–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏"""
        try:
            # –ë–æ–ª–µ–µ –ø—Ä–æ—Å—Ç–æ–π –∑–∞–ø—Ä–æ—Å —Å –º–∏–Ω–∏–º–∞–ª—å–Ω—ã–º–∏ –ø–∞—Ä–∞–º–µ—Ç—Ä–∞–º–∏
            params = {
                'categories': category,
                'location': self.city,
                'page_size': self.page_size,
                'page': page,
                'fields': 'id,title,description,dates,place,price,images,tags,age_restriction',
                'text_format': 'text'
            }
            
            async with session.get(f"{self.base_url}/events/", params=params, 
                                   timeout=aiohttp.ClientTimeout(total=30)) as response:
                if response.status == 200:
                    return await response.json()
                elif response.status == 400:
                    # –î–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ 400, –ø—Ä–æ–±—É–µ–º –±–µ–∑ location
                    logger.debug(f"–ü—Ä–æ–±—É–µ–º –∑–∞–ø—Ä–æ—Å –±–µ–∑ location –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}")
                    params.pop('location', None)
                    
                    async with session.get(f"{self.base_url}/events/", params=params, 
                                           timeout=aiohttp.ClientTimeout(total=30)) as response2:
                        if response2.status == 200:
                            return await response2.json()
                        else:
                            logger.debug(f"–ö–∞—Ç–µ–≥–æ—Ä–∏—è {category} –Ω–µ–¥–æ—Å—Ç—É–ø–Ω–∞ (—Å—Ç–∞—Ç—É—Å {response2.status})")
                            return None
                elif response.status == 429:
                    await asyncio.sleep(2)  # –£–≤–µ–ª–∏—á–∏–≤–∞–µ–º –ø–∞—É–∑—É –ø—Ä–∏ –æ–≥—Ä–∞–Ω–∏—á–µ–Ω–∏–∏
                    return None
                else:
                    logger.debug(f"HTTP Error {response.status} –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}")
                    return None
        except asyncio.TimeoutError:
            logger.warning(f"–¢–∞–π–º–∞—É—Ç –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}")
            return None
        except Exception as e:
            logger.warning(f"–û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}: {e}")
            return None
    
    async def enrich_place_info(self, session, place_data: Dict) -> Dict:
        """–û–±–æ–≥–∞—â–µ–Ω–∏–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ –º–µ—Å—Ç–µ"""
        if not place_data or not isinstance(place_data, dict):
            return {}
        place_id = place_data.get('id')
        if not place_id:
            return place_data
        try:
            # –ü—Ä–æ—Å—Ç–æ–π –∑–∞–ø—Ä–æ—Å –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ –º–µ—Å—Ç–µ
            async with session.get(f"{self.base_url}/places/{place_id}/", 
                                   timeout=aiohttp.ClientTimeout(total=10)) as response:
                if response.status == 200:
                    detailed = await response.json()
                    # –û–±–Ω–æ–≤–ª—è–µ–º —Ç–æ–ª—å–∫–æ –æ—Å–Ω–æ–≤–Ω—ã–µ –ø–æ–ª—è
                    updated_fields = {}
                    for field in ['title', 'address', 'subway', 'coords', 'phone', 'site_url', 'city']:
                        if field in detailed:
                            updated_fields[field] = detailed[field]
                    place_data.update(updated_fields)
        except Exception as e:
            # –ò–≥–Ω–æ—Ä–∏—Ä—É–µ–º –æ—à–∏–±–∫–∏ –æ–±–æ–≥–∞—â–µ–Ω–∏—è –º–µ—Å—Ç–∞
            pass
        return place_data
    
    def parse_raw_event(self, raw: Dict, category_name: str) -> Optional[Dict]:
        """–ü–∞—Ä—Å–∏–Ω–≥ —Å—ã—Ä–æ–≥–æ —Å–æ–±—ã—Ç–∏—è –≤ —Å–ª–æ–≤–∞—Ä—å —Å –æ–±—Ä–∞–±–æ—Ç–∫–æ–π –æ—à–∏–±–æ–∫"""
        try:
            event_id = raw.get('id')
            title = raw.get('title')
            
            if not event_id or not title:
                return None
            
            # –ë–∞–∑–æ–≤—ã–µ –¥–∞–Ω–Ω—ã–µ —Å–æ–±—ã—Ç–∏—è
            event_data = {
                'id': event_id,
                'title': str(title),
                'description': str(raw.get('description', ''))[:500],
                'category': category_name,
                'url': raw.get('site_url', f"https://kudago.com/{self.city}/event/{event_id}/"),
                'is_free': raw.get('is_free', False),
                'age_restriction': raw.get('age_restriction'),
                'favorites_count': raw.get('favorites_count', 0),
                'comments_count': raw.get('comments_count', 0),
            }
            
            # –î–∞—Ç—ã
            dates = []
            dates_text = []
            for date_info in raw.get('dates', []):
                if isinstance(date_info, dict):
                    start = date_info.get('start')
                    if start:
                        try:
                            start_dt = datetime.fromtimestamp(start)
                            dates.append({
                                'start': start, 
                                'end': date_info.get('end'),
                                'is_continuous': date_info.get('is_continuous', False)
                            })
                            text = start_dt.strftime("%d.%m.%Y %H:%M")
                            end = date_info.get('end')
                            if end:
                                try:
                                    end_dt = datetime.fromtimestamp(end)
                                    if start_dt.date() == end_dt.date():
                                        text += f" - {end_dt.strftime('%H:%M')}"
                                    else:
                                        text += f" - {end_dt.strftime('%d.%m.%Y %H:%M')}"
                                except:
                                    pass
                            dates_text.append(text)
                        except:
                            continue
            
            event_data['dates'] = dates
            event_data['dates_text'] = dates_text
            
            # –ú–µ—Å—Ç–æ
            raw_place = raw.get('place', {})
            event_data['place'] = raw_place
            if isinstance(raw_place, dict) and 'id' in raw_place:
                event_data['place_id'] = raw_place['id']
            
            # –¢–µ–∫—Å—Ç –º–µ—Å—Ç–∞
            place_parts = []
            if isinstance(raw_place, dict):
                if raw_place.get('title'):
                    place_parts.append(str(raw_place['title']))
                if raw_place.get('address'):
                    place_parts.append(str(raw_place['address']))
            event_data['place_text'] = ", ".join(place_parts)[:300]
            
            # –¶–µ–Ω–∞
            raw_price = raw.get('price', {})
            if isinstance(raw_price, dict):
                event_data['price'] = {
                    'is_free': raw_price.get('is_free', False), 
                    'min': raw_price.get('min'),
                    'max': raw_price.get('max'), 
                    'currency': raw_price.get('currency', 'RUB'),
                    'description': raw_price.get('description')
                }
                
                # –¢–µ–∫—Å—Ç —Ü–µ–Ω—ã
                price_text = ""
                if raw_price.get('is_free'):
                    price_text = "–ë–µ—Å–ø–ª–∞—Ç–Ω–æ"
                elif raw_price.get('min') and raw_price.get('max'):
                    price_text = f"{raw_price['min']} - {raw_price['max']} ‚ÇΩ"
                elif raw_price.get('min'):
                    price_text = f"–æ—Ç {raw_price['min']} ‚ÇΩ"
                elif raw_price.get('description'):
                    price_text = str(raw_price['description'])[:100]
                else:
                    price_text = "–¶–µ–Ω–∞ –Ω–µ —É–∫–∞–∑–∞–Ω–∞"
                    
                event_data['price_text'] = price_text
            else:
                event_data['price_text'] = "–¶–µ–Ω–∞ –Ω–µ —É–∫–∞–∑–∞–Ω–∞"
            
            # –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è
            images = []
            for img in raw.get('images', [])[:3]:  # –û–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ–º 3 –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è
                if isinstance(img, dict):
                    images.append({
                        'url': str(img.get('image', '')), 
                        'thumbnail': str(img.get('thumbnail', '')), 
                        'source': img.get('source', {})
                    })
            event_data['images'] = images
            event_data['image_count'] = len(images)
            
            # –¢–µ–≥–∏
            tags = []
            for tag in raw.get('tags', [])[:5]:  # –û–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ–º 5 —Ç–µ–≥–æ–≤
                if isinstance(tag, dict):
                    tags.append({
                        'name': str(tag.get('name', '')), 
                        'slug': tag.get('slug'), 
                        'id': tag.get('id')
                    })
                elif isinstance(tag, str):
                    tags.append({'name': str(tag)})
            event_data['tags'] = tags
            
            # –ì–æ—Ä–æ–¥
            event_data['city'] = self.city
            event_data['city_name'] = self.city_name
            event_data['parsed_at'] = datetime.now().isoformat()
            
            return event_data
        except Exception as e:
            logger.error(f"–û—à–∏–±–∫–∞ –ø–∞—Ä—Å–∏–Ω–≥–∞ —Å–æ–±—ã—Ç–∏—è {raw.get('id')}: {e}")
            return None
    
    async def parse_category(self, category_code: str, category_name: str) -> List[Dict]:
        """–ü–∞—Ä—Å–∏–Ω–≥ –≤—Å–µ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ —Å –ø–∞–≥–∏–Ω–∞—Ü–∏–µ–π"""
        all_events = []
        page = 1
        max_pages = 3  # –û–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ–º 3 —Å—Ç—Ä–∞–Ω–∏—Ü–∞–º–∏ –¥–ª—è —Ç–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏—è
        
        print(f"   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: {category_name}")
        
        async with aiohttp.ClientSession() as session:
            while len(all_events) < self.max_events_per_category and page <= max_pages:
                data = await self.fetch_page(session, category_code, page)
                
                if not data or 'results' not in data:
                    break
                
                events = data['results']
                if not events:
                    break
                
                # –ü–∞—Ä—Å–∏–º —Å–æ–±—ã—Ç–∏—è
                batch_events = []
                for raw_event in events:
                    event_dict = self.parse_raw_event(raw_event, category_name)
                    if event_dict:
                        batch_events.append(event_dict)
                
                # –ü—Ä–æ–ø—É—Å–∫–∞–µ–º –æ–±–æ–≥–∞—â–µ–Ω–∏–µ –º–µ—Å—Ç –¥–ª—è —Å–∫–æ—Ä–æ—Å—Ç–∏ (–æ–ø—Ü–∏–æ–Ω–∞–ª—å–Ω–æ)
                enriched_events = batch_events
                # –ï—Å–ª–∏ –Ω—É–∂–Ω–æ –æ–±–æ–≥–∞—â–µ–Ω–∏–µ:
                # enriched_events = []
                # for event_dict in batch_events:
                #     if event_dict.get('place'):
                #         event_dict['place'] = await self.enrich_place_info(session, event_dict['place'])
                #     enriched_events.append(event_dict)
                
                all_events.extend(enriched_events)
                
                print(f"    –°—Ç—Ä–∞–Ω–∏—Ü–∞ {page}: {len(events)} —Å–æ–±—ã—Ç–∏–π, —Å–ø–∞—Ä—à–µ–Ω–æ {len(enriched_events)}")
                
                if not data.get('next'):
                    break
                
                page += 1
                await asyncio.sleep(1)  # –ü–∞—É–∑–∞ –º–µ–∂–¥—É –∑–∞–ø—Ä–æ—Å–∞–º–∏
        
        print(f"     –í—Å–µ–≥–æ: {len(all_events)} —Å–æ–±—ã—Ç–∏–π")
        return all_events
    
    async def parse_all_categories(self, max_concurrent: int = 2) -> Dict[str, List[Dict]]:
        """–ü–∞—Ä—Å–∏–Ω–≥ –≤—Å–µ—Ö –∫–∞—Ç–µ–≥–æ—Ä–∏–π –ø–∞—Ä–∞–ª–ª–µ–ª—å–Ω–æ"""
        print(f" –ù–∞—á–∏–Ω–∞–µ–º –ø–∞—Ä—Å–∏–Ω–≥ –≥–æ—Ä–æ–¥–∞: {self.city_name}")
        print(f" –ö–∞—Ç–µ–≥–æ—Ä–∏–π: {len(self.categories)}")
        print(f" –¶–µ–ª—å: {self.max_events_per_category} —Å–æ–±—ã—Ç–∏–π –Ω–∞ –∫–∞—Ç–µ–≥–æ—Ä–∏—é")
        print("-" * 50)
        
        # –°–æ–∑–¥–∞–µ–º –∑–∞–¥–∞—á–∏ –¥–ª—è –∫–∞–∂–¥–æ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–∏
        tasks = []
        for cat_code, cat_name in self.categories:
            task = self.parse_category(cat_code, cat_name)
            tasks.append((cat_name, task))
        
        # –û–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ–º –æ–¥–Ω–æ–≤—Ä–µ–º–µ–Ω–Ω—ã–µ –∑–∞–ø—Ä–æ—Å—ã
        semaphore = asyncio.Semaphore(max_concurrent)
        
        async def run_with_limit(cat_name, task):
            async with semaphore:
                try:
                    return cat_name, await task
                except Exception as e:
                    logger.error(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–∞—Ä—Å–∏–Ω–≥–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {cat_name}: {e}")
                    return cat_name, []
        
        # –ó–∞–ø—É—Å–∫–∞–µ–º –≤—Å–µ –∑–∞–¥–∞—á–∏
        all_tasks = [run_with_limit(cat_name, task) for cat_name, task in tasks]
        results_list = await asyncio.gather(*all_tasks, return_exceptions=True)
        
        # –û–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã
        results = {}
        total_events = 0
        
        for result in results_list:
            if isinstance(result, Exception):
                logger.error(f"–ò—Å–∫–ª—é—á–µ–Ω–∏–µ –ø—Ä–∏ –ø–∞—Ä—Å–∏–Ω–≥–µ: {result}")
                continue
            
            cat_name, events = result
            results[cat_name] = events
            total_events += len(events)
        
        print("\n" + "="*50)
        print(f" –ü–ê–†–°–ò–ù–ì –ó–ê–í–ï–†–®–ï–ù!")
        print(f"  –ì–æ—Ä–æ–¥: {self.city_name}")
        print(f" –í—Å–µ–≥–æ —Å–æ–±—ã—Ç–∏–π: {total_events}")
        print("="*50)
        
        # –í—ã–≤–æ–¥–∏–º —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫—É –ø–æ –∫–∞—Ç–µ–≥–æ—Ä–∏—è–º
        for cat_name, events in results.items():
            if events:
                print(f"  {cat_name}: {len(events)} —Å–æ–±—ã—Ç–∏–π")
            else:
                print(f"  {cat_name}: 0 —Å–æ–±—ã—Ç–∏–π (–Ω–µ—Ç –¥–∞–Ω–Ω—ã—Ö)")
        
        return results

print(" –£–ª—É—á—à–µ–Ω–Ω—ã–π –ø–∞—Ä—Å–µ—Ä —Å–æ–∑–¥–∞–Ω!")

# ============ 5. –û–°–ù–û–í–ù–û–ô –ü–ê–ô–ü–õ–ê–ô–ù ============
class DataPipeline:
    """–ü–æ–ª–Ω—ã–π –ø–∞–π–ø–ª–∞–π–Ω –æ–±—Ä–∞–±–æ—Ç–∫–∏ –¥–∞–Ω–Ω—ã—Ö"""
    
    def __init__(self, city: str = "msk"):
        self.city = city
        self.parser = EnhancedKudaGoParser(city)
        self.vector_db = EventVectorDatabaseSQLite(db_path=f"events_{city}_vector.db")
        self.events_dict = None
        self.pydantic_events = None
    
    async def run_full_pipeline(self):
        """–ó–∞–ø—É—Å–∫ –ø–æ–ª–Ω–æ–≥–æ –ø–∞–π–ø–ª–∞–π–Ω–∞"""
        print("="*60)
        print(" –ó–ê–ü–£–°–ö –ü–û–õ–ù–û–ì–û –ü–ê–ô–ü–õ–ê–ô–ù–ê")
        print("="*60)
        
        # 1. –ü–∞—Ä—Å–∏–Ω–≥ –¥–∞–Ω–Ω—ã—Ö
        print("\n1. –ü–ê–†–°–ò–ù–ì –î–ê–ù–ù–´–•")
        print("-"*40)
        self.events_dict = await self.parser.parse_all_categories(max_concurrent=2)
        
        # –û–±—ä–µ–¥–∏–Ω—è–µ–º –≤—Å–µ —Å–æ–±—ã—Ç–∏—è
        all_events_raw = []
        for category, events in self.events_dict.items():
            all_events_raw.extend(events)
        
        print(f" –í—Å–µ–≥–æ —Å–ø–∞—Ä—à–µ–Ω–æ: {len(all_events_raw)} —Å–æ–±—ã—Ç–∏–π")
        
        if not all_events_raw:
            print(" –ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –æ–±—Ä–∞–±–æ—Ç–∫–∏")
            return None
        
        # 2. –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è –≤ Pydantic –º–æ–¥–µ–ª–∏
        print("\n2. –ö–û–ù–í–ï–†–¢–ê–¶–ò–Ø –í PYDANTIC")
        print("-"*40)
        self.pydantic_events = self.convert_to_pydantic(all_events_raw)
        print(f" –ö–æ–Ω–≤–µ—Ä—Ç–∏—Ä–æ–≤–∞–Ω–æ: {len(self.pydantic_events)} –º–æ–¥–µ–ª–µ–π")
        
        # 3. –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –≤ JSON
        print("\n3. –°–û–•–†–ê–ù–ï–ù–ò–ï –í JSON")
        print("-"*40)
        json_path = self.save_to_json(all_events_raw)
        
        # 4. –ó–∞–≥—Ä—É–∑–∫–∞ –≤ –≤–µ–∫—Ç–æ—Ä–Ω—É—é –ë–î SQLite
        print("\n4. –ó–ê–ì–†–£–ó–ö–ê –í –í–ï–ö–¢–û–†–ù–£–Æ –ë–î SQLite")
        print("-"*40)
        save_result = self.vector_db.save_events_batch(all_events_raw)
        print(f" –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ: {save_result['success']} —Å–æ–±—ã—Ç–∏–π")
        print(f"  –û—à–∏–±–æ–∫: {save_result['failed']}")
        
        # 5. –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞
        print("\n5. –°–¢–ê–¢–ò–°–¢–ò–ö–ê")
        print("-"*40)
        self.show_statistics(all_events_raw)
        
        # 6. –ü—Ä–∏–º–µ—Ä –ø–æ–∏—Å–∫–∞
        print("\n6. –ü–†–ò–ú–ï–† –ü–û–ò–°–ö–ê")
        print("-"*40)
        if all_events_raw:
            # –ü—Ä–∏–º–µ—Ä –ø–æ–∏—Å–∫–∞
            search_query = "–∫–æ–Ω—Ü–µ—Ä—Ç –º—É–∑—ã–∫–∞"
            print(f" –ü–æ–∏—Å–∫: '{search_query}'")
            similar_events = self.vector_db.search_similar(search_query, n_results=3, city=self.city)
            
            if similar_events:
                print(f" –ù–∞–π–¥–µ–Ω–æ {len(similar_events)} –ø–æ—Ö–æ–∂–∏—Ö —Å–æ–±—ã—Ç–∏–π:")
                for i, event in enumerate(similar_events, 1):
                    print(f"{i}. {event['title'][:50]}...")
                    print(f"   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: {event['category']}")
                    print(f"   –°—Ö–æ–¥—Å—Ç–≤–æ: {event['similarity']:.3f}")
                    if event.get('price_text'):
                        print(f"   –¶–µ–Ω–∞: {event['price_text']}")
                    print()
            else:
                print(" –ü–æ—Ö–æ–∂–∏—Ö —Å–æ–±—ã—Ç–∏–π –Ω–µ –Ω–∞–π–¥–µ–Ω–æ")
        
        print("\n" + "="*60)
        print(" –ü–ê–ô–ü–õ–ê–ô–ù –£–°–ü–ï–®–ù–û –ó–ê–í–ï–†–®–ï–ù!")
        print("="*60)
        
        return {
            'total_events': len(all_events_raw),
            'pydantic_count': len(self.pydantic_events),
            'json_file': json_path,
            'db_stats': self.vector_db.get_stats(),
            'city': self.city
        }
    
    def convert_to_pydantic(self, events_raw: List[Dict]) -> List[EventModel]:
        """–ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è —Å–ª–æ–≤–∞—Ä–µ–π –≤ Pydantic –º–æ–¥–µ–ª–∏"""
        pydantic_events = []
        converted = 0
        errors = 0
        
        for event_dict in events_raw[:100]:  # –û–≥—Ä–∞–Ω–∏—á–∏–≤–∞–µ–º –¥–ª—è —Å–∫–æ—Ä–æ—Å—Ç–∏
            try:
                category_str = event_dict.get('category', '').lower()
                try:
                    # –ü—Ä–æ–±—É–µ–º –Ω–∞–π—Ç–∏ –ø–æ–¥—Ö–æ–¥—è—â—É—é –∫–∞—Ç–µ–≥–æ—Ä–∏—é
                    if category_str == "–∫–æ–Ω—Ü–µ—Ä—Ç—ã":
                        category = EventCategory.CONCERT
                    elif category_str == "—Ç–µ–∞—Ç—Ä":
                        category = EventCategory.THEATER
                    elif category_str == "–≤—ã—Å—Ç–∞–≤–∫–∏":
                        category = EventCategory.EXHIBITION
                    elif category_str == "—Ñ–µ—Å—Ç–∏–≤–∞–ª–∏":
                        category = EventCategory.FESTIVAL
                    elif category_str == "–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ":
                        category = EventCategory.EDUCATION
                    elif category_str == "–≤–µ—á–µ—Ä–∏–Ω–∫–∏":
                        category = EventCategory.PARTY
                    elif category_str == "—Å–ø–æ—Ä—Ç":
                        category = EventCategory.SPORT
                    elif category_str == "–∫–≤–µ—Å—Ç—ã":
                        category = EventCategory.QUEST
                    elif category_str == "—ç–∫—Å–∫—É—Ä—Å–∏–∏":
                        category = EventCategory.EXCURSION
                    elif category_str == "—à–æ—É":
                        category = EventCategory.SHOW
                    elif category_str == "—Å—Ç–µ–Ω–¥–∞–ø":
                        category = EventCategory.STANDUP
                    elif category_str == "–¥–µ—Ç—Å–∫–∏–µ":
                        category = EventCategory.KIDS
                    elif category_str == "–º–æ–¥–∞":
                        category = EventCategory.FASHION
                    elif category_str == "–≥–∞—Å—Ç—Ä–æ–Ω–æ–º–∏—è":
                        category = EventCategory.GASTRONOMY
                    elif category_str == "–∫–∏–Ω–æ":
                        category = EventCategory.CINEMA
                    else:
                        category = EventCategory.OTHER
                except:
                    category = EventCategory.OTHER
                
                # –¢–µ–≥–∏
                tags = []
                for tag_dict in event_dict.get('tags', []):
                    if isinstance(tag_dict, dict):
                        tags.append(TagModel(
                            name=tag_dict.get('name', ''),
                            slug=tag_dict.get('slug'),
                            id=tag_dict.get('id')
                        ))
                
                # –î–∞—Ç—ã
                dates = []
                for date_dict in event_dict.get('dates', []):
                    if isinstance(date_dict, dict) and date_dict.get('start'):
                        dates.append(DateModel(
                            start=date_dict.get('start'),
                            end=date_dict.get('end'),
                            is_continuous=date_dict.get('is_continuous', False)
                        ))
                
                # –ú–µ—Å—Ç–æ
                place = None
                place_dict = event_dict.get('place')
                if place_dict and isinstance(place_dict, dict):
                    place = PlaceModel(
                        id=place_dict.get('id'),
                        title=place_dict.get('title', '–ù–µ–∏–∑–≤–µ—Å—Ç–Ω–æ'),
                        address=place_dict.get('address'),
                        subway=place_dict.get('subway'),
                        coords=place_dict.get('coords'),
                        phone=place_dict.get('phone'),
                        site_url=place_dict.get('site_url'),
                        city=place_dict.get('city'),
                        is_closed=place_dict.get('is_closed', False),
                        working_hours=place_dict.get('working_hours')
                    )
                
                # –¶–µ–Ω–∞
                price = None
                price_dict = event_dict.get('price')
                if price_dict and isinstance(price_dict, dict):
                    price = PriceModel(
                        is_free=price_dict.get('is_free', False),
                        min=price_dict.get('min'),
                        max=price_dict.get('max'),
                        currency=price_dict.get('currency', 'RUB'),
                        description=price_dict.get('description')
                    )
                
                # –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è
                images = []
                for img_dict in event_dict.get('images', []):
                    if isinstance(img_dict, dict):
                        images.append(ImageModel(
                            url=img_dict.get('url', ''),
                            thumbnail=img_dict.get('thumbnail'),
                            source=img_dict.get('source')
                        ))
                
                # –£—á–∞—Å—Ç–Ω–∏–∫–∏ (—É–ø—Ä–æ—â–µ–Ω–Ω–æ, —Ç–∞–∫ –∫–∞–∫ –≤ –¥–∞–Ω–Ω—ã—Ö –∏—Ö –º–æ–∂–µ—Ç –Ω–µ –±—ã—Ç—å)
                participants = []
                
                # –°–æ–∑–¥–∞–µ–º Pydantic –º–æ–¥–µ–ª—å
                event_model = EventModel(
                    id=event_dict['id'],
                    title=event_dict['title'],
                    description=event_dict.get('description'),
                    category=category,
                    tags=tags,
                    dates=dates,
                    age_restriction=event_dict.get('age_restriction'),
                    place=place,
                    place_id=event_dict.get('place_id'),
                    price=price,
                    images=images,
                    participants=participants,
                    url=event_dict.get('url', ''),
                    favorites_count=event_dict.get('favorites_count', 0),
                    comments_count=event_dict.get('comments_count', 0),
                    location=event_dict.get('location', self.city),
                    is_free=event_dict.get('is_free', False)
                )
                
                pydantic_events.append(event_model)
                converted += 1
                
            except Exception as e:
                errors += 1
                logger.debug(f"–û—à–∏–±–∫–∞ –∫–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏–∏ —Å–æ–±—ã—Ç–∏—è {event_dict.get('id')}: {e}")
        
        print(f" –£—Å–ø–µ—à–Ω–æ: {converted}, –æ—à–∏–±–æ–∫: {errors}")
        return pydantic_events
    
    def save_to_json(self, events: List[Dict]) -> str:
        """–°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –¥–∞–Ω–Ω—ã—Ö –≤ JSON —Ñ–∞–π–ª"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"events_{self.city}_{timestamp}.json"
        filepath = Path("data") / filename
        Path("data").mkdir(exist_ok=True)
        
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(events, f, ensure_ascii=False, indent=2)
        
        size_mb = os.path.getsize(filepath) / (1024 * 1024)
        print(f" –§–∞–π–ª —Å–æ—Ö—Ä–∞–Ω–µ–Ω: {filepath}")
        print(f" –†–∞–∑–º–µ—Ä: {size_mb:.2f} MB")
        
        return str(filepath)
    
    def show_statistics(self, events: List[Dict]):
        """–û—Ç–æ–±—Ä–∞–∂–µ–Ω–∏–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏"""
        if not events:
            print(" –ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –¥–ª—è —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏")
            return
        
        print(f" –°–¢–ê–¢–ò–°–¢–ò–ö–ê –î–ê–ù–ù–´–•:")
        print(f"   –í—Å–µ–≥–æ —Å–æ–±—ã—Ç–∏–π: {len(events)}")
        
        # –ü–æ –∫–∞—Ç–µ–≥–æ—Ä–∏—è–º
        categories = {}
        for event in events:
            cat = event.get('category', '–ù–µ–∏–∑–≤–µ—Å—Ç–Ω–æ')
            categories[cat] = categories.get(cat, 0) + 1
        
        print(f"   –ö–∞—Ç–µ–≥–æ—Ä–∏–π: {len(categories)}")
        print(f"   –¢–æ–ø-5 –∫–∞—Ç–µ–≥–æ—Ä–∏–π:")
        for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True)[:5]:
            percentage = (count / len(events)) * 100
            print(f"     ‚Ä¢ {cat}: {count} ({percentage:.1f}%)")
        
        # –ú–µ—Å—Ç–∞
        events_with_place = sum(1 for e in events if e.get('place'))
        print(f"   –°–æ–±—ã—Ç–∏–π —Å —É–∫–∞–∑–∞–Ω–∏–µ–º –º–µ—Å—Ç–∞: {events_with_place} ({events_with_place/len(events)*100:.1f}%)")
        
        # –ë–µ—Å–ø–ª–∞—Ç–Ω—ã–µ
        free_events = sum(1 for e in events if e.get('is_free'))
        print(f"   –ë–µ—Å–ø–ª–∞—Ç–Ω—ã—Ö —Å–æ–±—ã—Ç–∏–π: {free_events} ({free_events/len(events)*100:.1f}%)")
        
        # –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è
        events_with_images = sum(1 for e in events if e.get('images'))
        print(f"   –°–æ–±—ã—Ç–∏–π —Å –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è–º–∏: {events_with_images} ({events_with_images/len(events)*100:.1f}%)")
        
        # –î–∞—Ç—ã
        events_with_dates = sum(1 for e in events if e.get('dates'))
        print(f"   –°–æ–±—ã—Ç–∏–π —Å –¥–∞—Ç–∞–º–∏: {events_with_dates} ({events_with_dates/len(events)*100:.1f}%)")
    
    def search_events(self, query: str, n_results: int = 10, category: str = None):
        """–ü–æ–∏—Å–∫ —Å–æ–±—ã—Ç–∏–π –ø–æ –∑–∞–ø—Ä–æ—Å—É"""
        return self.vector_db.search_similar(query, n_results=n_results, city=self.city, category=category)
    
    def get_database_stats(self):
        """–ü–æ–ª—É—á–µ–Ω–∏–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –ë–î"""
        return self.vector_db.get_stats()

print(" –ü–∞–π–ø–ª–∞–π–Ω —Å–æ–∑–¥–∞–Ω!")

# ============ 6. –§–£–ù–ö–¶–ò–ò –î–õ–Ø –ë–´–°–¢–†–û–ì–û –ó–ê–ü–£–°–ö–ê ============
async def quick_parse_moscow():
    """–ë—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –ú–æ—Å–∫–≤—ã"""
    print(" –ë–´–°–¢–†–´–ô –ü–ê–†–°–ò–ù–ì –ú–û–°–ö–í–´")
    print("="*50)
    
    pipeline = DataPipeline("msk")
    results = await pipeline.run_full_pipeline()
    
    if results:
        print(f"\n –†–ï–ó–£–õ–¨–¢–ê–¢–´:")
        print(f"   –°–æ–±—ã—Ç–∏–π: {results['total_events']}")
        print(f"   Pydantic –º–æ–¥–µ–ª–µ–π: {results['pydantic_count']}")
        print(f"   JSON —Ñ–∞–π–ª: {results['json_file'].split('/')[-1]}")
        print(f"   –ë–î: {results['db_stats'].get('db_path', 'N/A')}")
    
    return results

async def quick_parse_spb():
    """–ë—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥–∞"""
    print(" –ë–´–°–¢–†–´–ô –ü–ê–†–°–ò–ù–ì –°–ê–ù–ö–¢-–ü–ï–¢–ï–†–ë–£–†–ì–ê")
    print("="*50)
    
    pipeline = DataPipeline("spb")
    results = await pipeline.run_full_pipeline()
    
    if results:
        print(f"\n –†–ï–ó–£–õ–¨–¢–ê–¢–´:")
        print(f"  –°–æ–±—ã—Ç–∏–π: {results['total_events']}")
        print(f"   Pydantic –º–æ–¥–µ–ª–µ–π: {results['pydantic_count']}")
        print(f"   JSON —Ñ–∞–π–ª: {results['json_file'].split('/')[-1]}")
        print(f"   –ë–î: {results['db_stats'].get('db_path', 'N/A')}")
    
    return results

async def parse_both_cities():
    """–ü–∞—Ä—Å–∏–Ω–≥ –æ–±–æ–∏—Ö –≥–æ—Ä–æ–¥–æ–≤"""
    print("  –ü–ê–†–°–ò–ù–ì –ú–û–°–ö–í–´ –ò –°–ê–ù–ö–¢-–ü–ï–¢–ï–†–ë–£–†–ì–ê")
    print("="*50)
    
    results = {}
    
    # –ú–æ—Å–∫–≤–∞
    print("\n1. –ú–û–°–ö–í–ê")
    print("-"*40)
    results["msk"] = await quick_parse_moscow()
    
    # –ü–∞—É–∑–∞
    print("\n –ñ–¥–µ–º 2 —Å–µ–∫—É–Ω–¥—ã...")
    await asyncio.sleep(2)
    
    # –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥
    print("\n2. –°–ê–ù–ö–¢-–ü–ï–¢–ï–†–ë–£–†–ì")
    print("-"*40)
    results["spb"] = await quick_parse_spb()
    
    # –ò—Ç–æ–≥–∏
    print("\n" + "="*50)
    print(" –ò–¢–û–ì–ò –ü–û –û–ë–û–ò–ú –ì–û–†–û–î–ê–ú")
    print("="*50)
    
    total_events = 0
    for city, result in results.items():
        if result:
            events = result.get('total_events', 0)
            total_events += events
            city_name = "–ú–æ—Å–∫–≤–∞" if city == "msk" else "–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥"
            print(f"{city_name}: {events} —Å–æ–±—ã—Ç–∏–π")
    
    print(f"\n –í–°–ï–ì–û: {total_events} —Å–æ–±—ã—Ç–∏–π")
    
    return results

# ============ 7. –ó–ê–ü–£–°–ö ============
print("\n" + "="*70)
print(" –ü–ê–†–°–ï–† –°–û–ë–´–¢–ò–ô KUDAGO - SQLite –í–ï–ö–¢–û–†–ù–ê–Ø –ë–ê–ó–ê")
print("="*70)
print("\n –í–û–ó–ú–û–ñ–ù–û–°–¢–ò:")
print("   15+ –∫–∞—Ç–µ–≥–æ—Ä–∏–π —Å–æ–±—ã—Ç–∏–π")
print("   –ü–∞—Ä—Å–∏–Ω–≥ –ú–æ—Å–∫–≤—ã –∏ –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥–∞")
print("   –û–±–æ–≥–∞—â–µ–Ω–∏–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ –º–µ—Å—Ç–∞—Ö")
print("   Pydantic –º–æ–¥–µ–ª–∏ –¥–ª—è –≤–∞–ª–∏–¥–∞—Ü–∏–∏")
print("   SQLite –≤–µ–∫—Ç–æ—Ä–Ω–∞—è –±–∞–∑–∞ —Å —ç–º–±–µ–¥–¥–∏–Ω–≥–∞–º–∏")
print("   –°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–π –ø–æ–∏—Å–∫ –ø–æ —Å–æ–±—ã—Ç–∏—è–º")
print("\n –ö–û–ú–ê–ù–î–´ –î–õ–Ø –ó–ê–ü–£–°–ö–ê:")
print("1. await quick_parse_moscow()    - –±—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –ú–æ—Å–∫–≤—ã")
print("2. await quick_parse_spb()       - –±—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –°–ü–±")
print("3. await parse_both_cities()     - –ø–∞—Ä—Å–∏–Ω–≥ –æ–±–æ–∏—Ö –≥–æ—Ä–æ–¥–æ–≤")
print("\n –ö–û–ú–ê–ù–î–´ –î–õ–Ø –ü–û–ò–°–ö–ê –ü–û–°–õ–ï –ü–ê–†–°–ò–ù–ì–ê:")
print('pipeline = DataPipeline("msk")')
print('results = pipeline.search_events("–∫–æ–Ω—Ü–µ—Ä—Ç —Ä–æ–∫", n_results=5)')
print('stats = pipeline.get_database_stats()')

 –ë–∏–±–ª–∏–æ—Ç–µ–∫–∏ –∑–∞–≥—Ä—É–∂–µ–Ω—ã –∏ –æ–∫—Ä—É–∂–µ–Ω–∏–µ –Ω–∞—Å—Ç—Ä–æ–µ–Ω–æ!
 Pydantic –º–æ–¥–µ–ª–∏ —Å–æ–∑–¥–∞–Ω—ã!
 –í–µ–∫—Ç–æ—Ä–Ω–∞—è –ë–î SQLite —Å–æ–∑–¥–∞–Ω–∞!
 –£–ª—É—á—à–µ–Ω–Ω—ã–π –ø–∞—Ä—Å–µ—Ä —Å–æ–∑–¥–∞–Ω!
 –ü–∞–π–ø–ª–∞–π–Ω —Å–æ–∑–¥–∞–Ω!

 –ü–ê–†–°–ï–† –°–û–ë–´–¢–ò–ô KUDAGO - SQLite –í–ï–ö–¢–û–†–ù–ê–Ø –ë–ê–ó–ê

 –í–û–ó–ú–û–ñ–ù–û–°–¢–ò:
   15+ –∫–∞—Ç–µ–≥–æ—Ä–∏–π —Å–æ–±—ã—Ç–∏–π
   –ü–∞—Ä—Å–∏–Ω–≥ –ú–æ—Å–∫–≤—ã –∏ –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥–∞
   –û–±–æ–≥–∞—â–µ–Ω–∏–µ –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ –º–µ—Å—Ç–∞—Ö
   Pydantic –º–æ–¥–µ–ª–∏ –¥–ª—è –≤–∞–ª–∏–¥–∞—Ü–∏–∏
   SQLite –≤–µ–∫—Ç–æ—Ä–Ω–∞—è –±–∞–∑–∞ —Å —ç–º–±–µ–¥–¥–∏–Ω–≥–∞–º–∏
   –°–µ–º–∞–Ω—Ç–∏—á–µ—Å–∫–∏–π –ø–æ–∏—Å–∫ –ø–æ —Å–æ–±—ã—Ç–∏—è–º

 –ö–û–ú–ê–ù–î–´ –î–õ–Ø –ó–ê–ü–£–°–ö–ê:
1. await quick_parse_moscow()    - –±—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –ú–æ—Å–∫–≤—ã
2. await quick_parse_spb()       - –±—ã—Å—Ç—Ä—ã–π –ø–∞—Ä—Å–∏–Ω–≥ –°–ü–±
3. await parse_both_cities()     - –ø–∞—Ä—Å–∏–Ω–≥ –æ–±–æ–∏—Ö –≥–æ—Ä–æ–¥–æ–≤

 –ö–û–ú–ê–ù–î–´ –î–õ–Ø –ü–û–ò–°–ö–

In [2]:
await parse_both_cities()

  –ü–ê–†–°–ò–ù–ì –ú–û–°–ö–í–´ –ò –°–ê–ù–ö–¢-–ü–ï–¢–ï–†–ë–£–†–ì–ê

1. –ú–û–°–ö–í–ê
----------------------------------------
 –ë–´–°–¢–†–´–ô –ü–ê–†–°–ò–ù–ì –ú–û–°–ö–í–´
 –í–µ–∫—Ç–æ—Ä–Ω–∞—è –ë–î SQLite —Å–æ–∑–¥–∞–Ω–∞: events_msk_vector.db
 –†–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤: 384
 –ó–ê–ü–£–°–ö –ü–û–õ–ù–û–ì–û –ü–ê–ô–ü–õ–ê–ô–ù–ê

1. –ü–ê–†–°–ò–ù–ì –î–ê–ù–ù–´–•
----------------------------------------
 –ù–∞—á–∏–Ω–∞–µ–º –ø–∞—Ä—Å–∏–Ω–≥ –≥–æ—Ä–æ–¥–∞: –ú–æ—Å–∫–≤–∞
 –ö–∞—Ç–µ–≥–æ—Ä–∏–π: 10
 –¶–µ–ª—å: 30 —Å–æ–±—ã—Ç–∏–π –Ω–∞ –∫–∞—Ç–µ–≥–æ—Ä–∏—é
--------------------------------------------------
   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: –ö–æ–Ω—Ü–µ—Ä—Ç—ã
   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: –¢–µ–∞—Ç—Ä
    –°—Ç—Ä–∞–Ω–∏—Ü–∞ 1: 20 —Å–æ–±—ã—Ç–∏–π, —Å–ø–∞—Ä—à–µ–Ω–æ 20
    –°—Ç—Ä–∞–Ω–∏—Ü–∞ 1: 20 —Å–æ–±—ã—Ç–∏–π, —Å–ø–∞—Ä—à–µ–Ω–æ 20
    –°—Ç—Ä–∞–Ω–∏—Ü–∞ 2: 20 —Å–æ–±—ã—Ç–∏–π, —Å–ø–∞—Ä—à–µ–Ω–æ 20
    –°—Ç—Ä–∞–Ω–∏—Ü–∞ 2: 20 —Å–æ–±—ã—Ç–∏–π, —Å–ø–∞—Ä—à–µ–Ω–æ 20
     –í—Å–µ–≥–æ: 40 —Å–æ–±—ã—Ç–∏–π
   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: –í—ã—Å—Ç–∞–≤–∫–∏
  

{'msk': {'total_events': 400,
  'pydantic_count': 82,
  'json_file': 'data\\events_msk_20251212_005956.json',
  'db_stats': {'total_events': 838,
   'total_embeddings': 1975,
   'cities_count': 1,
   'categories_count': 10,
   'events_by_city': {'msk': 838},
   'db_path': 'events_msk_vector.db'},
  'city': 'msk'},
 'spb': {'total_events': 320,
  'pydantic_count': 78,
  'json_file': 'data\\events_spb_20251212_010029.json',
  'db_stats': {'total_events': 784,
   'total_embeddings': 1258,
   'cities_count': 1,
   'categories_count': 10,
   'events_by_city': {'spb': 784},
   'db_path': 'events_spb_vector.db'},
  'city': 'spb'}}

In [None]:
%%writefile vector_db_manager.py

import sqlite3
import numpy as np
import json
import os
import shutil
from datetime import datetime, timedelta  # –ò–°–ü–†–ê–í–õ–ï–ù–û: –¥–æ–±–∞–≤–ª–µ–Ω timedelta
from typing import List, Dict, Optional, Any
import logging

# –ù–∞—Å—Ç—Ä–æ–π–∫–∞ –ª–æ–≥–∏—Ä–æ–≤–∞–Ω–∏—è
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class VectorDBManager:
    """–ú–µ–Ω–µ–¥–∂–µ—Ä –¥–ª—è —Ä–∞–±–æ—Ç—ã —Å –≤–µ–∫—Ç–æ—Ä–Ω–æ–π –ë–î —Å–æ–±—ã—Ç–∏–π"""
    
    def __init__(self, db_path: str):
        """
        –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –º–µ–Ω–µ–¥–∂–µ—Ä–∞ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö
        
        Args:
            db_path: –ü—É—Ç—å –∫ —Ñ–∞–π–ª—É –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö SQLite
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row  # –î–ª—è –¥–æ—Å—Ç—É–ø–∞ –∫ –∫–æ–ª–æ–Ω–∫–∞–º –ø–æ –∏–º–µ–Ω–∏
        logger.info(f"–ü–æ–¥–∫–ª—é—á–µ–Ω–æ –∫ –ë–î: {db_path}")
    
    def get_database_stats(self) -> Dict[str, Any]:
        """–ü–æ–ª—É—á–µ–Ω–∏–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö"""
        cursor = self.conn.cursor()
        
        cursor.execute("SELECT COUNT(*) FROM events")
        total_events = cursor.fetchone()[0] or 0
        
        cursor.execute("SELECT COUNT(*) FROM event_embeddings")
        total_embeddings = cursor.fetchone()[0] or 0
        
        cursor.execute("SELECT COUNT(DISTINCT category) FROM events")
        categories_count = cursor.fetchone()[0] or 0
        
        cursor.execute("SELECT COUNT(DISTINCT city) FROM events")
        cities_count = cursor.fetchone()[0] or 0
        
        cursor.execute("SELECT category, COUNT(*) as count FROM events GROUP BY category ORDER BY count DESC")
        category_stats = cursor.fetchall()
        
        return {
            "total_events": total_events,
            "total_embeddings": total_embeddings,
            "categories_count": categories_count,
            "cities_count": cities_count,
            "category_stats": [dict(row) for row in category_stats],
            "db_path": self.db_path,
            "db_size_mb": os.path.getsize(self.db_path) / (1024 * 1024) if os.path.exists(self.db_path) else 0
        }
    
    def search_by_keyword(self, keyword: str, limit: int = 10) -> List[Dict]:
        """–ü–æ–∏—Å–∫ –ø–æ –∫–ª—é—á–µ–≤—ã–º —Å–ª–æ–≤–∞–º (–Ω–µ –≤–µ–∫—Ç–æ—Ä–Ω—ã–π)"""
        cursor = self.conn.cursor()
        cursor.execute('''
            SELECT id, title, category, dates_text, price_text, place_text, url
            FROM events 
            WHERE title LIKE ? OR description LIKE ? OR embedding_text LIKE ?
            ORDER BY id DESC
            LIMIT ?
        ''', (f'%{keyword}%', f'%{keyword}%', f'%{keyword}%', limit))
        
        return [dict(row) for row in cursor.fetchall()]
    
    def search_by_category(self, category: str, limit: int = 20) -> List[Dict]:
        """–ü–æ–∏—Å–∫ —Å–æ–±—ã—Ç–∏–π –ø–æ –∫–∞—Ç–µ–≥–æ—Ä–∏–∏"""
        cursor = self.conn.cursor()
        cursor.execute('''
            SELECT id, title, category, dates_text, price_text, place_text, url, description
            FROM events 
            WHERE category = ?
            ORDER BY id DESC
            LIMIT ?
        ''', (category, limit))
        
        return [dict(row) for row in cursor.fetchall()]
    
    def get_recent_events(self, days: int = 7, limit: int = 20) -> List[Dict]:
        """–ü–æ–ª—É—á–µ–Ω–∏–µ –Ω–µ–¥–∞–≤–Ω–∏—Ö —Å–æ–±—ã—Ç–∏–π"""
        cursor = self.conn.cursor()
        
        # –î–ª—è —É–ø—Ä–æ—â–µ–Ω–∏—è, –±–µ—Ä–µ–º –ø—Ä–æ—Å—Ç–æ –ø–æ—Å–ª–µ–¥–Ω–∏–µ –¥–æ–±–∞–≤–ª–µ–Ω–Ω—ã–µ
        cursor.execute('''
            SELECT id, title, category, dates_text, price_text, place_text, url, parsed_at
            FROM events 
            ORDER BY parsed_at DESC
            LIMIT ?
        ''', (limit,))
        
        return [dict(row) for row in cursor.fetchall()]
    
    def export_to_csv(self, output_file: str = "events_export.csv") -> bool:
        """–≠–∫—Å–ø–æ—Ä—Ç —Å–æ–±—ã—Ç–∏–π –≤ CSV"""
        try:
            import pandas as pd
            df = pd.read_sql_query("SELECT * FROM events", self.conn)
            df.to_csv(output_file, index=False, encoding='utf-8')
            logger.info(f"‚úÖ –≠–∫—Å–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞–Ω–æ {len(df)} —Å–æ–±—ã—Ç–∏–π –≤ {output_file}")
            return True
        except ImportError:
            logger.error("‚ùå –î–ª—è —ç–∫—Å–ø–æ—Ä—Ç–∞ –≤ CSV —É—Å—Ç–∞–Ω–æ–≤–∏—Ç–µ pandas: pip install pandas")
            return False
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ —ç–∫—Å–ø–æ—Ä—Ç–∞ –≤ CSV: {e}")
            return False
    
    def export_simple_csv(self, output_file: str = "events_simple.csv"):
        """–≠–∫—Å–ø–æ—Ä—Ç –≤ CSV –±–µ–∑ pandas (–ø—Ä–æ—Å—Ç–æ–π –≤–∞—Ä–∏–∞–Ω—Ç)"""
        try:
            cursor = self.conn.cursor()
            cursor.execute('''
                SELECT id, title, category, dates_text, price_text, place_text, url, city, parsed_at
                FROM events
            ''')
            
            with open(output_file, 'w', encoding='utf-8') as f:
                # –ó–∞–≥–æ–ª–æ–≤–æ–∫
                f.write("ID;–ù–∞–∑–≤–∞–Ω–∏–µ;–ö–∞—Ç–µ–≥–æ—Ä–∏—è;–î–∞—Ç—ã;–¶–µ–Ω–∞;–ú–µ—Å—Ç–æ;URL;–ì–æ—Ä–æ–¥;–î–∞—Ç–∞ –ø–∞—Ä—Å–∏–Ω–≥–∞\n")
                
                # –î–∞–Ω–Ω—ã–µ
                for row in cursor.fetchall():
                    # –≠–∫—Ä–∞–Ω–∏—Ä—É–µ–º –∫–∞–≤—ã—á–∫–∏ –∏ –∑–∞–º–µ–Ω—è–µ–º —Ä–∞–∑–¥–µ–ª–∏—Ç–µ–ª–∏
                    title = str(row[1]).replace(';', ',').replace('"', "'")
                    category = str(row[2]).replace(';', ',')
                    dates = str(row[3]).replace(';', ',').replace('"', "'") if row[3] else ""
                    price = str(row[4]).replace(';', ',') if row[4] else ""
                    place = str(row[5]).replace(';', ',').replace('"', "'") if row[5] else ""
                    url = str(row[6]) if row[6] else ""
                    city = str(row[7]) if row[7] else ""
                    parsed_at = str(row[8]) if row[8] else ""
                    
                    f.write(f'{row[0]};"{title}";{category};"{dates}";{price};"{place}";{url};{city};{parsed_at}\n')
            
            logger.info(f"‚úÖ –≠–∫—Å–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞–Ω–æ –≤ {output_file}")
            return True
            
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ —ç–∫—Å–ø–æ—Ä—Ç–∞: {e}")
            return False
    
    def export_to_json(self, output_file: str = "events_export.json", limit: int = 100):
        """–≠–∫—Å–ø–æ—Ä—Ç —Å–æ–±—ã—Ç–∏–π –≤ JSON"""
        try:
            cursor = self.conn.cursor()
            cursor.execute(f'''
                SELECT e.*, ed.dates_json, ed.price_json, ed.place_json, 
                       ed.tags_json, ed.images_json, ed.participants_json
                FROM events e
                LEFT JOIN event_details ed ON e.id = ed.event_id
                ORDER BY e.id DESC
                LIMIT ?
            ''', (limit,))
            
            columns = [desc[0] for desc in cursor.description]
            events = []
            
            for row in cursor.fetchall():
                event_dict = {}
                for i, col in enumerate(columns):
                    value = row[i]
                    # –ü—ã—Ç–∞–µ–º—Å—è –ø–∞—Ä—Å–∏—Ç—å JSON –ø–æ–ª—è
                    if col.endswith('_json') and value:
                        try:
                            event_dict[col] = json.loads(value)
                        except:
                            event_dict[col] = value
                    else:
                        event_dict[col] = value
                events.append(event_dict)
            
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(events, f, ensure_ascii=False, indent=2)
            
            logger.info(f"‚úÖ –≠–∫—Å–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞–Ω–æ {len(events)} —Å–æ–±—ã—Ç–∏–π –≤ {output_file}")
            return True
            
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ —ç–∫—Å–ø–æ—Ä—Ç–∞ –≤ JSON: {e}")
            return False
    
    def cleanup_old_events(self, days_old: int = 30) -> int:
        """–û—á–∏—Å—Ç–∫–∞ —Å—Ç–∞—Ä—ã—Ö —Å–æ–±—ã—Ç–∏–π"""
        try:
            cursor = self.conn.cursor()
            cutoff_date = (datetime.now() - timedelta(days=days_old)).isoformat()
            
            # –°–Ω–∞—á–∞–ª–∞ —É–¥–∞–ª—è–µ–º —Å–≤—è–∑–∞–Ω–Ω—ã–µ –∑–∞–ø–∏—Å–∏
            cursor.execute("DELETE FROM event_embeddings WHERE event_id IN (SELECT id FROM events WHERE parsed_at < ?)", (cutoff_date,))
            cursor.execute("DELETE FROM event_details WHERE event_id IN (SELECT id FROM events WHERE parsed_at < ?)", (cutoff_date,))
            
            # –ó–∞—Ç–µ–º —É–¥–∞–ª—è–µ–º —Å–∞–º–∏ —Å–æ–±—ã—Ç–∏—è
            cursor.execute("DELETE FROM events WHERE parsed_at < ?", (cutoff_date,))
            deleted = cursor.rowcount
            
            self.conn.commit()
            logger.info(f"üßπ –£–¥–∞–ª–µ–Ω–æ {deleted} —Å—Ç–∞—Ä—ã—Ö —Å–æ–±—ã—Ç–∏–π (—Å—Ç–∞—Ä—à–µ {days_old} –¥–Ω–µ–π)")
            return deleted
            
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ –æ—á–∏—Å—Ç–∫–∏ —Å—Ç–∞—Ä—ã—Ö —Å–æ–±—ã—Ç–∏–π: {e}")
            self.conn.rollback()
            return 0
    
    def backup_database(self, backup_dir: str = "backups") -> Optional[str]:
        """–°–æ–∑–¥–∞–Ω–∏–µ —Ä–µ–∑–µ—Ä–≤–Ω–æ–π –∫–æ–ø–∏–∏ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö"""
        try:
            os.makedirs(backup_dir, exist_ok=True)
            
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            db_name = os.path.basename(self.db_path)
            backup_file = os.path.join(backup_dir, f"{db_name}_{timestamp}")
            
            # –ö–æ–ø–∏—Ä—É–µ–º —Ñ–∞–π–ª –ë–î
            shutil.copy2(self.db_path, backup_file)
            
            logger.info(f"üíæ –°–æ–∑–¥–∞–Ω–∞ —Ä–µ–∑–µ—Ä–≤–Ω–∞—è –∫–æ–ø–∏—è: {backup_file}")
            return backup_file
            
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ —Å–æ–∑–¥–∞–Ω–∏—è —Ä–µ–∑–µ—Ä–≤–Ω–æ–π –∫–æ–ø–∏–∏: {e}")
            return None
    
    def get_event_by_id(self, event_id: int) -> Optional[Dict]:
        """–ü–æ–ª—É—á–µ–Ω–∏–µ –ø–æ–ª–Ω–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –æ —Å–æ–±—ã—Ç–∏–∏ –ø–æ ID"""
        try:
            cursor = self.conn.cursor()
            cursor.execute('''
                SELECT e.*, ed.dates_json, ed.price_json, ed.place_json, 
                       ed.tags_json, ed.images_json, ed.participants_json
                FROM events e
                LEFT JOIN event_details ed ON e.id = ed.event_id
                WHERE e.id = ?
            ''', (event_id,))
            
            row = cursor.fetchone()
            if not row:
                return None
            
            # –ü—Ä–µ–æ–±—Ä–∞–∑—É–µ–º –≤ —Å–ª–æ–≤–∞—Ä—å
            columns = [desc[0] for desc in cursor.description]
            event_dict = {}
            
            for i, col in enumerate(columns):
                value = row[i]
                if col.endswith('_json') and value:
                    try:
                        event_dict[col] = json.loads(value)
                    except:
                        event_dict[col] = value
                else:
                    event_dict[col] = value
            
            return event_dict
            
        except Exception as e:
            logger.error(f"‚ùå –û—à–∏–±–∫–∞ –ø–æ–ª—É—á–µ–Ω–∏—è —Å–æ–±—ã—Ç–∏—è {event_id}: {e}")
            return None
    
    def print_statistics(self):
        """–í—ã–≤–æ–¥ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –≤ –∫–æ–Ω—Å–æ–ª—å"""
        stats = self.get_database_stats()
        
        print("="*60)
        print("üìä –°–¢–ê–¢–ò–°–¢–ò–ö–ê –ë–ê–ó–´ –î–ê–ù–ù–´–• –°–û–ë–´–¢–ò–ô")
        print("="*60)
        print(f"üìÅ –§–∞–π–ª –ë–î: {stats['db_path']}")
        print(f"üì¶ –†–∞–∑–º–µ—Ä: {stats['db_size_mb']:.2f} MB")
        print(f"üé´ –í—Å–µ–≥–æ —Å–æ–±—ã—Ç–∏–π: {stats['total_events']}")
        print(f"üî¢ –≠–º–±–µ–¥–¥–∏–Ω–≥–æ–≤: {stats['total_embeddings']}")
        print(f"üèôÔ∏è –ì–æ—Ä–æ–¥–æ–≤: {stats['cities_count']}")
        print(f"üè∑Ô∏è –ö–∞—Ç–µ–≥–æ—Ä–∏–π: {stats['categories_count']}")
        
        if stats['category_stats']:
            print("\nüèÜ –¢–æ–ø –∫–∞—Ç–µ–≥–æ—Ä–∏–π:")
            for cat_stat in stats['category_stats'][:10]:
                print(f"  {cat_stat['category']}: {cat_stat['count']} —Å–æ–±—ã—Ç–∏–π")
    
    def close(self):
        """–ó–∞–∫—Ä—ã—Ç–∏–µ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è —Å –ë–î"""
        if self.conn:
            self.conn.close()
            logger.info("–°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –ë–î –∑–∞–∫—Ä—ã—Ç–æ")
    
    def __enter__(self):
        """–ü–æ–¥–¥–µ—Ä–∂–∫–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–Ω–æ–≥–æ –º–µ–Ω–µ–¥–∂–µ—Ä–∞ (with statement)"""
        return self
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """–ê–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–æ–µ –∑–∞–∫—Ä—ã—Ç–∏–µ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è –ø—Ä–∏ –≤—ã—Ö–æ–¥–µ –∏–∑ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞"""
        self.close()


# ============ –§–£–ù–ö–¶–ò–ò –î–õ–Ø –ë–´–°–¢–†–û–ì–û –ò–°–ü–û–õ–¨–ó–û–í–ê–ù–ò–Ø ============

def quick_analyze(db_path: str = "events_msk_vector.db"):
    """–ë—ã—Å—Ç—Ä—ã–π –∞–Ω–∞–ª–∏–∑ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö"""
    with VectorDBManager(db_path) as manager:
        manager.print_statistics()
        
        print("\nüîç –ü—Ä–∏–º–µ—Ä—ã —Å–æ–±—ã—Ç–∏–π:")
        recent = manager.get_recent_events(limit=3)
        for event in recent:
            print(f"  ‚Ä¢ {event['title'][:50]}... ({event['category']})")

def export_all_formats(db_path: str = "events_msk_vector.db"):
    """–≠–∫—Å–ø–æ—Ä—Ç –≤–æ –≤—Å–µ —Ñ–æ—Ä–º–∞—Ç—ã"""
    with VectorDBManager(db_path) as manager:
        # –≠–∫—Å–ø–æ—Ä—Ç –≤ CSV
        manager.export_simple_csv("events_export.csv")
        
        # –≠–∫—Å–ø–æ—Ä—Ç –≤ JSON
        manager.export_to_json("events_export.json", limit=50)
        
        # –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞
        manager.print_statistics()

def search_and_export(keyword: str, db_path: str = "events_msk_vector.db"):
    """–ü–æ–∏—Å–∫ –∏ —ç–∫—Å–ø–æ—Ä—Ç —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤"""
    with VectorDBManager(db_path) as manager:
        print(f"üîç –ü–æ–∏—Å–∫: '{keyword}'")
        results = manager.search_by_keyword(keyword, limit=20)
        
        print(f"üìÑ –ù–∞–π–¥–µ–Ω–æ: {len(results)} —Å–æ–±—ã—Ç–∏–π")
        
        # –°–æ—Ö—Ä–∞–Ω—è–µ–º —Ä–µ–∑—É–ª—å—Ç–∞—Ç—ã
        if results:
            with open(f"search_{keyword}.json", 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
            print(f"üíæ –†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ search_{keyword}.json")
            
            # –í—ã–≤–æ–¥–∏–º –ø–µ—Ä–≤—ã–µ 5
            for i, event in enumerate(results[:5], 1):
                print(f"{i}. {event['title']}")
                print(f"   –ö–∞—Ç–µ–≥–æ—Ä–∏—è: {event['category']}")
                if event.get('place_text'):
                    print(f"   –ú–µ—Å—Ç–æ: {event['place_text'][:50]}...")
                print()


if __name__ == "__main__":
    # –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è –ø—Ä–∏ –∑–∞–ø—É—Å–∫–µ —Ñ–∞–π–ª–∞ –Ω–∞–ø—Ä—è–º—É—é
    print("üöÄ –ó–∞–ø—É—Å–∫ –º–µ–Ω–µ–¥–∂–µ—Ä–∞ –≤–µ–∫—Ç–æ—Ä–Ω–æ–π –ë–î...")
    
    # –ê–≤—Ç–æ–º–∞—Ç–∏—á–µ—Å–∫–∏–π –∞–Ω–∞–ª–∏–∑, –µ—Å–ª–∏ —Ñ–∞–π–ª –ë–î —Å—É—â–µ—Å—Ç–≤—É–µ—Ç
    if os.path.exists("events_msk_vector.db"):
        quick_analyze("events_msk_vector.db")
    else:
        print("‚ùå –§–∞–π–ª –ë–î –Ω–µ –Ω–∞–π–¥–µ–Ω. –°–Ω–∞—á–∞–ª–∞ –∑–∞–ø—É—Å—Ç–∏—Ç–µ –ø–∞—Ä—Å–µ—Ä.")