In [None]:
import os
import csv
import json
import sys
from typing import Optional
from pydantic import BaseModel, ValidationError

In [None]:
# Data model for purchases
class PurchaseData(BaseModel):
    """A single purchase record, validated by pydantic.

    Both fields are required strings; records that fail validation raise
    ``ValidationError`` at construction time.
    """
    user_id: str  # identifier of the user who made the purchase
    category: str  # category value attached to the purchase

In [None]:
# Data model for visits
class VisitData(BaseModel):
    """A single visit record, validated by pydantic.

    ``user_id`` is a required string; ``source`` is a string that may also
    be ``None``.
    """
    user_id: str  # identifier of the visiting user
    # NOTE(review): whether ``source`` may be omitted entirely (rather than
    # passed as None) depends on the installed pydantic major version -- confirm.
    source: Optional[str]

In [None]:
def process_files(purchases_path: str, visits_path: str, output_path: str) -> None:
    """Join the visit log with the purchase log and write the result as CSV.

    The purchase log is read line by line, each line being parsed as JSON
    (the first line is treated as a header and skipped), and collected into
    a ``user_id -> category`` mapping; if a user occurs more than once the
    last record wins. The visit log is then streamed as CSV (header row
    skipped), taking column 0 as ``user_id`` and column 1 as ``source``.
    Every visit whose user has a recorded purchase is written to the output
    file as a ``user_id, source, category`` row, after a header row.

    Records that cannot be used are skipped silently: lines that are not
    valid JSON, JSON values that are not objects, records rejected by the
    pydantic models, and CSV rows with fewer than two columns. Empty input
    files are tolerated and yield a header-only output file.

    Args:
        purchases_path: Path to the purchase log (one JSON object per line).
        visits_path: Path to the visit log in CSV format.
        output_path: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If any of the files cannot be opened.
    """
    purchases = {}

    # Build the user_id -> category lookup from the purchase log.
    with open(purchases_path, "r", encoding="utf-8") as f_purchases:
        # The None default keeps an empty file from raising StopIteration.
        next(f_purchases, None)  # skip the header line
        for line in f_purchases:
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (list, number, string, ...)
            # cannot be unpacked with ** and would raise TypeError.
            if not isinstance(data, dict):
                continue
            try:
                purchase = PurchaseData(**data)
            except ValidationError:
                continue
            purchases[purchase.user_id] = purchase.category

    # Stream the visit log and write matching rows to the output file.
    # newline='' on both files lets the csv module handle line endings itself.
    with open(visits_path, "r", newline='', encoding="utf-8") as f_visits, \
            open(output_path, "w", newline='', encoding='utf-8') as f_output:
        reader = csv.reader(f_visits)
        writer = csv.writer(f_output)

        next(reader, None)  # skip the visit-log header; tolerate an empty file
        writer.writerow(["user_id", "source", "category"])

        for row in reader:
            # Rows without both columns carry no usable data.
            if len(row) < 2:
                continue
            try:
                visit = VisitData(user_id=row[0], source=row[1])
            except ValidationError:
                continue
            if visit.user_id in purchases:
                writer.writerow([visit.user_id, visit.source, purchases[visit.user_id]])

In [None]:
def run_homework():
    """Build funnel.csv from the two log files and print a short preview."""

    # When the ipykernel module is loaded (running under a Jupyter kernel)
    # the files are looked up in the current working directory; otherwise
    # in the directory that contains this script.
    if 'ipykernel' in sys.modules:
        work_dir = os.getcwd()
    else:
        work_dir = os.path.dirname(os.path.abspath(__file__))

    purchases_file, visits_file, funnel_file = (
        os.path.join(work_dir, file_name)
        for file_name in ('purchase_log.txt', 'visit_log.csv', 'funnel.csv')
    )

    process_files(purchases_file, visits_file, funnel_file)

    # Show the first three lines of the result. readline() returns '' at
    # end of file, so a shorter file still produces three printed lines.
    with open(funnel_file, "r", encoding="utf-8") as preview:
        shown = 0
        while shown < 3:
            print(preview.readline().strip())
            shown += 1

In [None]:
# Run the pipeline only when this module is executed directly, not on import.
if __name__ == "__main__":
    run_homework()