# 🏛️ Sejm Process Downloader - Pobieranie procesu nr 471

Ten notebook pobiera dane z Sejmu dla konkretnego procesu legislacyjnego wraz z załącznikami i tworzy drzewo chronologiczne oraz powiązaniowe.

## Użycie:
1. Uruchom wszystkie komórki po kolei
2. Wyniki zostaną zapisane w folderze `process_471_output`

## Kompatybilność:
- Vast.ai
- Google Colab
- Jupyter Notebook (lokalnie)

In [None]:
# Instalacja wymaganych pakiet√≥w (uruchom raz)
!pip install requests -q

In [None]:
# Imports
import os
import json
import requests
from datetime import datetime
from typing import Dict, List, Optional, Any

# Printed once every import above has succeeded.
print("‚úÖ Importy za≈Çadowane pomy≈õlnie!")

In [None]:
# ⚙️ CONFIGURATION - CHANGE THE VALUES HERE

API_URL = "https://api.sejm.gov.pl/sejm"  # URL prefix used for every API request
TERM = 10  # Sejm term (kadencja X)
PROCESS_NUMBER = 471  # Number of the process to download (matched against print numbers)
# Derived from PROCESS_NUMBER so the folder name cannot go stale when the
# number above is changed.
OUTPUT_DIR = f"process_{PROCESS_NUMBER}_output"
DOWNLOAD_ATTACHMENTS = True  # Whether attachment files are downloaded as well

# Echo the effective configuration so the notebook output records it.
print("üìã Konfiguracja:")
print(f"   - Kadencja: {TERM}")
print(f"   - Numer procesu: {PROCESS_NUMBER}")
print(f"   - Folder wyj≈õciowy: {OUTPUT_DIR}")
print(f"   - Pobieranie za≈ÇƒÖcznik√≥w: {'Tak' if DOWNLOAD_ATTACHMENTS else 'Nie'}")

In [None]:
# Main class

class SejmProcessDownloader:
    """Download one legislative process from the Sejm API and render it as trees.

    The process is located by print number: ``process_number`` is compared with
    the ``prints`` list of every entry returned by the ``processes`` endpoint.
    For each print of the matching process the print details are fetched and,
    optionally, its attachment files are downloaded.  Results are kept on the
    instance (``process_data``, ``tree_structure``, ``attachments``) and written
    to ``output_dir`` by :meth:`save_results`.
    """

    def __init__(
        self,
        term: int,
        process_number: int,
        output_dir: str,
        api_url: Optional[str] = None,
        download_attachments: Optional[bool] = None,
    ):
        """Store the configuration and make sure the output folder exists.

        Args:
            term: Sejm term number used in the API path.
            process_number: Print number used to find the process.
            output_dir: Folder that receives all generated files.
            api_url: Optional URL prefix; when ``None`` the module-level
                ``API_URL`` is used.
            download_attachments: Optional switch; when ``None`` the
                module-level ``DOWNLOAD_ATTACHMENTS`` is used.
        """
        self.term = term
        self.process_number = process_number
        self.output_dir = output_dir
        self.process_data: Dict[str, Any] = {}
        self.attachments: List[Dict[str, Any]] = []
        self.tree_structure: List[Dict[str, Any]] = []
        self._api_url = api_url
        self._download_attachments = download_attachments

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(output_dir, exist_ok=True)

    @property
    def api_url(self) -> str:
        """URL prefix for requests; falls back to the module-level ``API_URL``."""
        # Resolved on access so the default keeps following the global.
        return API_URL if self._api_url is None else self._api_url

    @property
    def download_attachments(self) -> bool:
        """Whether attachments are downloaded; falls back to ``DOWNLOAD_ATTACHMENTS``."""
        if self._download_attachments is None:
            return bool(DOWNLOAD_ATTACHMENTS)
        return bool(self._download_attachments)

    @staticmethod
    def _shorten(text: Any, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters, adding ``...`` only if cut.

        ``None`` is rendered as an empty string so a missing title cannot crash
        the report generation.
        """
        if text is None:
            return ""
        text = str(text)
        return text if len(text) <= limit else text[:limit] + "..."

    @staticmethod
    def _safe_filename(name: Any) -> Optional[str]:
        """Reduce a remote-supplied name to a single path component.

        Returns ``None`` when nothing usable is left (empty, ``.`` or ``..``).
        """
        # Normalise backslashes so Windows-style separators are stripped too.
        base = os.path.basename(str(name).replace("\\", "/"))
        return None if base in ("", ".", "..") else base

    # The return annotation is quoted so that defining the class does not
    # evaluate `requests` eagerly.
    def _make_request(self, url: str, timeout: int = 60) -> "Optional[requests.Response]":
        """Perform a GET request and return the response only for HTTP 200.

        Connection errors and non-200 statuses are printed and reported as
        ``None``.
        """
        try:
            resp = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(f"‚ùå B≈ÇƒÖd po≈ÇƒÖczenia: {e}")
            return None

        if resp.status_code != 200:
            print(f"‚ö†Ô∏è  HTTP {resp.status_code}: {url}")
            return None
        return resp

    def _fetch_json(self, url: str) -> Optional[Any]:
        """GET ``url`` and decode the body as JSON; ``None`` on any failure."""
        resp = self._make_request(url)
        if resp is None:
            return None
        try:
            return resp.json()
        except ValueError:
            # JSON decode errors raised by Response.json() derive from ValueError.
            print(f"‚ùå Niepoprawny JSON w odpowiedzi: {url}")
            return None

    def fetch_process_info(self) -> bool:
        """Find the process whose ``prints`` contain ``process_number``.

        On success the matching entry is stored in ``self.process_data``.

        Returns:
            ``True`` when a matching process was found, ``False`` otherwise.
        """
        print(f"\nüì• Pobieram informacje o procesie nr {self.process_number}...")

        # NOTE(review): only this single response is searched; if the endpoint
        # is paginated, later pages are never seen - confirm against the API docs.
        processes = self._fetch_json(f"{self.api_url}/term{self.term}/processes")

        if not isinstance(processes, list):
            print("‚ùå Nie uda≈Ço siƒô pobraƒá listy proces√≥w")
            return False

        wanted = str(self.process_number)
        for proc in processes:
            if not isinstance(proc, dict):
                continue
            prints = proc.get('prints') or []  # tolerate an explicit null
            # Print numbers may arrive as ints or strings, so compare both ways.
            if self.process_number in prints or wanted in [str(p) for p in prints]:
                self.process_data = proc
                title = self._shorten(proc.get('title') or 'Brak tytu≈Çu', 100)
                print(f"‚úÖ Znaleziono proces: {title}")
                return True

        print(f"‚ùå Nie znaleziono procesu z drukiem nr {self.process_number}")
        return False

    def fetch_print_details(self, print_number: int) -> Optional[Dict[str, Any]]:
        """Fetch the details of one print; ``None`` if unavailable or malformed."""
        details = self._fetch_json(f"{self.api_url}/term{self.term}/prints/{print_number}")
        return details if isinstance(details, dict) else None

    def download_attachment(self, print_number: int, filename: str) -> Optional[str]:
        """Download one attachment into ``<output_dir>/druk_<print_number>/``.

        Both path components come from the remote API, so they are reduced to a
        bare file/folder name before being joined into a local path.

        Returns:
            The local file path, or ``None`` when the name is unusable or the
            download failed.
        """
        safe_name = self._safe_filename(filename)
        dir_name = self._safe_filename(f"druk_{print_number}")
        if safe_name is None or dir_name is None:
            return None

        # The URL keeps the name exactly as the API reported it.
        resp = self._make_request(
            f"{self.api_url}/term{self.term}/prints/{print_number}/{filename}"
        )
        if resp is None:
            return None

        print_dir = os.path.join(self.output_dir, dir_name)
        os.makedirs(print_dir, exist_ok=True)

        filepath = os.path.join(print_dir, safe_name)
        with open(filepath, 'wb') as f:
            f.write(resp.content)
        return filepath

    def build_tree(self) -> List[Dict[str, Any]]:
        """Build the process -> prints -> attachments tree.

        Fetches the details of every print listed in ``self.process_data`` and,
        when attachment download is enabled, the attachment files too.  Stores
        the result in ``self.tree_structure`` and the flat attachment list in
        ``self.attachments``.

        Returns:
            A one-element list holding the process node, or an empty list when
            no process data has been loaded.
        """
        tree: List[Dict[str, Any]] = []

        if not self.process_data:
            return tree

        process_node = {
            "level": 0,
            "type": "PROCES",
            "id": self.process_data.get('number', self.process_number),
            "title": self.process_data.get('title', 'Brak tytu≈Çu'),
            "description": self.process_data.get('description', ''),
            "document_type": self.process_data.get('documentType', ''),
            "state": self.process_data.get('state', ''),
            "term": self.term,
            "children": []
        }

        # Collected locally and assigned at the end, so calling build_tree()
        # again does not append duplicates to a previous run's list.
        collected: List[Dict[str, Any]] = []

        prints = self.process_data.get('prints') or []
        print(f"\nüìã Znaleziono {len(prints)} druk√≥w do przetworzenia")

        for idx, print_num in enumerate(prints):
            print(f"\nüìÑ [{idx+1}/{len(prints)}] Przetwarzam druk nr {print_num}...")

            print_details = self.fetch_print_details(print_num)
            if not print_details:
                print(f"   ‚ö†Ô∏è  Nie uda≈Ço siƒô pobraƒá szczeg√≥≈Ç√≥w druku {print_num}")
                continue

            print_node = {
                "level": 1,
                "type": "DRUK",
                "number": print_num,
                "title": print_details.get('title', ''),
                "document_date": print_details.get('documentDate', ''),
                "delivery_date": print_details.get('deliveryDate', ''),
                "change_date": print_details.get('changeDate', ''),
                "attachments": []
            }

            attachments = print_details.get('attachments') or []
            print(f"   üìé Za≈ÇƒÖczniki: {len(attachments)}")

            for att_idx, att in enumerate(attachments):
                att_node = {
                    "level": 2,
                    "type": "ZA≈ÅƒÑCZNIK",
                    "filename": att,
                    "download_url": f"{self.api_url}/term{self.term}/prints/{print_num}/{att}",
                    "local_path": None
                }

                if self.download_attachments:
                    print(f"      ‚¨áÔ∏è  [{att_idx+1}/{len(attachments)}] Pobieram: {att}")
                    local_path = self.download_attachment(print_num, att)
                    if local_path:
                        att_node["local_path"] = local_path
                        print(f"      ‚úÖ Zapisano: {local_path}")
                    else:
                        print(f"      ‚ùå B≈ÇƒÖd pobierania")

                # The same dict object is shared by the tree and the flat list.
                print_node["attachments"].append(att_node)
                collected.append(att_node)

            process_node["children"].append(print_node)

        tree.append(process_node)
        self.tree_structure = tree
        self.attachments = collected
        return tree

    def print_tree_ascii(self) -> str:
        """Render ``self.tree_structure`` as an indented text tree.

        Returns:
            The tree as a single newline-joined string (nothing is printed).
        """
        output_lines: List[str] = []

        def add_node(node, prefix="", is_last=True):
            if node["type"] == "PROCES":
                output_lines.append(f"üìÇ PROCES: {self._shorten(node['title'], 80)}")
                output_lines.append(f"   Stan: {node['state']}")
                output_lines.append(f"   Typ dokumentu: {node['document_type']}")
                output_lines.append("")

                children = node.get("children", [])
                for idx, child in enumerate(children):
                    add_node(child, "", idx == len(children) - 1)

            elif node["type"] == "DRUK":
                connector = "‚îî‚îÄ‚îÄ " if is_last else "‚îú‚îÄ‚îÄ "
                # Lines below a print continue the vertical bar unless the
                # print is the last child.
                body_prefix = prefix + ("    " if is_last else "‚îÇ   ")

                output_lines.append(f"{prefix}{connector}üìÑ DRUK NR {node['number']}")
                output_lines.append(f"{body_prefix}   Tytu≈Ç: {self._shorten(node['title'], 60)}")
                output_lines.append(f"{body_prefix}   Data dokumentu: {node['document_date']}")
                output_lines.append(f"{body_prefix}   Data dostarczenia: {node['delivery_date']}")

                attachments = node.get("attachments", [])
                for att_idx, att in enumerate(attachments):
                    is_last_att = (att_idx == len(attachments) - 1)
                    att_connector = "‚îî‚îÄ‚îÄ " if is_last_att else "‚îú‚îÄ‚îÄ "
                    # Different marker for downloaded vs. link-only attachments.
                    status = "‚úÖ" if att.get("local_path") else "üîó"
                    output_lines.append(f"{body_prefix}{att_connector}{status} {att['filename']}")

                output_lines.append("")

        for node in self.tree_structure:
            add_node(node)

        return "\n".join(output_lines)

    def generate_chronological_tree(self) -> str:
        """Render the prints of the process as a date-ordered event list.

        Every print contributes a "Dokument" event for its document date and,
        when the delivery date is set and differs, a "Dostarczenie" event.

        Returns:
            The timeline as a single newline-joined string.
        """
        output_lines = ["=" * 80, "üìÖ DRZEWO CHRONOLOGICZNE", "=" * 80, ""]

        events = []
        for node in self.tree_structure:
            if node["type"] != "PROCES":
                continue
            for child in node.get("children", []):
                if child["type"] != "DRUK":
                    continue

                doc_date = child.get("document_date", "")
                delivery_date = child.get("delivery_date", "")

                if doc_date:
                    events.append({
                        "date": doc_date,
                        "type": "Dokument",
                        "description": f"Druk nr {child['number']}: {self._shorten(child['title'], 50)}",
                        "attachments": len(child.get("attachments", []))
                    })

                if delivery_date and delivery_date != doc_date:
                    events.append({
                        "date": delivery_date,
                        "type": "Dostarczenie",
                        "description": f"Dostarczenie druku nr {child['number']}",
                        "attachments": 0
                    })

        # Plain string sort; chronological as long as the dates are ISO-formatted.
        events.sort(key=lambda x: x.get("date", ""))

        for event in events:
            output_lines.append(f"üìÜ {event['date']}")
            output_lines.append(f"   [{event['type']}] {event['description']}")
            if event['attachments'] > 0:
                output_lines.append(f"   üìé Za≈ÇƒÖczniki: {event['attachments']}")
            output_lines.append("")

        return "\n".join(output_lines)

    def save_results(self) -> None:
        """Write the JSON dump, both text trees and the summary report.

        Files are created in ``self.output_dir``: ``process_data.json``,
        ``drzewo_struktury.txt``, ``drzewo_chronologiczne.txt`` and
        ``raport_podsumowujacy.txt``.
        """
        print("\nüíæ Zapisywanie wynik√≥w...")

        # The folder may have been removed since __init__ created it.
        os.makedirs(self.output_dir, exist_ok=True)

        # JSON data
        json_path = os.path.join(self.output_dir, "process_data.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump({
                "process": self.process_data,
                "tree": self.tree_structure,
                "attachments": self.attachments,
                "generated_at": datetime.now().isoformat()
            }, f, ensure_ascii=False, indent=2)
        print(f"   ‚úÖ Dane JSON: {json_path}")

        # Structure tree
        tree_path = os.path.join(self.output_dir, "drzewo_struktury.txt")
        with open(tree_path, 'w', encoding='utf-8') as f:
            f.write("=" * 80 + "\n")
            f.write("üå≥ DRZEWO STRUKTURY PROCESU LEGISLACYJNEGO\n")
            f.write(f"   Numer procesu: {self.process_number}\n")
            f.write(f"   Kadencja: {self.term}\n")
            f.write(f"   Data wygenerowania: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write("=" * 80 + "\n\n")
            f.write(self.print_tree_ascii())
        print(f"   ‚úÖ Drzewo struktury: {tree_path}")

        # Chronological tree
        chrono_path = os.path.join(self.output_dir, "drzewo_chronologiczne.txt")
        with open(chrono_path, 'w', encoding='utf-8') as f:
            f.write(self.generate_chronological_tree())
        print(f"   ‚úÖ Drzewo chronologiczne: {chrono_path}")

        # Summary report
        summary_path = os.path.join(self.output_dir, "raport_podsumowujacy.txt")
        with open(summary_path, 'w', encoding='utf-8') as f:
            f.write("=" * 80 + "\n")
            f.write("üìä RAPORT PODSUMOWUJƒÑCY\n")
            f.write("=" * 80 + "\n\n")
            f.write(f"Numer procesu: {self.process_number}\n")
            f.write(f"Kadencja: {self.term}\n")
            f.write(f"Tytu≈Ç: {self.process_data.get('title', 'N/A')}\n")
            f.write(f"Stan: {self.process_data.get('state', 'N/A')}\n")
            f.write(f"Typ dokumentu: {self.process_data.get('documentType', 'N/A')}\n\n")
            f.write(f"Liczba druk√≥w: {len(self.process_data.get('prints') or [])}\n")
            f.write(f"Liczba za≈ÇƒÖcznik√≥w: {len(self.attachments)}\n\n")

            f.write("LINK DO STRONY SEJMU:\n")
            f.write(f"https://www.sejm.gov.pl/Sejm{self.term}.nsf/PrzebiegProc.xsp?nr={self.process_number}\n\n")

            f.write("POBRANE ZA≈ÅƒÑCZNIKI:\n")
            f.write("-" * 40 + "\n")
            for att in self.attachments:
                status = "‚úÖ Pobrano" if att.get("local_path") else "‚ùå Nie pobrano"
                f.write(f"  {status}: {att['filename']}\n")
                if att.get("local_path"):
                    f.write(f"     Lokalna ≈õcie≈ºka: {att['local_path']}\n")

        print(f"   ‚úÖ Raport: {summary_path}")

    def run(self) -> bool:
        """Run the whole pipeline: locate, fetch, print and save.

        Returns:
            ``False`` when the process could not be found, ``True`` otherwise.
        """
        print("=" * 80)
        print("üèõÔ∏è  SEJM PROCESS DOWNLOADER")
        print(f"   Pobieranie procesu nr {self.process_number} z kadencji {self.term}")
        print("=" * 80)

        if not self.fetch_process_info():
            print("\n‚ùå Nie uda≈Ço siƒô pobraƒá informacji o procesie.")
            return False

        self.build_tree()

        print("\n" + "=" * 80)
        print("üå≥ DRZEWO STRUKTURY:")
        print("=" * 80)
        print(self.print_tree_ascii())

        print(self.generate_chronological_tree())

        self.save_results()

        print("\n" + "=" * 80)
        print("‚úÖ ZAKO≈ÉCZONO POMY≈öLNIE!")
        print(f"   Wyniki zapisane w: {os.path.abspath(self.output_dir)}")
        print("=" * 80)

        return True

print("‚úÖ Klasa SejmProcessDownloader za≈Çadowana!")

In [None]:
# 🚀 RUN THE DOWNLOAD

# Build the downloader from the notebook configuration, then run the pipeline.
# The bare call is kept as the last expression so the notebook shows its result.
downloader = SejmProcessDownloader(TERM, PROCESS_NUMBER, OUTPUT_DIR)

downloader.run()

In [None]:
# 📊 SHOW THE DOWNLOADED DATA

import json

# Re-read the JSON file written by the download cell and print a short summary.
json_path = os.path.join(OUTPUT_DIR, "process_data.json")
if not os.path.exists(json_path):
    print("‚ùå Brak danych - uruchom najpierw kom√≥rkƒô pobierania powy≈ºej.")
else:
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print("üìä PODSUMOWANIE POBRANYCH DANYCH:")
    print("=" * 50)
    # Fields are read one print at a time, in the same order as before.
    print(f"Tytu≈Ç procesu: {data['process'].get('title', 'N/A')}")
    print(f"Stan: {data['process'].get('state', 'N/A')}")
    print(f"Liczba druk√≥w: {len(data['process'].get('prints', []))}")
    print(f"Liczba za≈ÇƒÖcznik√≥w: {len(data['attachments'])}")
    print(f"\nData wygenerowania: {data['generated_at']}")

In [None]:
# 📁 LIST OF DOWNLOADED FILES

# Walk the output folder and print it as an indented listing.
if not os.path.exists(OUTPUT_DIR):
    print(f"‚ùå Folder {OUTPUT_DIR} nie istnieje.")
else:
    print(f"üìÅ Zawarto≈õƒá folderu {OUTPUT_DIR}:")
    print("=" * 50)

    for root, _subdirs, filenames in os.walk(OUTPUT_DIR):
        # Depth is the number of separators left after removing OUTPUT_DIR.
        depth = root.replace(OUTPUT_DIR, '').count(os.sep)
        print(f"{'  ' * depth}üìÇ {os.path.basename(root)}/")
        file_indent = '  ' * (depth + 1)
        for filename in filenames:
            print(f"{file_indent}üìÑ {filename}")