In [31]:
from kafka import KafkaProducer
import json
import requests
import time
from datetime import datetime, timedelta
import random

# Kafka producer connected to the broker at broker:9092. Each message value
# is dumped to a JSON string and encoded as UTF-8 bytes by the serializer.
producer = KafkaProducer(
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
    bootstrap_servers='broker:9092',
)

# Ids of the monitoring stations whose PM10 readings are streamed.
station_ids = [550, 552, 10955, 530, 10956]

# Fetch the full station list once to look up coordinates.
# A timeout is passed because requests waits indefinitely by default, which
# would hang the whole script if the API stopped responding.
stations_url = "https://api.gios.gov.pl/pjp-api/rest/station/findAll"
stations_response = requests.get(stations_url, timeout=10)
stations_response.raise_for_status()
stations = stations_response.json()

# station id -> (gegrLat, gegrLon) for the selected stations only; either
# coordinate is None when the API entry lacks that key.
coords_map = {
    s['id']: (s.get('gegrLat'), s.get('gegrLon'))
    for s in stations
    if s['id'] in station_ids
}

# Per-station state shared by update_data_buffer() and the main loop:
# messages waiting to be sent, ...
data_buffer = {sid: [] for sid in station_ids}
# ... index of the next message in data_buffer[sid] to send, ...
current_index = {sid: 0 for sid in station_ids}
# ... and sensor timestamps that have already been queued (de-duplication).
sent_timestamps = {sid: set() for sid in station_ids}

def fetch_pm10_sensor_id(station_id, timeout=10):
    """Return the id of the station's PM10 sensor, or None if it has none.

    Args:
        station_id: Station identifier inserted into the sensors endpoint URL.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            requests would block forever on an unresponsive server.

    Returns:
        The ``id`` of the first sensor whose ``param.paramCode`` is ``'PM10'``,
        or ``None`` when no such sensor is listed.

    Raises:
        requests.RequestException: On a network error, a timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    sensors_url = f"https://api.gios.gov.pl/pjp-api/rest/station/sensors/{station_id}"
    sensors_response = requests.get(sensors_url, timeout=timeout)
    sensors_response.raise_for_status()

    for sensor in sensors_response.json():
        # Tolerate entries without a 'param' object instead of failing the
        # whole lookup with a KeyError.
        if (sensor.get('param') or {}).get('paramCode') == 'PM10':
            return sensor['id']
    return None

def fetch_pm10_data(sensor_id, timeout=10):
    """Download the measurement data for one sensor and return the parsed JSON.

    Args:
        sensor_id: Sensor identifier inserted into the getData endpoint URL.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            requests would block forever on an unresponsive server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On a network error, a timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    data_url = f"https://api.gios.gov.pl/pjp-api/rest/data/getData/{sensor_id}"
    data_response = requests.get(data_url, timeout=timeout)
    data_response.raise_for_status()
    return data_response.json()

def update_data_buffer():
    """Fetch PM10 readings for every station and queue the ones not yet seen.

    For each id in ``station_ids`` the station's PM10 sensor is looked up, its
    data downloaded, and every entry with a non-None value and a timestamp not
    present in ``sent_timestamps[station_id]`` is turned into a message and
    appended to ``data_buffer[station_id]``.

    New timestamps are recorded in ``sent_timestamps`` only together with the
    buffer update, so a failure while processing a response cannot mark
    readings as handled without actually queuing them.

    Any exception raised while handling one station is printed and the
    function moves on to the next station.
    """
    for station_id in station_ids:
        try:
            # Check coordinates first: a station without them is skipped
            # anyway, so there is no point in calling the API for it.
            lat, lon = coords_map.get(station_id, (None, None))
            if not lat or not lon:
                print(f"❌ Brak współrzędnych dla stacji {station_id}")
                continue

            sensor_id = fetch_pm10_sensor_id(station_id)
            if not sensor_id:
                print(f"❌ Brak sensora PM10 dla stacji {station_id}")
                continue

            data = fetch_pm10_data(sensor_id)

            already_seen = sent_timestamps[station_id]
            new_messages = []
            new_timestamps = set()

            # Iterate the response in reverse order so messages are queued in
            # the opposite order to the one the API returns them in.
            for entry in reversed(data['values']):
                timestamp = entry['date']
                value = entry['value']
                if value is None:
                    continue
                if timestamp in already_seen or timestamp in new_timestamps:
                    continue

                # Anomaly injection: pick one of three stations at random and
                # add 300 to the reading when it is the current station.
                apply_boost = random.choice([550, 552, 530])
                boosted_value = value + 300 if station_id == apply_boost else value

                new_messages.append({
                    'station_id': station_id,
                    # NOTE(review): fixed +2h shift of the local clock;
                    # presumably converts a UTC host clock to local time — confirm.
                    'reading_date': (datetime.now() + timedelta(hours=2)).strftime('%Y-%m-%d %H:%M:%S'),
                    'datetime_from_sensor': timestamp,
                    'value': boosted_value,
                    'unit': 'µg/m³',
                    'gegrLat': lat,
                    'gegrLon': lon
                })
                new_timestamps.add(timestamp)

            if new_messages:
                # Commit buffer and de-duplication set together.
                data_buffer[station_id].extend(new_messages)
                already_seen.update(new_timestamps)
                print(f"🆕 Dodano {len(new_messages)} nowych pomiarów dla stacji {station_id}")
            else:
                print(f"⏳ Brak nowych danych dla stacji {station_id}")

        except Exception as e:
            # Best-effort per station: report and continue with the next one.
            print(f"⚠️ Błąd podczas aktualizacji danych ze stacji {station_id}: {e}")

# Main loop: send buffered messages to the 'pm10' topic one per station per
# pass; when nothing is left, wait 10 minutes and poll the API again.
try:
    print("🚀 Uruchamianie systemu...")
    update_data_buffer()  # initial load

    while True:
        any_sent = False

        for station_id in station_ids:
            index = current_index[station_id]
            buffer = data_buffer[station_id]

            if index < len(buffer):
                msg = buffer[index]
                # send() is asynchronous; attach an errback so a failed
                # delivery is reported instead of being silently dropped.
                producer.send('pm10', msg).add_errback(
                    lambda exc, sid=station_id: print(
                        f"⚠️ Błąd wysyłania do Kafki dla stacji {sid}: {exc}"
                    )
                )
                print(f"📤 Wysłano z stacji {station_id}: {msg}")
                current_index[station_id] += 1
                any_sent = True
                # Pause between consecutive messages.
                time.sleep(5)

        # Block until everything queued in this pass has been handed off.
        producer.flush()

        if not any_sent:
            print(f"{(datetime.now() + timedelta(hours=2)).strftime('%Y-%m-%d %H:%M:%S')} 😴 Brak nowych danych do wysłania. Czekam 10 minut...")
            time.sleep(600)
            update_data_buffer()
        else:
            time.sleep(1)

except KeyboardInterrupt:
    print("🛑 Przerwano przez użytkownika.")
except Exception as e:
    # Top-level boundary: report the fatal error and let the cell finish.
    print(f"❗ Błąd krytyczny: {e}")

🚀 Uruchamianie systemu...
🆕 Dodano 67 nowych pomiarów dla stacji 550
🆕 Dodano 67 nowych pomiarów dla stacji 552
🆕 Dodano 67 nowych pomiarów dla stacji 10955
🆕 Dodano 67 nowych pomiarów dla stacji 530
🆕 Dodano 67 nowych pomiarów dla stacji 10956
📤 Wysłano z stacji 550: {'station_id': 550, 'reading_date': '2025-06-06 20:58:12', 'datetime_from_sensor': '2025-06-04 02:00:00', 'value': 318.9, 'unit': 'µg/m³', 'gegrLat': '52.160772', 'gegrLon': '21.033819'}
📤 Wysłano z stacji 552: {'station_id': 552, 'reading_date': '2025-06-06 20:58:13', 'datetime_from_sensor': '2025-06-04 02:00:00', 'value': 15.6, 'unit': 'µg/m³', 'gegrLat': '52.290864', 'gegrLon': '21.042458'}
📤 Wysłano z stacji 10955: {'station_id': 10955, 'reading_date': '2025-06-06 20:58:13', 'datetime_from_sensor': '2025-06-04 02:00:00', 'value': 23.1, 'unit': 'µg/m³', 'gegrLat': '52.207742', 'gegrLon': '20.906073'}
📤 Wysłano z stacji 530: {'station_id': 530, 'reading_date': '2025-06-06 20:58:13', 'datetime_from_sensor': '2025-06-04 0