In [None]:
import json
import time
import logging
import socket
from datetime import datetime
import requests
import pandas as pd
from confluent_kafka import Producer


# Kafka bootstrap server (host:port) handed to the producer configuration.
KAFKA_BROKER = 'broker:9092'
# Topic that the station records are produced to.
TRANSACTION_TOPIC = 'streaming'
# Pause, in seconds, after each record is sent.
LAG = 0.5

def create_producer():
    """Build the Kafka producer used by this script.

    Returns:
        A ``Producer`` configured for ``KAFKA_BROKER``, or ``None`` when
        construction fails (the failure is logged with its traceback).
    """
    try:
        settings = {
            "bootstrap.servers": KAFKA_BROKER,
            "client.id": socket.gethostname(),
            "enable.idempotence": True,
            "batch.size": 64000,
            "linger.ms": 10,
            "acks": "all",
            "retries": 5,
            # NOTE(review): a 1 s delivery timeout looks short next to the
            # retry budget above -- confirm this is intended.
            "delivery.timeout.ms": 1000,
        }
        return Producer(settings)
    except Exception:
        # Log message is Polish for "Cannot create the producer".
        logging.exception("Nie mogę utworzyć producenta")
        return None

def fetch_data(url='http://api.citybik.es/v2/networks/velib', timeout=10):
    """Download the current station list and return it as a DataFrame.

    Args:
        url: CityBikes network endpoint to query.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with one row per station (empty when the response lists
        no stations), or ``None`` when the request fails, the server answers
        with a non-200 status, or the body lacks the expected structure.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network errors take the same "return None" path as a bad status so
        # the caller's retry logic handles them instead of crashing.
        logging.exception("Request to %s failed", url)
        return None

    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
        return None

    try:
        stations = response.json()['network']['stations']
    except (ValueError, KeyError, TypeError):
        logging.exception("Unexpected response body from %s", url)
        return None

    # Extract the fields of interest from each station entry.
    rows = []
    for station in stations:
        extra = station.get('extra') or {}
        # A missing timestamp becomes an empty string rather than an int,
        # so the split-based parsing below cannot fail.
        raw_timestamp = str(station.get('timestamp') or '')
        rows.append({
            'Station Name': station['name'],
            'Latitude': station['latitude'],
            'Longitude': station['longitude'],
            'Number of empty slots': station.get('empty_slots', 0),
            'Number of free bikes': station.get('free_bikes', 0),
            'Number of e-bikes': extra.get('ebikes', 0),
            # Keep what follows the last 'T', with anything after the first
            # '.' dropped.
            'Timestamp': raw_timestamp.split('.')[0].split('T')[-1],
        })

    # Build the frame once, after the loop; this also guarantees a defined
    # (empty) result when there are no stations.
    return pd.DataFrame(rows)

def _report_delivery(err, msg):
    """Delivery callback: log records the broker did not acknowledge."""
    if err is not None:
        logging.error("Delivery to topic %s failed: %s", msg.topic(), err)


# Seconds to wait before polling the API again when the fetch failed or the
# data has not changed; without it the loop would hit the API continuously.
POLL_INTERVAL = 60

producer = create_producer()
previous_data = None

if producer is not None:
    while True:
        data = fetch_data()
        if data is None:
            print("Failed to fetch data. Retrying...")
            time.sleep(POLL_INTERVAL)
            continue

        # Only publish when the snapshot differs from the previous one.
        if previous_data is not None and data.equals(previous_data):
            time.sleep(POLL_INTERVAL)
            continue

        print(data)
        previous_data = data

        for _, station in data.iterrows():
            # Stamp each record with the (naive UTC) send time.
            station['current_time'] = datetime.utcnow().isoformat()
            payload = station.to_json()
            producer.produce(
                topic=TRANSACTION_TOPIC,
                value=payload.encode("utf-8"),
                on_delivery=_report_delivery,
            )
            # Report the record that was sent; do not re-query the API here.
            print(f"Sent: {payload}")
            # flush() also serves the delivery callback registered above.
            producer.flush()
            time.sleep(LAG)
