In [1]:
import requests
from kafka import KafkaProducer
import json

def send_json_data_from_url_to_kafka(topic, url, fields=(), bootstrap_servers='kafka1:9092', timeout=10):
    """Fetch a JSON list from ``url`` and publish one Kafka message per item.

    Each message is a dict holding only the keys named in ``fields``; a key
    that is absent from an item is sent with the value ``None``.

    Args:
        topic: Kafka topic the messages are sent to.
        url: URL fetched with an HTTP GET; its body is parsed as JSON and
            iterated over, each element being read with ``.get(field)``.
        fields: Iterable of keys to copy from every item. Empty by default,
            in which case every message is an empty dict.
        bootstrap_servers: Kafka broker address(es) handed to ``KafkaProducer``.
        timeout: Timeout passed to ``requests.get`` so that an unresponsive
            server cannot block the notebook forever.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    response = requests.get(url, timeout=timeout)

    # Bail out before touching Kafka: no producer is opened (and none has to
    # be cleaned up) when there is nothing to send.
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        return

    data = response.json()

    # Initialize Kafka Producer; values are serialized as UTF-8 encoded JSON.
    producer = KafkaProducer(
        bootstrap_servers=bootstrap_servers,
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )
    try:
        # Iterate over each item in the list
        for item in data:
            # Extract the required fields
            message = {field: item.get(field) for field in fields}

            # Send the message to Kafka
            producer.send(topic, message)
            print(f"Sent message: {message}")

        # Ensure all messages are sent
        producer.flush()
        print(f"Sent {len(data)} records.")
    finally:
        # Always release the producer, even when sending fails part-way.
        producer.close()



In [2]:
# API documentation: https://open.tan.fr/doc/openapi#tag/Arrets/operation/getArretsHorairesTheoriques

# Endpoints queried by this notebook.
arrets_url = "https://open.tan.fr/ewp/arrets.json"
# The "{}" placeholder is filled in with a stop code via str.format.
attente_url = "https://open.tan.fr/ewp/tempsattente.json/{}"

# Keys copied from each stop record into the Kafka message.
arrets_fields = ["codeLieu", "libelle", "distance", "ligne"]

# Keys copied from each waiting-time record into the Kafka message.
attente_fields = [
    "sens", "terminus", "infotrafic", "temps",
    "dernierDepart", "tempsReel", "ligne", "arret",
]

In [3]:
# Publish every stop record to the "arrets" topic.
send_json_data_from_url_to_kafka(
    topic="arrets",
    url=arrets_url,
    fields=arrets_fields,
)

Sent message: {'codeLieu': 'ABCH', 'libelle': 'Abbé Chérel', 'distance': None, 'ligne': [{'numLigne': '50'}, {'numLigne': '81'}, {'numLigne': '91'}, {'numLigne': 'NBI'}]}
Sent message: {'codeLieu': 'ABDU', 'libelle': 'Abel Durand', 'distance': None, 'ligne': [{'numLigne': 'C20'}, {'numLigne': 'NO'}]}
Sent message: {'codeLieu': 'ABLA', 'libelle': 'Avenue Blanche', 'distance': None, 'ligne': []}
Sent message: {'codeLieu': 'ABOL', 'libelle': 'Adrienne Bolland', 'distance': None, 'ligne': [{'numLigne': '77'}]}
Sent message: {'codeLieu': 'ACHA', 'libelle': 'Angle Chaillou', 'distance': None, 'ligne': [{'numLigne': '126'}, {'numLigne': '96'}]}
Sent message: {'codeLieu': 'ADBO', 'libelle': 'Audubon', 'distance': None, 'ligne': [{'numLigne': '101'}, {'numLigne': '141'}, {'numLigne': '91'}, {'numLigne': 'E1'}]}
Sent message: {'codeLieu': 'ADEL', 'libelle': 'Aimé Delrue', 'distance': None, 'ligne': [{'numLigne': '2'}, {'numLigne': '2B'}, {'numLigne': '3'}, {'numLigne': '3B'}, {'numLigne': 'NC'}]

In [None]:
# Fetch the list of stops. The timeout keeps an unresponsive server from
# blocking the cell forever, and raise_for_status() stops here on an HTTP
# error instead of trying to iterate over an error body.
response = requests.get(arrets_url, timeout=10)
response.raise_for_status()

# Iterate over each stop and send the waiting times to Kafka
for item in response.json():
    # The stop code fills the "{}" placeholder of the waiting-time URL.
    arret_code = item['codeLieu']
    send_json_data_from_url_to_kafka("attente", attente_url.format(arret_code), attente_fields)

Sent message: {'sens': 1, 'terminus': 'Romanet', 'infotrafic': True, 'temps': '13mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH1'}}
Sent message: {'sens': 2, 'terminus': 'Gare Maritime', 'infotrafic': True, 'temps': '18mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH2'}}
Sent 2 records.
Sent message: {'sens': 2, 'terminus': 'Gare Maritime', 'infotrafic': True, 'temps': '1mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': 'C20', 'typeLigne': 3}, 'arret': {'codeArret': 'ABDU1'}}
Sent message: {'sens': 1, 'terminus': 'École Centrale - Audencia', 'infotrafic': True, 'temps': '2mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': 'C20', 'typeLigne': 3}, 'arret': {'codeArret': 'ABDU2'}}
Sent message: {'sens': 1, 'terminus': 'Hangar à Bananes', 'infotrafic': True, 'temps': '', 'dernierDepart': 'f