In [1]:
import requests
from kafka import KafkaProducer
import json

def send_json_data_from_url_to_kafka(topic, url, fields={}):
    # Messages
    messages = []

    # Kafka configuration
    kafka_config = {
        'bootstrap_servers': 'kafka1:9092',  # Update with your Kafka broker
    }

    # Initialize Kafka Producer
    producer = KafkaProducer(
        bootstrap_servers=kafka_config['bootstrap_servers'],
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )
    
    print("Requesting : ", url)
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        print('data', data)
        # Iterate over each item in the list
        for item in data:
            print('item:', item)
            # Extract the required fields
            message = {field: item.get(field) for field in fields}
            messages.append(message)
            # Send the message to Kafka
            producer.send(topic, message)
            print(f"Sent message: {message}")

        # Ensure all messages are sent
        producer.flush()
        print(f"Sent {len(data)} records.")
        producer.close()
    else:
        print(f"Failed to fetch data: {response.status_code}")
    return messages

In [2]:
# https://open.tan.fr/doc/openapi#tag/Arrets/operation/getArretsHorairesTheoriques : 
arrets_fields = ["codeLieu", "libelle", "distance", "ligne"]

arrets_url = "https://open.tan.fr/ewp/arrets.json"

attente_fields = [
    'sens',
    'terminus',
    'infotrafic',
    'temps',
    'dernierDepart',
    'tempsReel',
    'ligne',
    'arret'
]

attente_url = "https://open.tan.fr/ewp/tempsattente.json/{}"

In [3]:
send_json_data_from_url_to_kafka("arrets", arrets_url, arrets_fields)

Requesting :  https://open.tan.fr/ewp/arrets.json
data [{'codeLieu': 'ABCH', 'libelle': 'Abbé Chérel', 'distance': None, 'ligne': [{'numLigne': '50'}, {'numLigne': '81'}, {'numLigne': '91'}, {'numLigne': 'NBI'}]}, {'codeLieu': 'ABDU', 'libelle': 'Abel Durand', 'distance': None, 'ligne': [{'numLigne': 'C20'}, {'numLigne': 'NO'}]}, {'codeLieu': 'ABLA', 'libelle': 'Avenue Blanche', 'distance': None, 'ligne': []}, {'codeLieu': 'ABOL', 'libelle': 'Adrienne Bolland', 'distance': None, 'ligne': [{'numLigne': '77'}]}, {'codeLieu': 'ACHA', 'libelle': 'Angle Chaillou', 'distance': None, 'ligne': [{'numLigne': '126'}, {'numLigne': '96'}]}, {'codeLieu': 'ADBO', 'libelle': 'Audubon', 'distance': None, 'ligne': [{'numLigne': '101'}, {'numLigne': '141'}, {'numLigne': '91'}, {'numLigne': 'E1'}]}, {'codeLieu': 'ADEL', 'libelle': 'Aimé Delrue', 'distance': None, 'ligne': [{'numLigne': '2'}, {'numLigne': '2B'}, {'numLigne': '3'}, {'numLigne': '3B'}, {'numLigne': 'NC'}]}, {'codeLieu': 'ADPI', 'libelle': '

[{'codeLieu': 'ABCH',
  'libelle': 'Abbé Chérel',
  'distance': None,
  'ligne': [{'numLigne': '50'},
   {'numLigne': '81'},
   {'numLigne': '91'},
   {'numLigne': 'NBI'}]},
 {'codeLieu': 'ABDU',
  'libelle': 'Abel Durand',
  'distance': None,
  'ligne': [{'numLigne': 'C20'}, {'numLigne': 'NO'}]},
 {'codeLieu': 'ABLA',
  'libelle': 'Avenue Blanche',
  'distance': None,
  'ligne': []},
 {'codeLieu': 'ABOL',
  'libelle': 'Adrienne Bolland',
  'distance': None,
  'ligne': [{'numLigne': '77'}]},
 {'codeLieu': 'ACHA',
  'libelle': 'Angle Chaillou',
  'distance': None,
  'ligne': [{'numLigne': '126'}, {'numLigne': '96'}]},
 {'codeLieu': 'ADBO',
  'libelle': 'Audubon',
  'distance': None,
  'ligne': [{'numLigne': '101'},
   {'numLigne': '141'},
   {'numLigne': '91'},
   {'numLigne': 'E1'}]},
 {'codeLieu': 'ADEL',
  'libelle': 'Aimé Delrue',
  'distance': None,
  'ligne': [{'numLigne': '2'},
   {'numLigne': '2B'},
   {'numLigne': '3'},
   {'numLigne': '3B'},
   {'numLigne': 'NC'}]},
 {'codeLie

In [8]:
def send_object_data_from_url_to_kafka(topic, url, fields):

    # Kafka configuration
    kafka_config = {
        'bootstrap_servers': 'kafka1:9092',  # Update with your Kafka broker
    }

    # Initialize Kafka Producer
    producer = KafkaProducer(
        bootstrap_servers=kafka_config['bootstrap_servers'],
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )

    print('Requesting : ', url)
    response = requests.get(url)
    
    try:
        data = response.json()
    except ValueError:
        print(f"Error: Response from {url} is not in JSON format")
        return []

    messages = []
    if isinstance(data, list):
        for item in data:
            if isinstance(item, dict):
                message = {field: item.get(field) for field in fields}
                print('Sent message:', message)
                messages.append(message)
                producer.send(topic, message)
            else:
                print(f"Warning: Expected a dictionary but got {type(item)}")
    elif isinstance(data, dict):
        message = {field: data.get(field) for field in fields}
        print('Sent message:', message)
        messages.append(message)
        producer.send(topic, message)
    else:
        print(f"Warning: Expected a list or dictionary but got {type(data)}")

    # Ensure all messages are sent
    producer.flush()
    print(f"Sent {len(data)} records.")
    producer.close()
    
    # Send messages to Kafka (implementation not shown)
    return messages

In [7]:
# Fetch the list of stops
response = requests.get(arrets_url)

horaires_fields = ["ligne", "arret", "codeCouleur", "notes", "horaires", "prochainsHoraires", "plageDeService"];

# Iterate over each stop and send the waiting times to Kafka
for item in response.json():
    arret_code = item['codeLieu']
    attentes = send_json_data_from_url_to_kafka("attente", attente_url.format(arret_code), attente_fields)
    print("attentes", attentes)
    for attente in attentes:
        codeArret = attente['arret']['codeArret']
        sens = attente['sens']
        num_ligne = attente['ligne']['numLigne']
        horaire_theorique_url = "https://open.tan.fr/ewp/horairesarret.json/{}/{}/{}".format(codeArret, num_ligne, sens)
        horaires = send_object_data_from_url_to_kafka("horaires", horaire_theorique_url, horaires_fields)

Requesting :  https://open.tan.fr/ewp/tempsattente.json/ABCH
data [{'sens': 1, 'terminus': 'Romanet', 'infotrafic': True, 'temps': '6mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH1'}}, {'sens': 2, 'terminus': 'Gare Maritime', 'infotrafic': True, 'temps': '12mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH2'}}]
item: {'sens': 1, 'terminus': 'Romanet', 'infotrafic': True, 'temps': '6mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH1'}}
Sent message: {'sens': 1, 'terminus': 'Romanet', 'infotrafic': True, 'temps': '6mn', 'dernierDepart': 'false', 'tempsReel': 'true', 'ligne': {'numLigne': '81', 'typeLigne': 3}, 'arret': {'codeArret': 'ABCH1'}}
item: {'sens': 2, 'terminus': 'Gare Maritime', 'infotrafic': True, 'temps': '12mn', 'dernierDepart': 'false', 'tempsReel

KeyboardInterrupt: 