In [1]:
from utils import config_logging, configure_kafka_producer, intialise_spark_session
from pyspark.sql.functions import from_json, col
from pyspark.sql.types import StructType, StructField, StringType, FloatType
import requests
import logging
import json

ModuleNotFoundError: No module named 'utils'

In [None]:
logging = config_logging()
spark = intialise_spark_session()

In [None]:
def extract_bike_networks(producer: str, topic: str) -> None:

    url = "http://api.citybik.es/v2/networks"

    try:
        
        response = requests.get(url)

        if response.status_code == 200:
            logging.info("Data fetched successfully.")
            data = response.json()
            
            for network in data["networks"]:
                if network["location"]["country"] in ["US", "GB", "CA", "AU"]:

                    network_location = network.get("location", {})

                    streaming_data =  {
                    "id": network.get("id"),
                    "name": network.get("name"),
                    "latitude": network_location.get("latitude"),
                    "longitude": network_location.get("longitude"),
                    "city": network_location.get("city"),
                    "country": network_location.get("country"),
                    "company": network.get("company")[0]

                    }
                
                    producer.send(topic, value=json.dumps(streaming_data).encode('utf-8'))
                    producer.flush()
                    
                    logging.info(f"Streaming data sent to Kafka topic {topic}")

    except requests.exceptions.RequestException as e:
        logging.error(f"Error fetching data: {e}")
        return None


In [None]:
def transform_schema():
    bike_networks = StructType([
        StructField("id", StringType(), True),
        StructField("name", StringType(), True),
        StructField("latitude", FloatType(), True),
        StructField("longitude", FloatType(), True),
        StructField("city", StringType(), True),
        StructField("country", StringType(), True),
        StructField("company", StringType(), True),
    ])

    return bike_networks

In [None]:
def consume_data(bootstrap_servers, topic):
    try:
        df = spark.readStream.format("kafka") \
            .option("kafka.bootstrap.servers", bootstrap_servers) \
            .option("subscribe", topic) \
            .option("delimiter", ",")\
            .load()
        logging.info("Data consumed successfully.")

        transformed_df = df.selectExpr("CAST(value AS STRING)")\
        .select(from_json(col("value", transform_schema)\
        .alias("data")))\
        .select("data.*")

        return transformed_df

    except Exception as e:
        logging.error(f"Error consuming data: {e}")
        return None
    
   

In [None]:
def main():
    try:
        spark = intialise_spark_session()
        bootstrap_servers = "bootstrap_servers"
        producer  = configure_kafka_producer(bootstrap_servers)
        topic = "list_of_bike_networks"

        if not spark or bootstrap_servers or producer or topic:
            logging.error("Missing variable configuration: check your configuration.")
            return

        extract_bike_networks(producer, topic)
        transformed_df = consume_data(bootstrap_servers, topic)

    except Exception as e:
        logging.error(f"An error occurred: {e}")
    finally:
        producer.close()
        spark.stop()
        
        logging.info("Spark session stopped.")


In [None]:
if __name__ == "__main__":
    main()

In [19]:
import requests
url = f"http://api.citybik.es/v2/networks"


response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    
    all_networks = []
    all_ids = []
    for network in data["networks"]:
        if network["location"]["country"] in ["US", "GB", "CA"]:

            network_location = network.get("location", {})

            streaming_data =  {
            "id": network.get("id"),
            "name": network.get("name"),
            "latitude": network_location.get("latitude"),
            "longitude": network_location.get("longitude"),
            "city": network_location.get("city"),
            "country": network_location.get("country"),
            "company": network.get("company")
            }
            all_ids.append(network.get("id"))
            all_networks.append(streaming_data)
        

In [12]:
all_ids = [ids["id"] for ids in all_networks]

In [20]:
all_ids

['acces-velo-saguenay',
 'avelo-quebec',
 'aventura',
 'bay-wheels',
 'belfastbikes-belfast',
 'bentonville',
 'beryl-bcp',
 'beryl-brighton',
 'beryl-cornwall',
 'beryl-dorchester-weymouth-portland',
 'beryl-eastleigh',
 'beryl-greater-manchester',
 'beryl-guildford',
 'beryl-hereford',
 'beryl-hertsmere',
 'beryl-leeds',
 'beryl-norwich',
 'beryl-plymouth',
 'beryl-stevenage',
 'beryl-watford',
 'beryl-west-midlands',
 'beryl-worcester',
 'bike-chattanooga',
 'biketown',
 'biketown-whq',
 'bismarck',
 'bixi-montreal',
 'bixi-toronto',
 'blue-bikes',
 'boulder',
 'broward',
 'bublr-bikes',
 'capital-bikeshare',
 'capmetro-austin',
 'charlotte',
 'cincy-red-bike',
 'citi-bike-miami',
 'citi-bike-nyc',
 'clemson',
 'co-bikes-exeter',
 'cogo',
 'denver',
 'desmoines',
 'divvy',
 'elpaso',
 'fortworth',
 'gobiki',
 'greenbikeslc',
 'greenville',
 'hi-bike-fort-william',
 'hi-bike-inverness',
 'hibike',
 'indego',
 'indiana-pacers-bikeshare',
 'jackson',
 'joco-new-york',
 'lincoln',
 'mad

In [1]:
all_idss = ['acces-velo-saguenay',
 'avelo-quebec',
 'aventura',
 'bay-wheels',
 'belfastbikes-belfast']

In [8]:
import aiohttp
import asyncio
import json


async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                data = await response.text()
                return data


    
tasks = []

urls = [f"http://api.citybik.es/v2/networks/{network_id}" for network_id in all_idss]

for url in urls:
    tasks.append(fetch(url))    
    print(tasks)

responses = await asyncio.gather(*tasks)

# get_ids = []
all_stations = []
for response in responses:
    if response:
        response = json.loads(response)
        # get_ids.append(response.get("network", {}).get("id"))

        tasks_stations = response.get("network", {}).get("stations", [])
        

        for stations in tasks_stations:
            streaming_data = ({
                "id": response.get("network", {}).get("id"),
                "station_id": stations["id"],
                "name": stations["name"],
                "latitude": stations["latitude"],
                "longitude": stations["longitude"],
                "timestamp": stations["timestamp"],
                "free_bikes": stations["free_bikes"],
                "empty_slots": stations["empty_slots"],
                "uid": stations["extra"].get("uid", None),
                "renting": stations["extra"].get("renting", 0),
                "returning": stations["extra"].get("returning", 0),
                "last_updated": stations["extra"].get("last_updated", 0),
                "address": stations["extra"].get("address", None),
                "has_ebikes": stations["extra"].get("has_ebikes", 0),
                "ebikes": stations["extra"].get("ebikes", 0),
                "normal_bikes": stations["extra"].get("normal_bikes", 0)
            })

            all_stations.append(streaming_data)

[<coroutine object fetch at 0x7fef64c6dee0>]
[<coroutine object fetch at 0x7fef64c6dee0>, <coroutine object fetch at 0x7fef64cf8c10>]
[<coroutine object fetch at 0x7fef64c6dee0>, <coroutine object fetch at 0x7fef64cf8c10>, <coroutine object fetch at 0x7fef64cf94d0>]
[<coroutine object fetch at 0x7fef64c6dee0>, <coroutine object fetch at 0x7fef64cf8c10>, <coroutine object fetch at 0x7fef64cf94d0>, <coroutine object fetch at 0x7fef64cf9540>]
[<coroutine object fetch at 0x7fef64c6dee0>, <coroutine object fetch at 0x7fef64cf8c10>, <coroutine object fetch at 0x7fef64cf94d0>, <coroutine object fetch at 0x7fef64cf9540>, <coroutine object fetch at 0x7fef64cf93f0>]


In [None]:
import json

# Open a file in write mode and save the JSON data
with open("sample_networks.json", "w") as json_file:
    json.dump(all_networks, json_file, indent=2)  # indent is optional, it formats the output to be more readable

In [None]:
import aiohttp
import asyncio
import json


async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                data = await response.text()
                return data


async def fetch_all_stations(url):  
    
    url = "http://api.citybik.es/v2/networks"
    urls = [f"{url}/{network_id}" for network_id in all_idss]

    tasks = []
    for url in urls:
        tasks.append(fetch(url))    
        

    responses = await asyncio.gather(*tasks)

    all_stations = []


    for response in responses:
        if response:
            response = json.loads(response)

            tasks_stations = response.get("network", {}).get("stations", [])
        

        for stations in tasks_stations:
            streaming_data = ({
                "id": response.get("network", {}).get("id"),
                "station_id": stations["id"],
                "name": stations["name"],
                "latitude": stations["latitude"],
                "longitude": stations["longitude"],
                "timestamp": stations["timestamp"],
                "free_bikes": stations["free_bikes"],
                "empty_slots": stations["empty_slots"],
                "uid": stations["extra"].get("uid", None),
                "renting": stations["extra"].get("renting", 0),
                "returning": stations["extra"].get("returning", 0),
                "last_updated": stations["extra"].get("last_updated", 0),
                "address": stations["extra"].get("address", None),
                "has_ebikes": stations["extra"].get("has_ebikes", 0),
                "ebikes": stations["extra"].get("ebikes", 0),
                "normal_bikes": stations["extra"].get("normal_bikes", 0)
            })

            all_stations.append(streaming_data)

    return all_stations
            
async def main():
    all_stations = await fetch_all_stations("http://api.citybik.es/v2/networks")
    return all_stations

all_stations = await main()

In [2]:
import aiohttp
import asyncio
import json
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col
from pyspark.sql.types import StructType, StructField, StringType, FloatType


spark = SparkSession.builder \
    .appName("BikeNetworkStreaming") \
    .getOrCreate()

async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                data = await response.text()
                return data


async def fetch_all_stations(url):  
    
    url = "http://api.citybik.es/v2/networks"
    urls = [f"{url}/{network_id}" for network_id in all_idss]

    tasks = []
    for url in urls:
        tasks.append(fetch(url))    
        

    responses = await asyncio.gather(*tasks)

    all_stations = []


    for response in responses:
        if response:
            response = json.loads(response)

            tasks_stations = response.get("network", {}).get("stations", [])
        

        for stations in tasks_stations:
            streaming_data = ({
                "id": response.get("network", {}).get("id"),
                "station_id": stations["id"],
                "name": stations["name"],
                "latitude": stations["latitude"],
                "longitude": stations["longitude"],
                "timestamp": stations["timestamp"],
                "free_bikes": stations["free_bikes"],
                "empty_slots": stations["empty_slots"],
                "uid": stations["extra"].get("uid", None),
                "renting": stations["extra"].get("renting", 0),
                "returning": stations["extra"].get("returning", 0),
                "last_updated": stations["extra"].get("last_updated", 0),
                "address": stations["extra"].get("address", None),
                "has_ebikes": stations["extra"].get("has_ebikes", 0),
                "ebikes": stations["extra"].get("ebikes", 0),
                "normal_bikes": stations["extra"].get("normal_bikes", 0)
            })

            all_stations.append(streaming_data)

    return all_stations
            
async def main():
    all_stations = await fetch_all_stations("http://api.citybik.es/v2/networks")
    return all_stations

all_stations = await main()

your 131072x1 screen size is bogus. expect trouble
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/04/15 19:11:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
all_stations 

[{'id': 'acces-velo-saguenay',
  'station_id': '0b1c0c48e94fae24b1a38f4559a5985a',
  'name': 'UQAC',
  'latitude': 48.42048,
  'longitude': -71.05338,
  'timestamp': '2025-04-15T18:09:42.393509Z',
  'free_bikes': 0,
  'empty_slots': 7,
  'uid': '2',
  'renting': True,
  'returning': True,
  'last_updated': 1730118608,
  'address': 'UQAC',
  'has_ebikes': True,
  'ebikes': 0,
  'normal_bikes': 0},
 {'id': 'acces-velo-saguenay',
  'station_id': '138fa8c4b31f783bd4af0afb3151df37',
  'name': 'Terminus Ste-Therese',
  'latitude': 48.4326802,
  'longitude': -71.175537,
  'timestamp': '2025-04-15T18:09:42.393721Z',
  'free_bikes': 0,
  'empty_slots': 7,
  'uid': '17',
  'renting': True,
  'returning': True,
  'last_updated': 1744740550,
  'address': 'Treminus Ste-Therese',
  'has_ebikes': True,
  'ebikes': 0,
  'normal_bikes': 0},
 {'id': 'acces-velo-saguenay',
  'station_id': '3aa299f4d3048ef94737bbf5fbc03825',
  'name': 'Place du Royaume',
  'latitude': 48.4044567,
  'longitude': -71.058208

In [None]:
network_location = network.get("location", {})

                    streaming_data =  {
                    "id": network.get("id"),
                    "name": network.get("name"),
                    "latitude": network_location.get("latitude"),
                    "longitude": network_location.get("longitude"),
                    "city": network_location.get("city"),
                    "country": network_location.get("country"),
                    "company": network.get("company")[0]
                    }

NameError: name 'true' is not defined

In [7]:
import json

# Open a file in write mode and save the JSON data
with open("sample_networks.json", "w") as json_file:
    json.dump(all_networks, json_file, indent=2)  # indent is optional, it formats the output to be more readable

NameError: name 'all_networks' is not defined

In [1]:
import json

# Load the saved JSON data
with open("sample_networks.json", "r") as json_file:
    all_networks = json.load(json_file)

In [None]:
import aiohttp
import asyncio
import json


async def fetch(url: str):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status == 200:
                data = await response.json()
                return data


async def fetch_all_stations(url: str, api_key: str, all_locations: dict) -> None:  

  
api_key = "d9e86164ec8843ae9a2200103251704"

city_list =  []
                    
tasks = []

for city in city_list:
    url = f"https://api.weatherapi.com/v1/current.json?key={api_key}&q={city}"
    tasks.append(fetch(url))


responses = await asyncio.gather(*tasks)


all_stations = []
                
if len(responses) == len(all_locations): 
    for response in responses:
        if response:
            location_data = response.get("location", {})
            current = response.get("current", {})

            if location_data and current:
                streaming_data = {
                    "city": location_data.get("name"),
                    "lat": location_data.get("lat"),
                    "lon": location_data.get("lon"),
                    "timezone": location_data.get("tz_id"),
                    "country": location_data.get("country"),
                    "localtime": location_data.get("localtime"),
                    "city": location_data.get("name"),
                    "last_updated": current.get("last_updated"),
                    "condition": current.get("condition", {}).get("text"),
                    "temp_c": current.get("temp_c"),
                    "wind_mph": current.get("wind_mph"),
                    "wind_degree": current.get("wind_degree"),
                    "wind_dir": current.get("wind_dir"),
                    "humidity": current.get("humidity"),
                    "cloud": current.get("cloud"),
                    "feelslike_c": current.get("feelslike_c"),
                    "heat_index": current.get("heatindex_c"),
                }

                all_stations.append(streaming_data)

    # return all_stations
            

# all_weather = await fetch_all_stations()

https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=48.433333,-71.083333
https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=40.7128,-74.0060
https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=40.736,-74.173


In [80]:
all_locations =  [
                {"location_id": '8e3dc927-4297-4ba8-96b3-4cf99eff8762', "latitude": 51.5074, "longitude": -0.1278},  
                {"location_id": '19d95950-6b2a-445c-b3c6-0847a97b4276', "latitude": 40.7128, "longitude": -74.0060},
                {"location_id": 'ad1c044c-3d84-4b1c-aa2d-2a3aa34e2143',"latitude": 35.6895, "longitude": 139.6917}]
                    




for location in all_locations:
    url = f"https://api.weatherapi.com/v1/current.json/?key={api_key}&q={location['latitude']},{location['longitude']}"
    print(url)

https://api.weatherapi.com/v1/current.json/?key=350f4d977ad24531a8565131251704&q=51.5074,-0.1278
https://api.weatherapi.com/v1/current.json/?key=350f4d977ad24531a8565131251704&q=40.7128,-74.006
https://api.weatherapi.com/v1/current.json/?key=350f4d977ad24531a8565131251704&q=35.6895,139.6917


In [61]:
import uuid

def generate_uuid():
    return str(uuid.uuid4()) # Generates a random UUID  

generate_uuid()




'ad1c044c-3d84-4b1c-aa2d-2a3aa34e2143'

In [None]:
# add to transformation script
import uuid
import pandas as pd
import os

df = df.withColumn("location_id", str(uuid.uuid4()))

if os.path.exists("reference_data/city_list.csv") == False:
    city_df = df.select("city").distinct()
    spark.write.csv(
    city_df, "reference_data/city_list.csv", header=True, mode="overwrite"
)


'8e3dc927-4297-4ba8-96b3-4cf99eff8762'

In [None]:
https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=48.433333,-71.083333
https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=40.7128,-74.0060
https://api.weatherapi.com/v1/current.json?key=d9e86164ec8843ae9a2200103251704&q=40.736,-74.173

In [None]:
import pandas as pd

location_df = pd.read_csv("reference_data/location_reference.csv")
location_dict = location_df.to_dict(orient="records")

In [67]:
all_locations =  [
                {"location_id": '8e3dc927-4297-4ba8-96b3-4cf99eff8762', "latitude": 51.5074, "longitude": -0.1278},  
                {"location_id": '19d95950-6b2a-445c-b3c6-0847a97b4276', "latitude": 40.7128, "longitude": -74.0060},
                {"location_id": 'ad1c044c-3d84-4b1c-aa2d-2a3aa34e2143',"latitude": 35.6895, "longitude": 139.6917}]



for location in all_locations:
    print(location["location_id"], location["latitude"], location["longitude"])



# urls = [f"baba{lat},{lon}" for lat, lon in all_locations.items()]
# urls

8e3dc927-4297-4ba8-96b3-4cf99eff8762 51.5074 -0.1278
19d95950-6b2a-445c-b3c6-0847a97b4276 40.7128 -74.006
ad1c044c-3d84-4b1c-aa2d-2a3aa34e2143 35.6895 139.6917


In [33]:

all_stations

[{'lat': 51.517,
  'lon': -0.106,
  'city': 'London',
  'temp_c': 8.3,
  'wind_mph': 2.2,
  'humidity': 76},
 {'lat': 40.714,
  'lon': -74.006,
  'city': 'New York',
  'temp_c': 6.7,
  'wind_mph': 11.6,
  'humidity': 51},
 {'lat': 35.69,
  'lon': 139.692,
  'city': 'Tokyo',
  'temp_c': 25.3,
  'wind_mph': 19.5,
  'humidity': 34}]

In [19]:
import json
data = {
    "location": {
        "name": "Bagotville",
        "region": "Quebec",
        "country": "Canada",
        "lat": 48.35,
        "lon": -70.883,
        "tz_id": "America/Toronto",
        "localtime_epoch": 1744874327,
        "localtime": "2025-04-17 03:18"
    },
    "current": {
        "last_updated": "2025-04-17 03:15",
        "temp_c": -0.9,
        "is_day": 0,
        "condition": {
            "text": "Clear"
        },
        "wind_mph": 15.2,
        "wind_degree": 281,
        "wind_dir": "WNW",
        "humidity": 81,
        "cloud": 0,
        "feelslike_c": -7.0,
        "heatindex_c": -2.6,
        "heatindex_f": 27.3,
        "uv": 0.0
    }
}


for response in data:
    if response:
        location = response["location"]
        current = response["current"]
        if location and current:
   
            streaming_data = {
                    "lat": location["lat"],
                    "lon":location["lon"],
                }

            print(streaming_data)




TypeError: string indices must be integers

In [23]:
all_locations =  {
    (51.5074, -0.1278): "London",  
    (40.7128, -74.0060): "New York",
    (35.6895, 139.6917): "Tokyo"
}

all_coordinates = [(lat, lon) for lat, lon in all_locations.keys()]
all_coordinates

[(51.5074, -0.1278), (40.7128, -74.006), (35.6895, 139.6917)]

In [20]:
# Assuming the JSON is stored in a variable called `weather_data`

weather_data = {
    "location": {
        "name": "Bagotville",
        "region": "Quebec",
        "country": "Canada",
        "lat": 48.35,
        "lon": -70.883,
        "tz_id": "America/Toronto",
        "localtime_epoch": 1744874327,
        "localtime": "2025-04-17 03:18"
    },
    "current": {
        "last_updated": "2025-04-17 03:15",
        "temp_c": -0.9,
        "is_day": 0,
        "condition": {
            "text": "Clear"
        },
        "wind_mph": 15.2,
        "wind_degree": 281,
        "wind_dir": "WNW",
        "humidity": 81,
        "cloud": 0,
        "feelslike_c": -7.0,
        "heatindex_c": -2.6,
        "heatindex_f": 27.3,
        "uv": 0.0
    }
}

# Extracting data
location_name = weather_data["location"]["name"]
country = weather_data["location"]["country"]
localtime = weather_data["location"]["localtime"]
temperature = weather_data["current"]["temp_c"]
feels_like = weather_data["current"]["feelslike_c"]
weather_condition = weather_data["current"]["condition"]["text"]
wind_speed = weather_data["current"]["wind_mph"]
humidity = weather_data["current"]["humidity"]

# Optional: Display
print(f"Location: {location_name}, {country}")
print(f"Local Time: {localtime}")
print(f"Temperature: {temperature}°C, Feels Like: {feels_like}°C")
print(f"Condition: {weather_condition}")
print(f"Wind Speed: {wind_speed} mph")
print(f"Humidity: {humidity}%")


Location: Bagotville, Canada
Local Time: 2025-04-17 03:18
Temperature: -0.9°C, Feels Like: -7.0°C
Condition: Clear
Wind Speed: 15.2 mph
Humidity: 81%


In [39]:
#version 2
import aiohttp
import asyncio
import json
import csv
import time



async def fetch(url: str) -> dict:
    """ Fetch data from API asynchronously """

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status == 200:
                    data = await response.text()
                    return data
                logging.info("Data fetched successfully.")
    except aiohttp.ClientError as e:
        logging.error(f"Error fetching data: {e}")
        return None
    

    
async def fetch_all_stations():

    """ Fetch all bike networks asynchronously """


    all_ids = [ 'beryl-brighton',
                'beryl-cornwall',
                'beryl-dorchester-weymouth-portland']
    
    url = "http://api.citybik.es/v2/networks"
    urls = [f"{url}/{network_id}" for network_id in all_ids]

    tasks = []
    for url in urls:
        tasks.append(fetch(url))    
        

    responses = await asyncio.gather(*tasks)

    all_stations = []

    for response in responses:
        if response:
            response = json.loads(response)  # convert text (str) to json to extract the networks

        tasks_stations = response.get("network", {}).get("stations", [])
        

        for stations in tasks_stations:
            streaming_data = ({
                "id": response.get("network", {}).get("id"),
                "station_id": stations["id"],
                "name": stations["name"],
                "latitude": stations["latitude"],
                "longitude": stations["longitude"],
                "last_updated": stations["timestamp"],  # rename timestamp key to last_updated
                "free_bikes": stations["free_bikes"],
                "empty_slots": stations["empty_slots"],
                "uid": stations["extra"].get("uid", None),
                "renting": stations["extra"].get("renting", 0),
                "returning": stations["extra"].get("returning", 0),
                "address": stations["extra"].get("address", None),
                "has_ebikes": stations["extra"].get("has_ebikes", 0),
                "ebikes": stations["extra"].get("ebikes", 0),
                "normal_bikes": stations["extra"].get("normal_bikes", 0),
                "number": stations["extra"].get("number", 0),
                "slots": stations["extra"].get("slots", 0),
                "current_timestamp": time.time()             #insert runtime timestamp
            })
            
            all_stations.append(streaming_data)
    return all_stations

stations = await fetch_all_stations()

In [43]:
import json

# Open a file in write mode and save the JSON data
with open("sample_stations.json", "w") as json_file:
    json.dump(stations, json_file, indent=2)  # indent is optional, it formats the output to be more readable