# OpenSky Network - Flight Tracker

Fetch real-time flight data from the OpenSky Network using the REST data source.

**Available Regions:**
- `EUROPE`, `NORTH_AMERICA`, `SOUTH_AMERICA`, `ASIA`, `AUSTRALIA`, `AFRICA`, `GLOBAL`

In [None]:
# Setup
from pyspark.sql import SparkSession
from pyspark_datasources import rest_api_call, parse_array_response, parse_array_response_streaming, RestDataSource
import json

# Local Spark session for this notebook; local[*] runs Spark in-process
# with one worker thread per available core.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("OpenSky")
    .getOrCreate()
)

# Define regions (from opensky.py)
# Each entry is a bounding box in degrees: lamin/lamax bound the latitude and
# lomin/lomax bound the longitude. The key names match the query parameters
# of the OpenSky /states/all endpoint.
regions = {
    "EUROPE": {"lamin": 35.0, "lamax": 72.0, "lomin": -25.0, "lomax": 45.0},
    "NORTH_AMERICA": {"lamin": 7.0, "lamax": 72.0, "lomin": -168.0, "lomax": -60.0},
    "SOUTH_AMERICA": {"lamin": -56.0, "lamax": 15.0, "lomin": -90.0, "lomax": -30.0},
    "ASIA": {"lamin": -10.0, "lamax": 82.0, "lomin": 45.0, "lomax": 180.0},
    "AUSTRALIA": {"lamin": -50.0, "lamax": -10.0, "lomin": 110.0, "lomax": 180.0},
    "AFRICA": {"lamin": -35.0, "lamax": 37.0, "lomin": -20.0, "lomax": 52.0},
    # Whole-world box: GLOBAL is advertised as an available region, so it
    # must be selectable here rather than raising KeyError.
    "GLOBAL": {"lamin": -90.0, "lamax": 90.0, "lomin": -180.0, "lomax": 180.0},
}

# Column names for OpenSky flight arrays.
# Each entry of "states" is a positional array, so the order here must follow
# the state-vector index order documented by the OpenSky REST API:
# index 7 is the barometric altitude, index 13 the geometric altitude, and
# index 16 the position source ("category" is index 17 and is only returned
# when the request sets extended=1, which this notebook does not).
column_names = [
    "icao24", "callsign", "origin_country", "time_position", "last_contact",
    "longitude", "latitude", "baro_altitude", "on_ground", "velocity",
    "true_track", "vertical_rate", "sensors", "geo_altitude",
    "squawk", "spi", "position_source"
]

print("✓ Ready!")

## Batch Example - One-Time Fetch

In [None]:
# Choose region
region = "NORTH_AMERICA"
bbox = regions[region]

# Create input DataFrame: a single row with the region name and its bounding box
input_df = spark.createDataFrame([{"region": region, **bbox}])
input_df.show()

# Fetch flights.
# The {lamin}/{lamax}/{lomin}/{lomax} placeholders are kept literal in the URL
# (plain string, same form as the streaming example); presumably the REST
# helper substitutes them from the input row's columns — confirm against
# rest_api_call.
url = "https://opensky-network.org/api/states/all?lamin={lamin}&lamax={lamax}&lomin={lomin}&lomax={lomax}"
response = rest_api_call(input_df, url=url, method="GET", queryType="querystring", partitions="1")

# Parse to individual flights.
# count() and show() below are two separate Spark actions; caching lets the
# second one reuse the rows computed by the first instead of re-evaluating the
# plan (which would likely mean a second request to the API).
flights = parse_array_response(
    response, array_path="states", column_names=column_names, timestamp_field="time"
).cache()

print(f"\n✓ Found {flights.count()} flights")
flights.select("region", "time", "icao24", "callsign", "origin_country", "latitude", "longitude").show(20)

## Streaming Example - Continuous Monitoring

Polls the API every 10 seconds, prints the individual flights to the console, and stops after 5 minutes.

In [None]:
import time

# Choose region
region = "NORTH_AMERICA"
bbox = regions[region]

# Create input DataFrame and serialise its rows as a JSON list of records,
# which is the form passed to the data source through the "inputData" option.
input_df = spark.createDataFrame([{"region": region, **bbox}])
input_json = json.dumps(input_df.toPandas().to_dict(orient='records'))

# Register data source
spark.dataSource.register(RestDataSource)

# Configure streaming (poll interval of 10, offsets tracked on the "time" field)
url = "https://opensky-network.org/api/states/all?lamin={lamin}&lamax={lamax}&lomin={lomin}&lomax={lomax}"
stream_df = spark.readStream.format("rest") \
    .option("url", url) \
    .option("method", "GET") \
    .option("streaming", "true") \
    .option("inputData", input_json) \
    .option("queryType", "querystring") \
    .option("streamingInterval", "10") \
    .option("offsetType", "timestamp") \
    .option("offsetField", "time") \
    .option("initialOffset", "0") \
    .load()

# Parse to individual flights using helper
flights = parse_array_response_streaming(stream_df, array_path="states", column_names=column_names, timestamp_field="time")

# Select columns to display
flights_display = flights.select("region", "time", "icao24", "callsign", "origin_country", "latitude", "longitude")

# Start streaming
query = flights_display.writeStream.outputMode("append").format("console").option("numRows", 20).start()

print("✓ Streaming... (5 minutes)")
try:
    # Wait up to 300 seconds. Unlike a plain sleep, this returns as soon as
    # the query terminates and re-raises the error if the query failed, so a
    # broken stream is reported instead of silently waiting out the timer.
    query.awaitTermination(300)
except KeyboardInterrupt:
    # Interrupting the cell is the expected way to end monitoring early.
    pass
finally:
    # Always stop the query so it does not keep polling in the background.
    query.stop()

print("✓ Stopped")