# MODULES

Import necessary libraries and modules.

In [None]:
import requests
import xml.etree.ElementTree as ET
import json
import csv
import time
import smtplib
import random
import sys

In [None]:
# Show the version string of the Python interpreter running this notebook.
sys.version

# API KEYS

The config.json file defines a list named `TOMTOM_API_KEYS` containing multiple API keys. These keys are intended for use with the TomTom API, a location-based services platform. API keys are used to authenticate requests and access geospatial functionalities like mapping and routing. 

The config.json file also defines a variable named `CTA_API_KEY`, which holds the key used to access the Chicago Transit Authority's Train Tracker API.

# TRAIN STATION IDs

The code below reads the `stop_id` column of the CSV file `stops.txt` and appends each valid map ID to the `new_mapid_values` list. An ID is valid when it lies between 40000 and 49999, the range that, per the CTA API documentation, covers every possible CTA train station (these are the `mapid` values passed to the Train Tracker API). The extracted map IDs are then displayed as the cell's output.

In [None]:
# Collect every stop_id from stops.txt that lies in the 40000-49999 range.
# 'utf-8-sig' decodes the file whether or not it starts with a byte-order
# mark; with plain 'utf-8' a BOM would stay glued to the first header name,
# so a leading 'stop_id' column would never match and the list would
# silently come out empty.
with open('stops.txt', 'r', newline='', encoding='utf-8-sig') as csv_file:
    # DictReader fills columns missing from a short row with None, so
    # normalise that to '' before calling str methods on the value.
    stop_ids = (row.get('stop_id') or '' for row in csv.DictReader(csv_file))
    # IDs are kept as strings, exactly as they appear in the file.
    new_mapid_values = [
        stop_id
        for stop_id in stop_ids
        if stop_id.isdigit() and 40000 <= int(stop_id) <= 49999
    ]

new_mapid_values

The cell below defines two functions, `fetch_traffic_speed` and `fetch_CTA_and_traffic`, that work together: the first queries the TomTom API for traffic speed, and the second queries the Chicago Transit Authority (CTA) API for train arrival data and calls the first for each arrival. The combined data is appended to a CSV file named `combined_data.csv`. The `fetch_CTA_and_traffic` function repeats this process in a loop, with a 5-minute interval between each data update.

# SCRIPT FOR SCRAPING RELEVANT TRANSIT DATA

In [None]:
# Read the API credentials stored in config.json.
with open('config.json') as handle:
    config_data = json.loads(handle.read())

# TomTom keys (a list; fetch_traffic_speed tries them in order) and the
# single key used for the CTA Train Tracker API.
TOMTOM_API_KEYS = config_data["TOMTOM_API_KEYS"]
CTA_API_KEY = config_data["CTA_API_KEY"]

def fetch_traffic_speed(latitude, longitude, retry_keys=None, timeout=10):
    """
    Fetch traffic speed data using TomTom API for a given latitude and longitude.

    Each API key is tried at most once, in order. A key is abandoned in favour
    of the next one when the server answers with a non-200 status, when the
    body is not valid JSON or lacks "flowSegmentData", or when the connection
    is forcibly closed by the remote host. Any other request error (including
    a timeout) stops the attempts immediately.

    Parameters:
        latitude (float): Latitude coordinate.
        longitude (float): Longitude coordinate.
        retry_keys (list, optional): TomTom API keys to try, in order. When
            omitted (None), the module-level TOMTOM_API_KEYS list is used. An
            explicitly empty list means "no keys" and yields (None, None).
        timeout (float, optional): Seconds to wait for the TomTom server
            before giving up on a request. Defaults to 10.

    Returns:
        tuple: (free flow speed, current speed) in mph. A speed missing from
        an otherwise valid response is reported as the string "N/A".
        (None, None) is returned when no usable response was obtained.
    """

    traffic_url = 'https://api.tomtom.com/traffic/services/4/flowSegmentData/relative/10/json'

    # Only fall back to the global keys when the caller passed nothing at all;
    # `retry_keys or ...` would also discard an explicitly empty list.
    api_keys = TOMTOM_API_KEYS if retry_keys is None else retry_keys
    remaining_keys = list(api_keys)  # copy, so the caller's list is never consumed

    retry_seconds = 2  # Pause before switching keys after a 403

    while remaining_keys:
        current_api_key = remaining_keys.pop(0)

        traffic_params = {
            'point': f'{latitude},{longitude}',
            'unit': 'mph',
            'key': current_api_key
        }

        try:
            # Without a timeout a stalled connection would block forever.
            traffic_response = requests.get(traffic_url, params=traffic_params, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print("Request error:", e)
            if isinstance(e, requests.exceptions.ConnectionError) and "forcibly closed by the remote host" in str(e):
                if remaining_keys:
                    print("Retrying with the next key...")
                continue  # With no keys left this falls through to the while/else below
            break  # Other request errors are not key-related; stop trying

        status_code = traffic_response.status_code

        if status_code == 200:
            try:
                traffic_data = traffic_response.json()
            except ValueError as e:
                # Every JSON decode error requests can raise (stdlib json,
                # simplejson, or requests' own wrapper) is a ValueError.
                print("Error decoding JSON response:", e)
                continue

            if "flowSegmentData" in traffic_data:
                flow_segment_data = traffic_data["flowSegmentData"]
                free_flow_speed_mph = flow_segment_data.get("freeFlowSpeed", "N/A")
                current_speed_mph = flow_segment_data.get("currentSpeed", "N/A")
                return free_flow_speed_mph, current_speed_mph

            print("TomTom API response does not contain flowSegmentData.")
        elif status_code == 403:
            print("TomTom API request error - status code 403 (Forbidden). Switching to the next available key...")
            if remaining_keys:
                print(f"Retrying with the next key in {retry_seconds} seconds...")
                time.sleep(retry_seconds)  # Wait for the specified seconds before retrying
        else:
            print("TomTom API request error - status code:", status_code)
    else:
        # Reached only when the loop ran out of keys (not after `break`), so
        # the message is printed once and only when it is actually true.
        print("Exhausted all available API keys.")

    return None, None

def fetch_CTA_and_traffic(timeout=10):
    """
    Fetch CTA arrival data and combine it with traffic speed data, then save to CSV file.

    This function loops forever. On each cycle it shuffles the module-level
    new_mapid_values list in place, requests arrival predictions from the CTA
    API for every map ID in it, looks up the traffic speed at each arrival's
    coordinates with fetch_traffic_speed, appends one row per arrival to
    'combined_data.csv', and then sleeps for 5 minutes.

    Row layout: mapid, rt, prdt, arrT, isSch, isFlt, isDly, staNm, destNm,
    lat, lon, free flow speed, current speed. Arrivals without usable
    coordinates, or for which no traffic speed could be fetched, are skipped.

    Parameters:
        timeout (float, optional): Seconds to wait for the CTA server before
            giving up on a request. Defaults to 10.
    """

    cta_url = 'https://lapi.transitchicago.com/api/1.0/ttarrivals.aspx'

    # Child tags of each <eta> element copied verbatim into the row, in column
    # order. See the 'CTA Train Tracker API Documentation' for their meaning.
    eta_fields = ("rt", "prdt", "arrT", "isSch", "isFlt", "isDly", "staNm", "destNm")

    while True:
        with open('combined_data.csv', 'a', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)

            # Shuffle the new_mapid_values list
            random.shuffle(new_mapid_values)

            for mapid in new_mapid_values:
                cta_params = {
                    'mapid': mapid,
                    'max': '7',
                    'key': CTA_API_KEY
                }

                try:
                    # Without a timeout a stalled connection would block forever.
                    cta_response = requests.get(cta_url, params=cta_params, timeout=timeout)
                    cta_response.raise_for_status()  # Raise an exception for HTTP errors
                except requests.exceptions.RequestException as e:
                    print("Request error:", e)
                    if isinstance(e, requests.exceptions.ConnectionError) and "forcibly closed by the remote host" in str(e):
                        print("Waiting for 30 seconds before retrying...")
                        time.sleep(30)
                        continue  # Move on to the next map ID
                    break  # Abandon the rest of this cycle; the outer loop resumes after the pause

                if cta_response.status_code != 200:
                    continue

                try:
                    root = ET.fromstring(cta_response.text)
                except ET.ParseError as e:
                    # A malformed body must not bring down the whole scraper.
                    print("Error parsing CTA API response:", e)
                    continue

                for eta in root.findall(".//eta"):
                    # findtext returns None for a missing element and '' for
                    # an element without text.
                    latitude = eta.findtext("lat")
                    longitude = eta.findtext("lon")

                    if latitude is None or longitude is None:
                        print("Missing latitude or longitude in CTA API response.")
                        continue

                    try:
                        coordinates = float(latitude), float(longitude)
                    except ValueError:
                        # Covers both empty and non-numeric coordinate text.
                        print("Invalid latitude or longitude in CTA API response.")
                        continue

                    free_flow_speed, current_speed = fetch_traffic_speed(*coordinates)
                    if free_flow_speed is None:
                        continue  # No traffic data available for this arrival

                    # findtext (rather than find(...).text) keeps a missing
                    # child element from raising; csv writes None as an empty cell.
                    csv_writer.writerow(
                        [mapid, *(eta.findtext(tag) for tag in eta_fields),
                         latitude, longitude, free_flow_speed, current_speed])

        time.sleep(5 * 60)  # 5 minutes in seconds

# Start collecting data. The function loops indefinitely, so this call does
# not return on its own; interrupt the kernel to stop it.
fetch_CTA_and_traffic()
