# TRAIN API DATA FETCHER 

#### DEPENDENCIES

<div class="alert alert-block alert-info">

The Python libraries used in this notebook are:
<ol>
    
<li><a href="https://docs.python.org/3/library/json.html">JSON</a> to work with JSON data.
 
</li><li><a href="https://requests.readthedocs.io/en/latest/">Requests</a> to make API calls.
    
</li><li><a href="https://docs.python.org/3/library/csv.html">CSV</a> for reading and writing CSV files.
    
</li><li><a href="https://docs.python.org/3/library/random.html">Random</a> for shuffling the train station list in the script.
    
</li><li><a href="https://docs.python.org/3/library/time.html">Time</a> for time-related operations in the script.
    
</li><li><a href="https://docs.python.org/3/library/logging.html">Logging</a> for tracking events and errors during script execution.
    
</li><li><a href="https://docs.python.org/3/library/xml.etree.elementtree.html">xml.etree.ElementTree (ET)</a> to parse and work with XML data returned by the CTA API.

</li></ol>
</div>


In [None]:
import csv
import requests
import json
import random
import time
import xml.etree.ElementTree as ET
import logging

#### API KEYS

The config.json file defines a list named `TOMTOM_API_KEYS` containing multiple API keys for the TomTom API, a location-based services platform. API keys authenticate requests and grant access to geospatial functionality such as mapping, routing and, as used here, traffic flow data. Several keys are listed so that the script can fall back to the next key when a request with the current one fails.

The config.json file also defines an entry named `CTA_API_KEY`, which is used to access the Chicago Transit Authority's Train Tracker API.

#### TRAIN STATION IDs

The code below reads the `stop_id` column of a CSV file named 'stops.txt' and collects every ID in the range 40000 to 49999 into the `new_mapid_values` list. Per the CTA API documentation, IDs in this range represent every possible CTA train station, and they are the values later passed to the API as `mapid`. The extracted IDs are then displayed as the cell output.

In [None]:
# Gather every stop_id from stops.txt that is purely numeric and lies in the
# 40000-49999 range, preserving the order in which the rows appear.
with open('stops.txt', 'r', newline='', encoding='utf-8') as stops_file:
    new_mapid_values = [
        candidate
        for candidate in (record.get('stop_id', '') for record in csv.DictReader(stops_file))
        if candidate.isdigit() and 40000 <= int(candidate) <= 49999
    ]

# Bare expression so the notebook renders the resulting list.
new_mapid_values

The following cell defines a `TrafficDataCollector` class whose methods `fetch_traffic_speed` and `fetch_CTA_and_traffic` work together to fetch traffic speed data from the TomTom API and arrival data from the Chicago Transit Authority (CTA) API. The two are combined and appended to a CSV file named 'combined_data.csv'. `fetch_CTA_and_traffic` repeats this process in a loop, pausing 5 minutes between passes over the station list.

#### SCRIPT FOR FETCHING RELEVANT TRANSIT DATA

In [None]:
# Emit INFO-level (and more severe) messages from the root logger.
logging.basicConfig(level=logging.INFO)

# API endpoints queried by the collector.
TOMTOM_TRAFFIC_URL = (
    'https://api.tomtom.com/traffic/services/4/flowSegmentData/relative/10/json'
)
CTA_URL = 'https://lapi.transitchicago.com/api/1.0/ttarrivals.aspx'

# Column names written as the first row of the output CSV, in row order.
CSV_HEADER = [
    'mapid',
    'route',
    'prd_time',
    'arr_time',
    'sch_bool',
    'flt_bool',
    'dly_bool',
    'station_name',
    'dest_name',
    'latitude',
    'longitude',
    'free_flow_speed',
    'current_speed',
]

class TomTomAPIException(Exception):
    """Custom exception for TomTom API errors.

    Construct it with a human-readable message, e.g.
    ``TomTomAPIException("quota exceeded")``; the message is available through
    ``str(exc)`` and ``exc.args`` exactly as for any ``Exception``.
    """
    # No __init__ override: the previous one only forwarded its argument to
    # Exception.__init__, which the inherited constructor already does.

class TrafficDataCollector:
    """Pair CTA arrival predictions with TomTom traffic speeds.

    For every station ID in ``mapid_list`` the collector queries the CTA
    arrivals endpoint, looks up the TomTom traffic speed at the
    latitude/longitude carried by each returned ``<eta>`` element, and appends
    one CSV row per prediction.
    """

    # Seconds to wait on either API before abandoning a request. Requests
    # applies no timeout by default, so a stalled connection would otherwise
    # block the collection loop indefinitely.
    REQUEST_TIMEOUT = 10

    # Child tags read from each <eta> element, in the order of the matching
    # CSV columns (route through dest_name).
    _ETA_FIELD_TAGS = ("rt", "prdt", "arrT", "isSch", "isFlt", "isDly", "staNm", "destNm")

    def __init__(self, tomtom_api_keys, cta_api_key):
        """Store the API credentials; the station list starts out empty.

        Args:
            tomtom_api_keys: Sequence of TomTom API keys, tried in order.
            cta_api_key: Key sent with every CTA request.
        """
        self.tomtom_api_keys = tomtom_api_keys
        self.cta_api_key = cta_api_key
        self.mapid_list = []

    def load_mapid_values(self, stops_path='stops.txt'):
        """Load station IDs (40000-49999) from the ``stop_id`` column of a CSV.

        The list is rebuilt on every call, so calling this twice does not
        leave duplicate IDs in ``mapid_list``.

        Args:
            stops_path: Path of the CSV file to read.

        Raises:
            FileNotFoundError: If ``stops_path`` does not exist.
        """
        with open(stops_path, 'r', newline='', encoding='utf-8') as csv_file:
            # DictReader fills cells missing from short rows with None, hence
            # the `or ''` so the string checks below never see None.
            stop_ids = [row.get('stop_id') or '' for row in csv.DictReader(csv_file)]

        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts e.g. superscript digits, which make int() raise ValueError.
        self.mapid_list = [
            stop_id for stop_id in stop_ids
            if stop_id.isdecimal() and 40000 <= int(stop_id) <= 49999
        ]

    def fetch_traffic_speed(self, latitude, longitude, retry_keys=None):
        """Return ``(free_flow_speed, current_speed)`` in mph for a point.

        Each API key is tried in turn until one request yields both speeds.

        Args:
            latitude: Latitude of the point to query.
            longitude: Longitude of the point to query.
            retry_keys: Optional keys to use instead of ``tomtom_api_keys``.

        Returns:
            The two speeds as reported by TomTom, or ``(None, None)`` when no
            key produced a usable response.
        """
        remaining_keys = list(retry_keys or self.tomtom_api_keys)

        while remaining_keys:
            current_api_key = remaining_keys.pop(0)
            traffic_params = {
                'point': f'{latitude},{longitude}',
                'unit': 'mph',
                'key': current_api_key
            }

            try:
                traffic_response = requests.get(
                    TOMTOM_TRAFFIC_URL, params=traffic_params, timeout=self.REQUEST_TIMEOUT)
                traffic_response.raise_for_status()  # Raise an exception for HTTP errors
                traffic_data = traffic_response.json()
            except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
                logging.error("Error with the TomTom API: %s", e)
                if remaining_keys:
                    logging.info("Retrying with the next key...")
                    time.sleep(2)  # Brief pause before trying the next key
                continue

            flow_segment_data = (
                traffic_data.get("flowSegmentData") if isinstance(traffic_data, dict) else None
            )
            if isinstance(flow_segment_data, dict):
                free_flow_speed_mph = flow_segment_data.get("freeFlowSpeed")
                current_speed_mph = flow_segment_data.get("currentSpeed")
                # Compare against None rather than truthiness: a speed of 0 is
                # a real measurement and must not be treated as missing.
                if free_flow_speed_mph is not None and current_speed_mph is not None:
                    return free_flow_speed_mph, current_speed_mph

            logging.warning(
                "TomTom response contained no speed data for %s,%s", latitude, longitude)

        logging.error("Exhausted all available API keys.")
        return None, None

    def fetch_CTA_and_traffic(self, output_path='combined_data.csv',
                              interval_seconds=5 * 60, max_cycles=None):
        """Repeatedly collect data for every station and append it to a CSV.

        Each cycle shuffles ``mapid_list``, processes every station, and closes
        the output file before sleeping, so rows are on disk between cycles.

        Args:
            output_path: CSV file to append to; the header is written only
                when the file is empty.
            interval_seconds: Pause between two consecutive cycles.
            max_cycles: Number of cycles to run; ``None`` (the default) keeps
                collecting forever.
        """
        completed_cycles = 0
        while max_cycles is None or completed_cycles < max_cycles:
            if completed_cycles:
                time.sleep(interval_seconds)

            with open(output_path, 'a', newline='', encoding='utf-8') as csv_file:
                csv_writer = csv.writer(csv_file)

                # Write header once
                if csv_file.tell() == 0:
                    csv_writer.writerow(CSV_HEADER)

                random.shuffle(self.mapid_list)
                for mapid in self.mapid_list:
                    self._collect_station(mapid, csv_writer)

            completed_cycles += 1

    def _collect_station(self, mapid, csv_writer):
        """Fetch the CTA predictions for one station and write a row for each."""
        cta_params = {
            'mapid': mapid,
            'max': '7',
            'key': self.cta_api_key
        }

        try:
            cta_response = requests.get(CTA_URL, params=cta_params, timeout=self.REQUEST_TIMEOUT)
            cta_response.raise_for_status()  # Raise an exception for HTTP errors
            root = ET.fromstring(cta_response.text)
        except requests.exceptions.RequestException as e:
            logging.error("Error with the CTA API: %s", e)
            time.sleep(30)  # Back off before moving on to the next station
            return
        except ET.ParseError as e:
            # A malformed response must not abort the whole collection loop.
            logging.error("Malformed XML from the CTA API for mapid %s: %s", mapid, e)
            return

        for eta in root.findall(".//eta"):
            self.process_cta_eta(mapid, eta, csv_writer)

    def process_cta_eta(self, mapid, eta, csv_writer):
        """Write one CSV row for an ``<eta>`` element, if it can be completed.

        No row is written when the element lacks usable ``lat``/``lon`` text
        or when no traffic speed could be fetched for that position. Other
        child tags that are absent are written as empty cells.

        Args:
            mapid: Station ID the prediction was requested for.
            eta: ``xml.etree.ElementTree.Element`` for one prediction.
            csv_writer: ``csv.writer`` receiving the row.
        """
        latitude = eta.findtext("lat")
        longitude = eta.findtext("lon")
        if not latitude or not longitude:
            return

        try:
            point = float(latitude), float(longitude)
        except ValueError:
            logging.warning("Skipping prediction with invalid coordinates: %s,%s",
                            latitude, longitude)
            return

        free_flow_speed, current_speed = self.fetch_traffic_speed(*point)
        if free_flow_speed is None:
            return

        # findtext() yields None for a missing tag (written as an empty cell)
        # instead of raising AttributeError like find(...).text would.
        eta_values = [eta.findtext(tag) for tag in self._ETA_FIELD_TAGS]
        csv_writer.writerow(
            [mapid, *eta_values, latitude, longitude, free_flow_speed, current_speed])

def main(config_path='config.json'):
    """Load the API keys from the config file and start collecting data.

    Problems with the configuration or the station list are logged and make
    the function return early instead of raising.

    Args:
        config_path: JSON file providing ``TOMTOM_API_KEYS`` and
            ``CTA_API_KEY``.
    """
    # Only the configuration steps live in this try block, so that errors
    # raised later during collection are not reported as configuration errors.
    try:
        with open(config_path, encoding='utf-8') as config_file:
            config_data = json.load(config_file)

        tomtom_api_keys = config_data["TOMTOM_API_KEYS"]
        cta_api_key = config_data["CTA_API_KEY"]
    except (FileNotFoundError, KeyError, TypeError, json.JSONDecodeError) as e:
        # TypeError covers valid JSON whose top level is not an object.
        logging.error("Error loading configuration: %s", e)
        return

    collector = TrafficDataCollector(tomtom_api_keys, cta_api_key)
    try:
        collector.load_mapid_values()
    except FileNotFoundError as e:
        logging.error("Error loading station list: %s", e)
        return

    collector.fetch_CTA_and_traffic()


if __name__ == "__main__":
    main()
