In [1]:
import requests
import xml.etree.ElementTree as ET
from datetime import datetime

In [2]:
# API URL for getting current trains
CURRENT_TRAINS_URL = "http://api.irishrail.ie/realtime/realtime.asmx/getCurrentTrainsXML"

def get_train_codes():
    response = requests.get(CURRENT_TRAINS_URL)
    
    if response.status_code != 200:
        print("Failed to fetch current trains")
        return []

    root = ET.fromstring(response.text)
    namespace = {"ns": "http://api.irishrail.ie/realtime/"}
    train_codes = []

    for train in root.findall("ns:objTrainPositions", namespace):
        train_code = train.find("ns:TrainCode", namespace).text
        train_date_raw = train.find("ns:TrainDate", namespace).text  # Example: '31 Mar 2025'

        # Convert date format
        train_date = datetime.strptime(train_date_raw, "%d %b %Y").strftime("%d-%m-%Y")

        if train_code and train_date:
            train_codes.append((train_code, train_date))

    return train_codes


# Fetch current train codes
train_codes = get_train_codes()
print("Current Train Codes:", train_codes)

Current Train Codes: [('P559', '26-04-2025'), ('E909', '26-04-2025'), ('A110', '26-04-2025'), ('A206', '26-04-2025'), ('A208', '26-04-2025'), ('A802', '26-04-2025'), ('D910', '26-04-2025'), ('A211', '26-04-2025'), ('A302', '26-04-2025'), ('A209', '26-04-2025'), ('A431', '26-04-2025'), ('A600', '26-04-2025'), ('P211', '26-04-2025'), ('A303', '26-04-2025'), ('A508', '26-04-2025'), ('E809', '26-04-2025'), ('A483', '26-04-2025'), ('E103', '26-04-2025'), ('E908', '26-04-2025'), ('E210', '26-04-2025'), ('D206', '26-04-2025'), ('A407', '26-04-2025'), ('E808', '26-04-2025'), ('E211', '26-04-2025'), ('A706', '26-04-2025'), ('A709', '26-04-2025'), ('P747', '26-04-2025'), ('D909', '26-04-2025'), ('P748', '26-04-2025'), ('E212', '26-04-2025'), ('E807', '26-04-2025'), ('E104', '26-04-2025'), ('D806', '26-04-2025'), ('A107', '26-04-2025'), ('A902', '26-04-2025'), ('A905', '26-04-2025'), ('A108', '26-04-2025'), ('P710', '26-04-2025'), ('A106', '26-04-2025'), ('A462', '26-04-2025'), ('E209', '26-04-20

In [3]:
import requests
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
import pandas as pd

# API Namespace
NAMESPACE = {"ns": "http://api.irishrail.ie/realtime/"}

# Function to fetch train movements for a given TrainId and date
def get_train_movements(train_id, train_date):
    formatted_date = train_date.strftime("%d %b %Y")  # Convert to API format
    url = f"http://api.irishrail.ie/realtime/realtime.asmx/getTrainMovementsXML?TrainId={train_id}&TrainDate={formatted_date}"
    response = requests.get(url)
    
    if response.status_code != 200:
        print(f"Failed to fetch data for {train_id} on {formatted_date}")
        return []
    
    root = ET.fromstring(response.text)
    movements = []
    
    for movement in root.findall("ns:objTrainMovements", NAMESPACE):
        movements.append({
            "TrainCode": train_id,
            "TrainDate": formatted_date,
            "LocationCode": movement.find("ns:LocationCode", NAMESPACE).text,
            "LocationFullName": movement.find("ns:LocationFullName", NAMESPACE).text,
            "ScheduledArrival": movement.find("ns:ScheduledArrival", NAMESPACE).text,
            "ActualArrival": movement.find("ns:Arrival", NAMESPACE).text,
            "ScheduledDeparture": movement.find("ns:ScheduledDeparture", NAMESPACE).text,
            "ActualDeparture": movement.find("ns:Departure", NAMESPACE).text,
            "LocationType": movement.find("ns:LocationType", NAMESPACE).text,
            "TrainOrigin": movement.find("ns:TrainOrigin", NAMESPACE).text,
            "TrainDestination": movement.find("ns:TrainDestination", NAMESPACE).text
        })
    
    return movements

# Function to collect historical data for past N days
def collect_historical_data(train_ids, days_back=30):
    all_data = []
    
    for days_ago in range(days_back):
        date_to_check = datetime.today() - timedelta(days=days_ago)
        print(f"Checking trains for {date_to_check.strftime('%d-%m-%Y')}...")
        
        for train_id in train_ids:
            train_data = get_train_movements(train_id, date_to_check)
            all_data.extend(train_data)
    
    return pd.DataFrame(all_data)

# Fetch current train IDs
train_codes = get_train_codes()
train_ids = [train[0] for train in train_codes]  # Extract Train IDs only

# Collect historical data
df = collect_historical_data(train_ids, days_back=1095)

df.to_csv("irish_rail_historical_data.csv", index=False)
print(df.head())

Checking trains for 26-04-2025...
Checking trains for 25-04-2025...
Checking trains for 24-04-2025...
Checking trains for 23-04-2025...
Checking trains for 22-04-2025...
Checking trains for 21-04-2025...
Checking trains for 20-04-2025...
Checking trains for 19-04-2025...
Checking trains for 18-04-2025...
Checking trains for 17-04-2025...
Checking trains for 16-04-2025...
Checking trains for 15-04-2025...
Checking trains for 14-04-2025...
Checking trains for 13-04-2025...
Checking trains for 12-04-2025...
Checking trains for 11-04-2025...
Checking trains for 10-04-2025...
Checking trains for 09-04-2025...
Checking trains for 08-04-2025...
Checking trains for 07-04-2025...
Checking trains for 06-04-2025...
Checking trains for 05-04-2025...
Checking trains for 04-04-2025...
Checking trains for 03-04-2025...
Checking trains for 02-04-2025...
Checking trains for 01-04-2025...
Checking trains for 31-03-2025...
Checking trains for 30-03-2025...
Checking trains for 29-03-2025...
Checking train