In [1]:
import datetime
from pathlib import Path

import pandas as pd
import requests

In [7]:
def extract_train_status(train_no="12046", timeout=10):
    """Fetch the train list from the rata.digitraffic.fi ``/api/v1/trains`` endpoint.

    Args:
        train_no: Kept for interface compatibility. The request below does not
            use it, so the unfiltered endpoint response is returned.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        HTTP status is not 200, or the body is not valid JSON.
    """
    url = "https://rata.digitraffic.fi/api/v1/trains"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures follow the same "print and return None" path
        # as a bad status code, so callers only need to check for None.
        print(f"❌ Failed to fetch train data: {exc}")
        return None

    if response.status_code != 200:
        print("❌ Failed to fetch train data")
        return None

    try:
        return response.json()
    except ValueError:
        # JSON decode errors raised by response.json() are ValueError subclasses.
        print("❌ Failed to fetch train data")
        return None

def transform_train_data(trains):
    """Flatten train records into one DataFrame row per timetable entry.

    Args:
        trains: Iterable of dicts, each optionally carrying ``trainNumber``,
            ``departureDate`` and a ``timeTableRows`` list of dicts. ``None``
            is treated as an empty iterable.

    Returns:
        A ``pandas.DataFrame`` with the columns TrainNumber, DepartureDate,
        StationCode, EventType, ScheduledTime, ActualTime and DelayMinutes.
        The columns are present even when there are no rows, so the result
        always has the same schema.
    """
    columns = [
        "TrainNumber",
        "DepartureDate",
        "StationCode",
        "EventType",
        "ScheduledTime",
        "ActualTime",
        "DelayMinutes",
    ]

    records = []
    for train in trains or []:
        train_number = train.get("trainNumber")
        departure_date = train.get("departureDate")

        # `or []` also covers an explicit null value, not just a missing key.
        for row in train.get("timeTableRows") or []:
            records.append({
                "TrainNumber": train_number,
                "DepartureDate": departure_date,
                "StationCode": row.get("stationShortCode"),
                "EventType": row.get("type"),  # ARRIVAL / DEPARTURE
                "ScheduledTime": row.get("scheduledTime"),
                "ActualTime": row.get("actualTime"),
                "DelayMinutes": row.get("differenceInMinutes"),
            })

    # Passing `columns` keeps the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=columns)

def load_to_csv(df, filename="train_status.csv"):
    """Append ``df`` to a CSV file, writing the header row only when needed.

    Args:
        df: DataFrame to write; the index is not written.
        filename: Destination path. Rows are appended if the file exists.

    The header is written when the file is missing or has zero bytes, so a
    pre-created empty file still ends up with column names.
    """
    target = Path(filename)
    # A missing or zero-byte file has no header row yet.
    needs_header = not target.exists() or target.stat().st_size == 0
    df.to_csv(target, mode="a", header=needs_header, index=False)
    print(f"✅ Data loaded to {filename}")

# Driver: run extract -> transform -> load once, then preview the result.
if __name__ == "__main__":
    raw = extract_train_status()
    # Skip the remaining steps when extraction returned None or an empty payload.
    if raw:
        df = transform_train_data(raw)
        # Appends to the default "train_status.csv" on every run.
        load_to_csv(df)
        print(df.head())


✅ Data loaded to train_status.csv
   TrainNumber DepartureDate StationCode  EventType             ScheduledTime  \
0           67    2025-09-15         HKI  DEPARTURE  2025-09-15T11:19:00.000Z   
1           67    2025-09-15         PSL    ARRIVAL  2025-09-15T11:24:00.000Z   
2           67    2025-09-15         PSL  DEPARTURE  2025-09-15T11:25:00.000Z   
3           67    2025-09-15         KÄP    ARRIVAL  2025-09-15T11:27:00.000Z   
4           67    2025-09-15         KÄP  DEPARTURE  2025-09-15T11:27:00.000Z   

                 ActualTime  DelayMinutes  
0  2025-09-15T11:19:46.000Z             1  
1  2025-09-15T11:24:07.000Z             0  
2  2025-09-15T11:25:33.000Z             1  
3  2025-09-15T11:27:31.000Z             1  
4  2025-09-15T11:27:31.000Z             1  
