In [15]:
import geopandas as gpd
import matplotlib.pyplot as plt
import requests
import pandas as pd
import urllib.request, json

In [16]:
def API_Request(Stations, SubscriptionKey):
    
    #API Headers
    url = f"https://gateway.apiportal.ns.nl/Spoorkaart-API/api/v1/traject.geojson?stations={Stations}"
    headers = {
    'Cache-Control': 'no-cache',
    'Ocp-Apim-Subscription-Key': SubscriptionKey,
    }

    # Make the API request
    req = urllib.request.Request(url, headers=headers)
    response = urllib.request.urlopen(req)
    response_content = response.read().decode('utf-8')

    # Load the response content into JSON
    geojson_data = json.loads(response_content)

    # Convert the GeoJSON data to a GeoDataFrame
    gdf = gpd.GeoDataFrame.from_features(geojson_data['features'])

    return gdf


In [19]:
# function to make a dictionary with the seats form the transervices csv
def SeatSorter(TrainServicesData, TrainTravelData):
    # Inladen CSV met alle NS treindiensten
    TrainServices = pd.read_csv(TrainServicesData, delimiter = ';')

    # Dictionary maken met per treinserie een dataframe met ieder stukje tussen stations er in
    dataframes_dict = {}

    TrainServices.loc[0,'Code']
    TrainServices.loc[0,'String'].split(',')

    for i in range(len(TrainServices)):
        a = TrainServices.loc[i,'String'].split(',')
        b = a[::-1] #b is equal to a reversed
        a += b
        df = pd.DataFrame({'From':[],'To':[],'Seats':[]})
        for j in range(len(a) - 1):
            new_row = pd.DataFrame({'From': [a[j]], 'To': [a[j + 1]], 'Seats':[None]})
            df = pd.concat([df, new_row], ignore_index=True)
        dataframes_dict[TrainServices.loc[i,'Code']] = df

    # Dictionary vullen met stoel aantallen
    for i in range(len(TrainTravelData)):
        a = TrainTravelData.loc[i, 'JourneyNumber']//100
        c = 0
        for j in range(len(dataframes_dict[a*100])):
            if TrainTravelData.loc[i, 'UserStopCodeBegin'] == dataframes_dict[a*100].loc[j, 'From'].upper():
                c = 1
            if c == 1:
                if dataframes_dict[a*100].loc[j, 'Seats'] == None:
                    dataframes_dict[a*100].loc[j, 'Seats'] = TrainTravelData.loc[i, 'Seats']
                else:
                    dataframes_dict[a*100].loc[j, 'Seats'] += TrainTravelData.loc[i, 'Seats']
            if TrainTravelData.loc[i, 'UserStopCodeEnd'] == dataframes_dict[a*100].loc[j, 'To'].upper():
                break
    return dataframes_dict

In [58]:
# function to assign a color to a number based on where in the range of numbers it lies
def interpolate_color(lower_limit, upper_limit, lower_color, upper_color, number):
    # Clamp the number between the lower and upper limit
    number = max(min(number, upper_limit), lower_limit)
    
    # Calculate the interpolation factor
    factor = (number - lower_limit) / (upper_limit - lower_limit)
    
    # Interpolate each RGB component
    interpolated_color = tuple(
        int(lower_component + (upper_component - lower_component) * factor)
        for lower_component, upper_component in zip(lower_color, upper_color)
    )
    
    #normalize the color for usage
    color = [interpolated_color / 255.0 for interpolated_color in interpolated_color] 

    return color

In [7]:
#   NS Bron aanpassen
# Inladen CSV en toevoegen kolommen voor Stoelen en Bezette stoelen
TravelData = pd.read_csv('OC_NS_20241007.csv')
TravelData['Seats'] = 0
TravelData['Occupied Seats'] = 0

# Voor iedere treinserie het aantal stoelen toevoegen
for i in range(len(TravelData)):
    if TravelData.loc[i, 'VehicleType'] == "VIRM":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 100
    if TravelData.loc[i, 'VehicleType'] == "DDZ":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 100
    if TravelData.loc[i, 'VehicleType'] == "FLIRT FFF":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 53
    if TravelData.loc[i, 'VehicleType'] == "ICM":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 75
    if TravelData.loc[i, 'VehicleType'] == "ICNG25":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 52
    if TravelData.loc[i, 'VehicleType'] == "SLT":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 54
    if TravelData.loc[i, 'VehicleType'] == "SNG":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 50
    if TravelData.loc[i, 'VehicleType'] == "SW7-25KV":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 48
    if TravelData.loc[i, 'VehicleType'] == "SW9-25KV":
        TravelData.loc[i, 'Seats'] =  TravelData.loc[i, 'TotalNumberOfCoaches'] * 48

# Maar 1 week behouden dus 3 dagen weghalen uit de tabel
TravelData['OperatingDay'] = pd.to_datetime(TravelData['OperatingDay'])
dates_to_exclude = pd.to_datetime(['2024-10-14', '2024-10-15', '2024-10-16'])
df_filtered = TravelData[~TravelData['OperatingDay'].isin(dates_to_exclude)]
TravelData = df_filtered.reset_index(drop=True)

#treincodes boven 700000 aanpassen
for i in range(len(TravelData)):
    if TravelData.loc[i, 'JourneyNumber'] > 700000:
        TravelData.loc[i, 'JourneyNumber'] -= 700000

#treincode tussen 200000 en 700000 aanpassen
for i in range(len(TravelData)):
    if 200000 < TravelData.loc[i, 'JourneyNumber'] < 700000:
        TravelData.loc[i, 'JourneyNumber'] -= 200000

display(TravelData)

Unnamed: 0,DataOwnerCode,OperatingDay,LinePlanningNumber,JourneyNumber,ReinforcementNumber,TimingLinkOrder,UserStopCodeBegin,UserStopCodeEnd,Occupancy,VehicleType,TotalNumberOfCoaches,Seats,Occupied Seats
0,NS,2024-10-11,,6649,0,14,HT,TB,2,FLIRT FFF,7,371,0
1,NS,2024-10-11,,786,0,6,ASDZ,SHL,3,ICM,4,300,0
2,NS,2024-10-10,,3342,0,2,LAA,GVM,2,SNG,6,300,0
3,NS,2024-10-12,,7367,0,8,MRN,VNDW,2,SNG,4,200,0
4,NS,2024-10-15,,6681,0,9,HTO,HT,2,FLIRT FFF,3,159,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
317075,NS,2024-10-16,,3053,0,1,HDR,HDRZ,2,VIRM,4,400,0
317076,NS,2024-10-15,,2360,0,10,RTD,RTB,3,VIRM,4,400,0
317077,NS,2024-10-11,,2284,0,5,LEDN,LAA,3,VIRM,6,600,0
317078,NS,2024-10-14,,5864,0,5,HVS,HVSM,2,SNG,8,400,0


In [40]:
#   Voor iedere treinserie een dataframe maken
#Inladen CSV met alle NS treindiensten
TrainServices = pd.read_csv('TrainServices.csv', delimiter = ';')

#Dictionary maken met per treinserie een dataframe met ieder stukje tussen stations er in
dataframes_dict = {}

TrainServices.loc[0,'Code']
TrainServices.loc[0,'String'].split(',')

for i in range(len(TrainServices)):
    a = TrainServices.loc[i,'String'].split(',')
    df = pd.DataFrame({'From':[],'To':[],'Seats':[]})
    for j in range(len(a) - 1):
        new_row = pd.DataFrame({'From': [a[j]], 'To': [a[j + 1]], 'Seats':[None]})
        df = pd.concat([df, new_row], ignore_index=True)
    dataframes_dict[TrainServices.loc[i,'Code']] = df

Unnamed: 0,index,DataOwnerCode,OperatingDay,LinePlanningNumber,JourneyNumber,ReinforcementNumber,TimingLinkOrder,UserStopCodeBegin,UserStopCodeEnd,Occupancy,VehicleType,TotalNumberOfCoaches,Seats,Occupied Seats
0,1,NS,2024-10-11,,786,0,6,ASDZ,SHL,3,ICM,4,300,0
1,2,NS,2024-10-10,,3342,0,2,LAA,GVM,2,SNG,6,300,0
2,5,NS,2024-10-11,,4674,0,1,ASD,ASS,2,SNG,6,300,0
3,8,NS,2024-10-13,,6642,0,5,TB,HT,3,FLIRT FFF,4,212,0
4,11,NS,2024-10-09,,8864,0,1,UT,WD,3,SLT,6,324,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158497,317073,NS,2024-10-09,,3550,0,1,VL,BR,2,VIRM,6,600,0
158498,317074,NS,2024-10-07,,7572,0,3,WF,ED,2,FLIRT FFF,3,159,0
158499,317076,NS,2024-10-15,,2360,0,10,RTD,RTB,3,VIRM,4,400,0
158500,317077,NS,2024-10-11,,2284,0,5,LEDN,LAA,3,VIRM,6,600,0


In [18]:
a = SeatSorter('TrainServices.csv', TravelData)
print(a)
#print(TravelData.loc[20, 'Seats'])
#print(dataframes_dict)


KeyboardInterrupt: 

In [12]:
print(dataframes_dict)

NameError: name 'dataframes_dict' is not defined