In [1]:
import geopandas as gpd
import pandas as pd
import urllib.request, json

In [7]:
def api_request(stations, subscription_key):
    '''Request the trajectory between two stations from the NS Spoorkaart API.

    Args:
        stations (str): The two stations whose connecting trajectory is
            requested, in the form 'Station1,Station2'.
        subscription_key (str): Subscription key, sent in the
            'Ocp-Apim-Subscription-Key' request header.

    Returns:
        geopandas.GeoDataFrame or None: One row per feature of the returned
        GeoJSON, or None when the request or the parsing fails. Failures are
        printed, not raised.
    '''
    # Imported here so the function does not depend on urllib.request having
    # loaded these submodules as a side effect of its own import.
    from urllib.error import HTTPError
    from urllib.parse import quote

    # Percent-encode the station codes so spaces or reserved characters cannot
    # produce an invalid URL. ',' separates the two stations and '%' is kept so
    # that already-encoded input is not encoded a second time.
    encoded_stations = quote(str(stations), safe=',%')
    url = f"https://gateway.apiportal.ns.nl/Spoorkaart-API/api/v1/traject.geojson?stations={encoded_stations}"
    headers = {
        'Cache-Control': 'no-cache',
        'Ocp-Apim-Subscription-Key': subscription_key,
    }

    try:
        req = urllib.request.Request(url, headers=headers)
        # The context manager closes the HTTP response even if reading fails.
        with urllib.request.urlopen(req) as response:
            response_content = response.read().decode('utf-8')

        # Parse the body as GeoJSON and turn its features into a GeoDataFrame.
        geojson_data = json.loads(response_content)
        return gpd.GeoDataFrame.from_features(geojson_data['features'])
    except HTTPError as e:
        print(f"HTTPError: {e.code} - {e.reason}")
        return None
    except Exception as e:
        # Deliberate best-effort: callers test for None instead of catching.
        print(f"Error: {e}")
        return None

In [10]:
def travel_data(operators, seats_per_vehicle_type):
    '''Load and combine the raw travel CSVs of the requested operators.

    Args:
        operators (iterable of str): Operators to load; each must be one of
            'Keolis', 'Arriva', 'Qbuzz' or 'NS'. The matching CSV is read from
            the current working directory.
        seats_per_vehicle_type: Currently unused; kept so existing calls keep
            working.

    Returns:
        pandas.DataFrame or None: The concatenated rows with an added 'Seats'
        column (all 0), 'OperatingDay' parsed to datetimes, the rows of
        2024-10-14 to 2024-10-16 removed and 'JourneyNumber' normalised.
        None (after printing a message) when an unknown operator is given.
    '''
    # TODO: adjust the NS source file.
    # CSV file and read options per supported operator.
    sources = {
        'Keolis': ('Keolis.csv', {'delimiter': ';'}),
        'Arriva': ('Arriva.csv', {}),
        'Qbuzz': ('Qbuzz.csv', {}),
        'NS': ('OC_NS_20241007.csv', {}),
    }

    # Collect the frames first and concatenate once, instead of growing a
    # frame inside the loop.
    frames = []
    for operator in operators:
        if operator not in sources:
            print("Operator not known, known operators: [Keolis, Arriva, NS, Qbuzz]")
            return None
        path, read_options = sources[operator]
        frames.append(pd.read_csv(path, **read_options))

    combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    combined['Seats'] = 0

    # Keep a single week: drop the three extra days from the table.
    combined['OperatingDay'] = pd.to_datetime(combined['OperatingDay'])
    dates_to_exclude = pd.to_datetime(['2024-10-14', '2024-10-15', '2024-10-16'])
    combined = combined[~combined['OperatingDay'].isin(dates_to_exclude)].reset_index(drop=True)

    # Journey numbers above 700000: subtract 700000.
    above = combined['JourneyNumber'] > 700000
    combined.loc[above, 'JourneyNumber'] -= 700000

    # Journey numbers strictly between 200000 and 700000: subtract 200000.
    # The mask is computed after the step above on purpose, so a number that
    # lands in this range after the first subtraction is reduced again.
    between = (combined['JourneyNumber'] > 200000) & (combined['JourneyNumber'] < 700000)
    combined.loc[between, 'JourneyNumber'] -= 200000

    return combined

In [8]:
# Map a number onto a colour gradient according to its position within a range.
def interpolate_color(lower_limit, upper_limit, lower_color, upper_color, number):
    """Linearly interpolate between two RGB colours.

    Args:
        lower_limit: Value that maps to ``lower_color``.
        upper_limit: Value that maps to ``upper_color``.
        lower_color: RGB components (0-255) used at ``lower_limit``.
        upper_color: RGB components (0-255) used at ``upper_limit``.
        number: Value to colour; it is clamped to the two limits.

    Returns:
        list of float: The interpolated components, each truncated to an
        integer and then divided by 255.0. When both limits are equal there is
        no range to interpolate over and ``lower_color`` is returned.
    """
    span = upper_limit - lower_limit
    if span == 0:
        # A degenerate range (e.g. every value identical) would otherwise
        # divide by zero.
        factor = 0.0
    else:
        # Clamp the number between the lower and upper limit.
        clamped = max(min(number, upper_limit), lower_limit)
        factor = (clamped - lower_limit) / span

    # Interpolate each component, truncate to an integer and normalise to 0..1.
    return [
        int(lower_component + (upper_component - lower_component) * factor) / 255.0
        for lower_component, upper_component in zip(lower_color, upper_color)
    ]

In [9]:
# Build a dictionary of per-series segment tables and fill in the seats from the travel data.
def _segments_frame(stops):
    """Return a From/To/Seats frame with one row per consecutive pair of stops; Seats starts as None."""
    return pd.DataFrame({
        'From': pd.Series(stops[:-1], dtype=object),
        'To': pd.Series(stops[1:], dtype=object),
        'Seats': pd.Series([None] * (len(stops) - 1), dtype=object),
    })


def seat_sorter(TrainServicesData, TrainTravelData):
    """Sum the seats of every journey onto the segments of its train series.

    Args:
        TrainServicesData: Path (or buffer) of a ';'-delimited CSV with a
            'Code' column and a 'String' column holding the comma-separated
            stops of that series.
        TrainTravelData (pandas.DataFrame): Journeys with the columns
            'JourneyNumber', 'UserStopCodeBegin', 'UserStopCodeEnd' and
            'Seats'. Rows are addressed by the labels 0..len-1, so the frame
            needs a default index.

    Returns:
        dict: For every series two frames with the columns From/To/Seats:
        key ``Code`` holds the stops in listed order and key ``Code + 1`` the
        stops in reverse order. 'Seats' stays None on segments no journey
        covered.

    Raises:
        KeyError: If a journey's series (journey number rounded down to a
            multiple of 100, plus 1 for odd journey numbers) is not in the
            services file.
    """
    TrainServices = pd.read_csv(TrainServicesData, delimiter=';')

    # One frame per series and direction, built in one go from the stop list.
    dataframes_dict = {}
    for i in range(len(TrainServices)):
        code = TrainServices.loc[i, 'Code']
        stops = TrainServices.loc[i, 'String'].split(',')
        dataframes_dict[code] = _segments_frame(stops)
        dataframes_dict[code + 1] = _segments_frame(stops[::-1])

    # Fill the dictionary with seat counts.
    for i in range(len(TrainTravelData)):
        journey_number = TrainTravelData.loc[i, 'JourneyNumber']
        series_code = journey_number // 100 * 100
        # Odd journey numbers use the reversed-direction frame (Code + 1).
        key = series_code + 1 if journey_number % 2 != 0 else series_code
        segments = dataframes_dict[key]

        begin = TrainTravelData.loc[i, 'UserStopCodeBegin'].upper()
        end = TrainTravelData.loc[i, 'UserStopCodeEnd'].upper()
        seats = TrainTravelData.loc[i, 'Seats']

        # Walk the route; seats count from the segment that starts at the
        # journey's first stop up to and including the one that ends at its
        # last stop.
        on_route = False
        for j in range(len(segments)):
            if begin == segments.loc[j, 'From'].upper():
                on_route = True
            if on_route:
                current = segments.loc[j, 'Seats']
                segments.loc[j, 'Seats'] = seats if current is None else current + seats
            if end == segments.loc[j, 'To'].upper():
                break

    return dataframes_dict

In [11]:
def seats_per_trajectory(SortedSeats):
    """Total the seats per track segment over all train series.

    Args:
        SortedSeats (dict): Frames with the columns From/To/Seats, addressed
            by the row labels 0..len-1. Rows whose 'Seats' is None are
            skipped.

    Returns:
        pandas.DataFrame: Columns From/To/Seats with one row per segment,
        regardless of direction (A-B and B-A are summed together). Station
        codes are upper-cased; rows appear in first-seen order and keep the
        direction in which the segment was first seen.
    """
    origins, destinations, totals = [], [], []
    # Direction-independent segment -> its position in the three lists, so
    # each segment is found with one lookup instead of a scan over the result.
    row_of_segment = {}

    for segments in SortedSeats.values():
        for j in range(len(segments)):
            seats = segments.loc[j, 'Seats']
            if seats is None:
                continue
            start = segments.loc[j, 'From'].upper()
            end = segments.loc[j, 'To'].upper()
            segment = frozenset((start, end))
            row = row_of_segment.get(segment)
            if row is None:
                row_of_segment[segment] = len(totals)
                origins.append(start)
                destinations.append(end)
                totals.append(seats)
            else:
                totals[row] += seats

    # Build the frame once instead of appending one row at a time.
    return pd.DataFrame({'From': origins, 'To': destinations, 'Seats': totals})

In [12]:
def geometry_adder(df, subscription_key=None):
    """Attach the track geometry of every From/To segment to the table.

    Args:
        df (pandas.DataFrame): Table with 'From' and 'To' station columns.
            It is not modified; a copy carries the new column.
        subscription_key (str, optional): Subscription key passed to
            ``api_request``. When omitted it is asked for interactively.

    Returns:
        geopandas.GeoDataFrame: Copy of ``df`` with a 'geometry' column
        holding the first geometry ``api_request`` returned for each row.

    Note:
        A failed request is retried after asking for a new key, so the
        function does not return until every row has a geometry.
    """
    # getpass keeps the key out of the notebook's saved cell output, where
    # input() would echo it.
    from getpass import getpass

    if subscription_key is None:
        subscription_key = getpass('Please enter your SubscriptionKey: ')

    geometry_data = []
    total = len(df)

    # Iterate by position so the function does not depend on the index labels.
    for position, (start, end) in enumerate(zip(df['From'], df['To']), start=1):
        while True:
            try:
                # Make the API request
                geodata = api_request(f'{start},{end}', subscription_key)

                # On success store the geometry and move on to the next row.
                if geodata is not None:
                    geometry_data.append(geodata['geometry'].iloc[0])
                    print(f'{position}/{total}', end="\r")
                    break

                # api_request reports every failure as None; ask for a new key.
                subscription_key = getpass('Invalid SubscriptionKey. Please enter a new SubscriptionKey: ')
            except Exception as e:
                print(f"An error occurred: {e}")
                subscription_key = getpass('Error occurred. Please enter a new SubscriptionKey: ')

    result = df.copy()
    result['geometry'] = geometry_data
    return gpd.GeoDataFrame(result, geometry='geometry')

In [13]:
# Read the five travel-data CSVs in one pass; the files are read in the order
# listed and bound to the names on the left in that same order.
arriva, ns, qbuzz, keolis, all_operators = (
    pd.read_csv(csv_path)
    for csv_path in (
        'TravelDataArriva.csv',
        'TravelDataNS.csv',
        'TravelDataQbuzz.csv',
        'TravelDataKeolis.csv',
        'TravelData.csv',
    )
)

In [32]:
# Every day and the whole week, all operators combined, all on the same scale: intercity, sprinter & both
# Whole week per operator, each on a different scale: intercity, sprinter & both
intercities = [1000, 1100, 11400, 11600, 11700, 12600, 1400, 1500, 600, 1700, 17900, 1800, 200, 2000, 2100, 21400, 21500, 2200, 22200, 22400, 2300, 23400, 240, 2400, 24400, 2600, 2800, 2900, 3000, 3100, 3200, 3500, 3600, 32790, 3700, 37300, 37900, 3900, 4500, 500, 600, 700, 800, 900, 9200]
sprinters = [13300, 13800, 14300, 14900, 15400, 16400, 17800, 18900, 20100, 20200, 25400, 30400, 30700, 30800, 30900, 31000, 31100, 31200, 31300, 31400, 32000, 32200, 32300, 32400, 32500, 32700, 3300, 36900, 37000, 37100, 37200, 37400, 37500, 37600, 37700, 37800, 3800, 38000, 4000, 4300, 4400, 4600, 4800, 4900, 5000, 5100, 5200, 5400, 5500, 5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7900, 8000, 8100, 8500, 8600, 8700, 8800, 8900, 9000]
# First entry: the whole week; the remaining entries: one day each.
dates = [['2024-10-07', '2024-10-08', '2024-10-09', '2024-10-10', '2024-10-11', '2024-10-12', '2024-10-13'],['2024-10-07'], ['2024-10-08'], ['2024-10-09'], ['2024-10-10'], ['2024-10-11'], ['2024-10-12'], ['2024-10-13']]
trainoptions = ['intercities', 'sprinters', 'all']

# seat_sorter stores the reversed direction of a series under code + 1, so
# both keys have to be selectable.
intercities_plus_one = [x + 1 for x in intercities]
intercities = intercities + intercities_plus_one
sprinters_plus_one = [x + 1 for x in sprinters]
sprinters = sprinters + sprinters_plus_one

# Sets give constant-time membership tests while filtering the series below.
intercity_codes = set(intercities)
sprinter_codes = set(sprinters)

for i in range(len(dates)):
    print(f'{i+1}/{len(dates)}', end="\r")
    TravelData = all_operators
    datestoinclude = dates[i]
    df_filtered = TravelData[TravelData['OperatingDay'].isin(datestoinclude)]
    TravelData = df_filtered.reset_index(drop=True)
    SortedSeats = seat_sorter('TrainServices.csv',TravelData)
    for q, j in enumerate(trainoptions):
        print(f'{q+1}/{len(trainoptions)}', end="\r")
        # One exclusive chain, so an unexpected option fails loudly instead of
        # silently reusing the selection of the previous iteration.
        if j == 'intercities':
            SortedSeats1 = {k: v for k, v in SortedSeats.items() if k in intercity_codes}
        elif j == 'sprinters':
            SortedSeats1 = {k: v for k, v in SortedSeats.items() if k in sprinter_codes}
        elif j == 'all':
            SortedSeats1 = SortedSeats
        else:
            raise ValueError(f'Unknown train option: {j}')
        SeatsPerTrajectory = seats_per_trajectory(SortedSeats1)
        df = geometry_adder(SeatsPerTrajectory)
        # Sort ascending so higher values are plotted later and end up drawn on top of lower values.
        df = df.sort_values(by='Seats', ascending=True).reset_index()
        # Assign a colour to every row and add it to the GeoDataFrame.
        df['color'] = '0'
        # The colour scale limits do not change inside the loop; compute them once.
        seats_min = df['Seats'].min()
        seats_max = df['Seats'].max()
        for h in range(len(df)):
            df.loc[h, 'color'] = str(interpolate_color(seats_min, seats_max, (255,255,0), (255,0,0), df.loc[h, 'Seats']))
        # Entries with more than two dates are the whole-week selection.
        if len(dates[i]) > 2:
            df.to_csv(f'PlotDataWeek{trainoptions[q]}.csv')
        else:
            df.to_csv(f'PlotData{dates[i][0]}{trainoptions[q]}.csv')
        
    
    
    

2/3/274

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


403/403

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/8/430

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/255

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


3/3/388

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


3/8/420

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/255

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


3/3/388

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


4/8/420

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/260

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


392/392

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


425/425

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/248

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


392/392

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


6/8/423

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/266

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


401/401

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


7/8/427

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


259/259

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


3/3/358

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


8/8/405

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/3/257

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


372/372

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


405/405

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


In [33]:
# Every day and the whole week, all operators combined, all on the same scale: intercity, sprinter & both
# Whole week per operator, each on a different scale: intercity, sprinter & both
intercities = [1000, 1100, 11400, 11600, 11700, 12600, 1400, 1500, 600, 1700, 17900, 1800, 200, 2000, 2100, 21400, 21500, 2200, 22200, 22400, 2300, 23400, 240, 2400, 24400, 2600, 2800, 2900, 3000, 3100, 3200, 3500, 3600, 32790, 3700, 37300, 37900, 3900, 4500, 500, 600, 700, 800, 900, 9200]
sprinters = [13300, 13800, 14300, 14900, 15400, 16400, 17800, 18900, 20100, 20200, 25400, 30400, 30700, 30800, 30900, 31000, 31100, 31200, 31300, 31400, 32000, 32200, 32300, 32400, 32500, 32700, 3300, 36900, 37000, 37100, 37200, 37400, 37500, 37600, 37700, 37800, 3800, 38000, 4000, 4300, 4400, 4600, 4800, 4900, 5000, 5100, 5200, 5400, 5500, 5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7900, 8000, 8100, 8500, 8600, 8700, 8800, 8900, 9000]
# NOTE(review): vehicle_type_seats and dates are defined here but not used in this cell.
vehicle_type_seats = {"VIRM": 100, "DDZ": 100, "FLIRT FFF": 53, "ICM": 75,
                      "ICNG25": 52, "SLT": 54, "SNG": 50, "SW7-25KV": 48,
                      "SW9-25KV": 48, "GTW": 45, "Flirt": 57, "FLIRT": 57,
                      "Lint": 65, "WINK": 153}
dates = [['2024-10-07', '2024-10-08', '2024-10-09', '2024-10-10', '2024-10-11', '2024-10-12', '2024-10-13']]
trainoptions = ['intercities', 'sprinters', 'all']

# seat_sorter stores the reversed direction of a series under code + 1, so
# both keys have to be selectable.
intercities_plus_one = [x + 1 for x in intercities]
intercities = intercities + intercities_plus_one
sprinters_plus_one = [x + 1 for x in sprinters]
sprinters = sprinters + sprinters_plus_one
operators = ['Keolis', 'Arriva', 'NS', 'Qbuzz']

# Sets give constant-time membership tests while filtering the series below.
intercity_codes = set(intercities)
sprinter_codes = set(sprinters)

for i in range(len(operators)):
    print(f'{i+1}/{len(operators)}', end="\r")
    TravelData = pd.read_csv(f'TravelData{operators[i]}.csv')
    datestoinclude = ['2024-10-07', '2024-10-08', '2024-10-09', '2024-10-10', '2024-10-11', '2024-10-12', '2024-10-13']
    df_filtered = TravelData[TravelData['OperatingDay'].isin(datestoinclude)]
    TravelData = df_filtered.reset_index(drop=True)
    SortedSeats = seat_sorter('TrainServices.csv',TravelData)
    for q, j in enumerate(trainoptions):
        print(f'{q+1}/{len(trainoptions)}', end="\r")
        # One exclusive chain, so an unexpected option fails loudly instead of
        # silently reusing the selection of the previous iteration.
        if j == 'intercities':
            SortedSeats1 = {k: v for k, v in SortedSeats.items() if k in intercity_codes}
        elif j == 'sprinters':
            SortedSeats1 = {k: v for k, v in SortedSeats.items() if k in sprinter_codes}
        elif j == 'all':
            SortedSeats1 = SortedSeats
        else:
            raise ValueError(f'Unknown train option: {j}')
        SeatsPerTrajectory = seats_per_trajectory(SortedSeats1)
        df = geometry_adder(SeatsPerTrajectory)
        # Sort ascending so higher values are plotted later and end up drawn on top of lower values.
        df = df.sort_values(by='Seats', ascending=True).reset_index()
        # Assign a colour to every row and add it to the GeoDataFrame.
        df['color'] = '0'
        # The colour scale limits do not change inside the loop; compute them once.
        seats_min = df['Seats'].min()
        seats_max = df['Seats'].max()
        for h in range(len(df)):
            df.loc[h, 'color'] = str(interpolate_color(seats_min, seats_max, (255,255,0), (255,0,0), df.loc[h, 'Seats']))
        df.to_csv(f'PlotDataWeek{operators[i]}{trainoptions[q]}.csv')
    
        
    
    
    

9/10

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


19/19

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/419

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


2/315

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


143/143

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


143/143

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


256/256

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


3/3/240

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


4/4/282

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


7/11

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))


11/11

  df.loc[h, 'color'] = str(interpolate_color(df['Seats'].min(), df['Seats'].max(), (255,255,0), (255,0,0), df.loc[h, 'Seats']))
