In [1]:
import pandas as pd

# Load the route and trip tables from the local gtfs_data/ folder.
routes = pd.read_csv('gtfs_data/routes.csv')
trips = pd.read_csv('gtfs_data/trips.csv')

# Route short names to leave out of the analysis.
fltr = ['BUS', 'EXP']

# Keep every route whose short name is NOT in the exclusion list.
is_excluded = routes['route_short_name'].isin(fltr)
routes_filtered = routes[~is_excluded]

In [2]:
# Attach trip_id / trip_headsign to every filtered route. This is a left merge,
# so a route without any matching trip is kept, with NaN in the trip columns.
trip_columns = ['route_id', 'trip_id', 'trip_headsign']
merged_df = (
    routes_filtered
    .merge(trips[trip_columns], how='left', on='route_id')
    # Removes only rows whose route_id itself is missing.
    # NOTE(review): after a left merge the route_id values come from
    # routes_filtered, so this does not remove routes that lack trips.
    .dropna(subset=['route_id'])
)

In [3]:
# Narrow the merged table down to the three columns that get exported.
desired_columns = ['route_id', 'trip_id', 'trip_headsign']
routes_with_data = merged_df.loc[:, desired_columns]

# Write the selection to "routes_with_data.csv" without the index column.
# The path is relative, so the file lands in the current working directory.
routes_with_data.to_csv('routes_with_data.csv', index=False)

# NOTE(review): the command below was suggested for producing the .lp file that
# CLINGO expects; it has not been verified here, and the CSV written above is
# not itself a logic program.
# python -m clingo --lp routes_with_data.lp

In [4]:
import pandas as pd
import io

# Name of the predicate written for every fact in routes.lp.
route_var = 'route'

# Attach trip_id / trip_headsign to every filtered route. This is a left merge,
# so routes without any matching trip are kept, with NaN in the trip columns.
merged_df = pd.merge(routes_filtered, trips[['route_id', 'trip_id', 'trip_headsign']],
                     on='route_id', how='left')

# Drop rows whose route_id itself is missing.
merged_df = merged_df.dropna(subset=['route_id'])

# Split trip_id on ':' once and take fields 3 and 4 as the 'from' / 'to' values.
# Indexing past the end of a short split yields NaN rather than raising.
# `assign` builds a new frame instead of writing columns into the dropna result.
trip_id_parts = merged_df['trip_id'].str.split(':')
merged_df = merged_df.assign(**{
    'from': trip_id_parts.str[3],
    'to': trip_id_parts.str[4],
})

# Keep only complete, unique rows. Without the dropna, a route with no trips
# (or a trip_id with fewer than five fields) would be written out as a bogus
# fact such as `route(7, "nan", nan, nan).`.
merged_df_dropna = (
    merged_df[['route_id', 'from', 'to', 'trip_id']]
    .dropna(subset=['trip_id', 'from', 'to'])
    .drop_duplicates()
)

# Write one fact per row: route(<route_id>, "<trip_id>", <from>, <to>).
# NOTE(review): route_id, from and to are emitted unquoted, which presumably
# relies on them being numeric in the data -- confirm.
with open("routes.lp", "w") as f:
    f.writelines(
        f'{route_var}({route_id}, "{trip_id}", {from_stop}, {to_stop}).\n'
        for route_id, trip_id, from_stop, to_stop in zip(
            merged_df_dropna['route_id'],
            merged_df_dropna['trip_id'],
            merged_df_dropna['from'],
            merged_df_dropna['to'],
        )
    )

In [5]:
import re
# Name of the predicate written for every fact in departures.lp.
# This rebinds the same `route_var` global that the routes.lp cell sets.
route_var = 'departure_time'

# Captures the digit field immediately before the trailing 8-digit group of a
# trip_id (e.g. "2307" in "...:5:2307:20250119"). Compiled once, outside the loop.
time_field = re.compile(r':(\d+):\d{8}$')

# Write one fact per matching row: departure_time(<route_id>, "<trip_id>", <value>).
with open("departures.lp", "w") as f:
    for route_id, trip_id in zip(merged_df_dropna['route_id'],
                                 merged_df_dropna['trip_id']):
        # A missing trip_id (NaN from the left merge) is not a string;
        # passing it to the regex would raise TypeError.
        if not isinstance(trip_id, str):
            continue

        match = time_field.search(trip_id)
        if match is None:
            continue

        # Drop the last two digits of the captured field. Integer division
        # gives 0 for fields shorter than three digits, where slicing off two
        # characters left an empty string and the trip was silently skipped.
        # NOTE(review): assumes the field is a clock time written as H(H)MM
        # without leading zeros, so the value is the hour -- confirm.
        departure = int(match.group(1)) // 100
        f.write(f'{route_var}({route_id}, "{trip_id}", {departure}).\n')

In [6]:
# Scratch check of the colon-separated layout of one sample identifier:
# split it into fields and print the one at index 4.
st = "88____:007::8872009:8814209:7:430:20241117"
stl = st.split(sep=':')
field_index = 4
print(stl[field_index])

8814209


In [7]:
import re

# Sample identifiers used to try out the extraction below.
strings = [
    "88____:046::8894508:8822608:5:2307:20250119",
    "88____:007::8891009:8841004:37:653:20251107",
    "88____:007::8891009:8841004:37:649:20251212",
    "88____:007::8891009:8841004:39:649:20250221",
    "88____:007::8841004:8891009:37:2350:20251212",
    "88____:007::8841004:8891009:39:2350:20250221",
]

# Captures the digit field that sits right before the trailing 8-digit group.
pattern = re.compile(r':(\d+):\d{8}$')

# For each sample, print the captured field with its last two characters removed.
for s in strings:
    match = pattern.search(s)
    if not match:
        continue  # no such field in this string; nothing to print
    number = match.group(1)
    result = number[:-2]
    print(result)


23
6
6
6
23
23
