In [1]:
import pandas as pd
import os
import graph_tool.all as gt
import h3
import pyproj
# Geodesic calculator on the WGS84 ellipsoid.
geod = pyproj.Geod(ellps="WGS84")

In [2]:
# Directory that contains the GTFS feed.
path = 'cdmx-gtfs'

# Load the four GTFS tables, in the same order as the names on the left.
stops, stop_times, trips, routes = (
    pd.read_csv(os.path.join(path, filename))
    for filename in ("stops.txt", "stop_times.txt", "trips.txt", "routes.txt")
)

In [6]:
if 'cdmx' in path:
    # Restrict the feed to routes whose agency_id is 'CC' or 'RTP',
    # and to the trips that run on those routes.
    agency_routes = routes[routes.agency_id.isin(['CC','RTP'])]
    trips = trips[trips.route_id.isin(agency_routes.route_id)].copy()
    routes = agency_routes.copy()

In [7]:
# 1. Ordered stop sequence of every trip, stored as a tuple in the 'stop_id' column.
stop_sequences = (
    stop_times.sort_values(['trip_id', 'stop_sequence'])
    .groupby('trip_id')['stop_id']
    .apply(tuple)
    .reset_index()
)

# 2. Attach the route of each trip. The merge is an inner join against the
#    direction_id == 0 trips only, so every other trip is discarded here.
stop_sequences = stop_sequences.merge(
    trips.loc[trips.direction_id == 0, ['trip_id', 'route_id']],
    on='trip_id',
)

# 3. Keep one representative trip per distinct (route_id, stop sequence) pair.
unique_sequences = stop_sequences.drop_duplicates(subset=['route_id', 'stop_id'])

# Build one edge per pair of consecutive stops of each remaining trip.
# Every row already holds exactly one trip, so rows are iterated directly
# (sorted by trip_id to keep a stable edge order). No length guard is needed:
# zip() yields nothing for a one-stop sequence and exactly one edge for a
# two-stop sequence, which the previous `len(nodes) > 2` check silently dropped.
edges = [
    (stop_from, stop_to, row.route_id, row.trip_id)
    for row in unique_sequences.sort_values('trip_id').itertuples(index=False)
    for stop_from, stop_to in zip(row.stop_id[:-1], row.stop_id[1:])
]

edges_df = pd.DataFrame(edges, columns=['nodo_i', 'nodo_j', 'name', 'trip'])

# Optional: attach human-readable stop names for both endpoints ...
edges_df = (
    edges_df
    .merge(stops[['stop_id', 'stop_name']], left_on='nodo_i', right_on='stop_id', how='left')
    .rename(columns={'stop_name': 'nombre_i'})
    .drop(columns=['stop_id'])
)

edges_df = (
    edges_df
    .merge(stops[['stop_id', 'stop_name']], left_on='nodo_j', right_on='stop_id', how='left')
    .rename(columns={'stop_name': 'nombre_j'})
    .drop(columns=['stop_id'])
)

# ... and the long name of the route (the 'name' column holds the route_id).
edges_df = (
    edges_df
    .merge(routes[['route_id', 'route_long_name']], left_on='name', right_on='route_id', how='left')
    .drop(columns=['route_id'])
)



In [8]:
# H3 resolution used to snap stops to hexagonal cells.
res = 10
stops['cell'] = [
    h3.latlng_to_cell(lat, lon, res)
    for lat, lon in zip(stops.stop_lat, stops.stop_lon)
]

# Look the cell up by stop_id instead of assigning the column of a merge result:
# a merge result is re-indexed 0..n-1, so any unmatched or duplicated stop_id
# would shift rows and silently attach the wrong cell to an edge.
stop_to_cell = stops.drop_duplicates(subset='stop_id').set_index('stop_id')['cell']
edges_df['cell_i'] = edges_df['nodo_i'].map(stop_to_cell)
edges_df['cell_j'] = edges_df['nodo_j'].map(stop_to_cell)

# Fail early and explicitly if an edge endpoint has no entry in `stops`.
unmatched = edges_df[['cell_i', 'cell_j']].isna().any(axis=1)
if unmatched.any():
    raise ValueError(
        f"{int(unmatched.sum())} edges reference a stop_id that is missing from stops"
    )

In [9]:
# Every H3 cell that appears as an edge endpoint becomes one graph vertex.
# The cells are sorted so that the cell -> integer numbering is identical on
# every run (iteration order of a set of strings changes between kernels).
cells = sorted(pd.concat([edges_df.cell_i, edges_df.cell_j]).dropna().unique())
nodes2int = pd.DataFrame({'node_int': list(range(len(cells))), 'node_id': cells})

# Translate both endpoints with a key-based lookup; assigning the column of a
# merge result relies on positional index alignment and breaks silently when
# the merge drops or duplicates rows.
cell_to_int = dict(zip(nodes2int.node_id, nodes2int.node_int))
edges_df['int_i'] = edges_df['cell_i'].map(cell_to_int)
edges_df['int_j'] = edges_df['cell_j'].map(cell_to_int)

In [10]:
# One record per edge: [source vertex, target vertex, trip id].
edge_records = edges_df[['int_i', 'int_j', 'trip']].values.tolist()

# Undirected graph; the third field of each record (the trip id) is stored in
# an 'object'-typed edge property called 'name'.
g = gt.Graph(
    edge_records,
    directed=False,
    eprops=[('name', 'object')],
)

# Drop edges whose two endpoints are the same vertex.
gt.remove_self_loops(g)

In [11]:
# Vertex properties: plotting position and the H3 cell id of each vertex.
pos = g.new_vertex_property('vector<double>')
ids = g.new_vertex_property('object')

# Integer vertex id -> H3 cell, built once so that each vertex costs a single
# dictionary lookup instead of a full boolean scan of `nodes2int`.
cell_by_int = dict(zip(nodes2int.node_int, nodes2int.node_id))

for v in g.vertices():
    cell = cell_by_int[int(v)]
    lat, lon = h3.cell_to_latlng(cell)
    # Stored as (lon, -lat); the latitude sign is flipped here and restored
    # below before computing distances (presumably so that north points up
    # when the graph is drawn - confirm against the plotting code).
    pos[v] = (lon, -lat)
    ids[v] = cell
g.vp.pos = pos
g.vp.ids = ids

# Edge property: geodesic distance between the centres of the two endpoint cells.
weight = g.new_edge_property('double')
for e in g.edges():
    u = e.source()
    v = e.target()
    # geod.inv takes (lon1, lat1, lon2, lat2); its third return value is the distance.
    _, _, dist = geod.inv(pos[u][0], -pos[u][1], pos[v][0], -pos[v][1])
    weight[e] = dist
g.ep.weight = weight


In [18]:
# Number of edges (rows of edges_df) contributed by each trip.
edges_df.trip.value_counts()

trip
051162D000_0    94
010017K000_0    83
013009A000_0    79
010024B000_0    78
010017H000_0    77
                ..
054300B000_0     7
051SEL1001_0     6
051SEL1003_0     5
051SEL1002_0     4
050300A000_0     2
Name: count, Length: 272, dtype: int64

In [19]:
# Persist the graph; create the output directory first so that the save does
# not fail on a fresh checkout where it does not exist yet.
output_file = 'redes/redCDMX.gt'
os.makedirs(os.path.dirname(output_file), exist_ok=True)
g.save(output_file)