In [3]:
import pandas as pd
import geopandas as gpd
import osmnx as ox

# ----------------------------------------------------------
# 1. Load point dataset as GeoDataFrame
# ----------------------------------------------------------
tmc = pd.read_excel('../raw_data/Thailand_T19_v3.2_flat_Thai.xlsx')

tmc_gdf = gpd.GeoDataFrame(
    tmc,
    geometry=gpd.points_from_xy(tmc.LONGITUDE, tmc.LATITUDE),
    crs="EPSG:4326"   # WGS84
)

# ----------------------------------------------------------
# 2. Load Bangkok polygon from OSM using its OSM ID
# ----------------------------------------------------------
bangkok = ox.geocode_to_gdf("R92277", by_osmid=True)

# ----------------------------------------------------------
# 3. Project both datasets to UTM (meters) for buffering
#    Bangkok is in UTM zone 47N → EPSG:32647
# ----------------------------------------------------------
tmc_utm = tmc_gdf.to_crs(32647)
bangkok_utm = bangkok.to_crs(32647)

# Extract polygon
poly_utm = bangkok_utm.geometry.iloc[0]

# ----------------------------------------------------------
# 4. Create a 500m buffer around the Bangkok polygon
# ----------------------------------------------------------
poly_buffered = poly_utm.buffer(1000)  # 1000 meters

# ----------------------------------------------------------
# 5. Filter all points that intersect the buffered polygon
# ----------------------------------------------------------
inside_1km = tmc_utm[tmc_utm.intersects(poly_buffered)]

# ----------------------------------------------------------
# 6. (Optional) Print results
# ----------------------------------------------------------
print(f"Total points: {len(tmc_utm)}")
print(f"Points inside buffered Bangkok: {len(inside_1km)}")



Total points: 23239
Points inside buffered Bangkok: 4463


  return lib.buffer(


In [5]:
inside_1km.columns

Index(['CC', 'CID', 'VERSION NR', 'TABLE', 'LOCATION CODE', '(SUB)TYPE',
       'ROAD NUMBER', 'ROAD NAME', 'JUNCTION NUMBER', 'FIRST NAME',
       'SECOND NAME', 'AREA REFERENCE', 'LINEAR REFERENCE', 'NEGATIVE OFFSET',
       'POSITIVE OFFSET', 'INTERSECTION REFS', 'LATITUDE', 'LONGITUDE',
       'URBAN', 'INTERRUPT', 'ISOLATED', 'IN+', 'OUT+', 'IN-', 'OUT-',
       'PRESENT+', 'PRESENT-', 'geometry'],
      dtype='object')

In [6]:
node_df = inside_1km[["LOCATION CODE", "ROAD NAME", "LATITUDE", "LONGITUDE", 'NEGATIVE OFFSET', 'POSITIVE OFFSET']]
node_df.to_csv("bkk_loc_table.csv", index=False)

In [7]:
node_df[node_df["LOCATION CODE"] > 46130]

Unnamed: 0,LOCATION CODE,ROAD NAME,LATITUDE,LONGITUDE,NEGATIVE OFFSET,POSITIVE OFFSET
156,46138,ถนนสุขสวัสดิ์,13.64094,100.52618,46137.0,46139.0
157,46139,ถนนสุขสวัสดิ์,13.64987,100.52248,46138.0,46140.0
158,46140,ถนนสุขสวัสดิ์,13.65879,100.51889,46139.0,46141.0
159,46141,ถนนสุขสวัสดิ์,13.66404,100.51696,46140.0,46142.0
160,46142,ถนนสุขสวัสดิ์,13.66861,100.51350,46141.0,46143.0
...,...,...,...,...,...,...
23234,54849,ถนนวิภาวดีรังสิต,13.80668,100.55995,54848.0,54850.0
23235,54850,ถนนวิภาวดีรังสิต,13.82521,100.55802,54849.0,54851.0
23236,54851,ถนนวิภาวดีรังสิต,13.84402,100.55955,54850.0,54852.0
23237,54852,ถนนวิภาวดีรังสิต,13.86241,100.56944,54851.0,54853.0


In [14]:
# Fix NEGATIVE OFFSET
node_df["NEGATIVE OFFSET"] = node_df["NEGATIVE OFFSET"].where(
    node_df["NEGATIVE OFFSET"].isin(node_df.index),
    np.nan
)

# Fix POSITIVE OFFSET
node_df["POSITIVE OFFSET"] = node_df["POSITIVE OFFSET"].where(
    node_df["POSITIVE OFFSET"].isin(node_df.index),
    np.nan
)

node_df["has_valid_neighbor"] = (
    node_df["POSITIVE OFFSET"].isin(node_df.index) |
    node_df["NEGATIVE OFFSET"].isin(node_df.index)
)

In [15]:
node_df_filtered = node_df[node_df["has_valid_neighbor"] == True]

In [19]:
node_df_filtered[node_df_filtered["NEGATIVE OFFSET"].isna()]

Unnamed: 0_level_0,ROAD NAME,FIRST NAME,SECOND NAME,LATITUDE,LONGITUDE,NEGATIVE OFFSET,POSITIVE OFFSET,has_valid_neighbor
LOCATION CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
46141,Suksawat Road,Industrial Ring Rd.,,13.66404,100.51696,,46142.0,True
46150,Chaeng Watthana Road,Uttaraphimuk Elevated Tollway,,13.88413,100.58158,,46151.0,True
8954,Ekkachai Road,Soi Ekkachai 131/Soi Phet Kasem 81,,13.63854,100.37195,,8955.0,True
54786,Rama 2,Rama 2 Soi 82/Bang Kradi Road,,13.63267,100.39982,,15015.0,True
46248,,Prasert-Manukitch Road,,13.81561,100.64215,,46249.0,True
...,...,...,...,...,...,...,...,...
54823,Rama 8 Bridge,Somdet Phra Pin Klao Road/ Borommaratchachonna...,,13.76866,100.48603,,54824.0,True
54826,Boromratchachonnani Elevated Road,Rama 8 Bridge/Arun Ammarin Road,,13.76656,100.48728,,54827.0,True
53243,Thawiwatthana-Kanchanaphisek Road,Thawiwatthana Road,,13.76650,100.34545,,54856.0,True
54844,Din Daeng Road/ Vibhavadi-Rangsit Road,Din Daeng Road,,13.76344,100.54535,,54845.0,True


In [20]:
node_df_filtered.reset_index(inplace=True)
node_df_filtered.to_csv("../processed_data/bkk_loc_table.csv", index=False)

In [6]:
connection_df = inside[["LOCATION CODE", "NEGATIVE OFFSET", "POSITIVE OFFSET", "LATITUDE", "LONGITUDE"]]

In [7]:
idx_left = np.isin(connection_df["NEGATIVE OFFSET"], connection_df["LOCATION CODE"])
idx_right = np.isin(connection_df["POSITIVE OFFSET"], connection_df["LOCATION CODE"])

In [8]:
edge_df = pd.DataFrame(columns=['start_loc', 'end_loc'])

In [9]:
temp_df = connection_df.loc[idx_left, ["NEGATIVE OFFSET", "LOCATION CODE"]]
temp_df = temp_df.rename({"NEGATIVE OFFSET":"start_loc", "LOCATION CODE":"end_loc"}, axis="columns").astype(np.int64)
temp_df = temp_df.astype(np.int64)
edge_df = pd.concat([edge_df, temp_df], ignore_index=True)

In [10]:
temp_df = connection_df.loc[idx_right, ["LOCATION CODE", "POSITIVE OFFSET"]]
temp_df = temp_df.rename({"POSITIVE OFFSET":"end_loc", "LOCATION CODE":"start_loc"}, axis="columns").astype(np.int64)
temp_df = temp_df.astype(np.int64)
edge_df = pd.concat([edge_df, temp_df], ignore_index=True)

In [11]:
temp_df = connection_df.loc[idx_left, ["LOCATION CODE", "NEGATIVE OFFSET"]]
temp_df = temp_df.rename({"NEGATIVE OFFSET":"end_loc", "LOCATION CODE":"start_loc"}, axis="columns").astype(np.int64)
temp_df = temp_df.astype(np.int64)
edge_df = pd.concat([edge_df, temp_df], ignore_index=True)

In [12]:
temp_df = connection_df.loc[idx_right, ["POSITIVE OFFSET", "LOCATION CODE"]]
temp_df = temp_df.rename({"POSITIVE OFFSET":"start_loc", "LOCATION CODE":"end_loc"}, axis="columns").astype(np.int64)
temp_df = temp_df.astype(np.int64)
edge_df = pd.concat([edge_df, temp_df], ignore_index=True)

In [11]:
edge_df = edge_df.drop_duplicates()

In [12]:
unique_point = connection_df[["LATITUDE", "LONGITUDE"]].drop_duplicates()

In [13]:
for idx, (lat, long) in unique_point.iterrows():
    temp_df = connection_df[["LOCATION CODE", "LATITUDE", "LONGITUDE"]]
    temp_df = temp_df[(temp_df["LATITUDE"] == lat) & (temp_df["LONGITUDE"] == long)]
    if temp_df.shape[0] > 1:
        new_df = pd.DataFrame({"start_loc":temp_df.iloc[:-1, 0].to_numpy(), "end_loc":temp_df.iloc[1:, 0].to_numpy()})
        edge_df = pd.concat([edge_df, new_df])

In [14]:
edge_df

Unnamed: 0,start_loc,end_loc
0,46141,46142
1,46142,46143
2,46143,46144
3,46144,46145
4,46145,46146
...,...,...
0,53417,53419
0,53430,53432
0,53436,53438
0,53667,53669


In [15]:
edge_df.to_csv("../processed_data/bkk_edge_table.csv", index=False)