In [6]:
import pandas as pd
import json

from trajlib.data_processing.utils.data_definition import TrajectoryData
from tqdm import tqdm

# Load the first 10 rows of the Porto sample CSV; malformed lines are
# reported as warnings instead of aborting the read.
# Forward slashes are used so the relative path resolves on both Windows and
# POSIX systems (a backslash-separated path only resolves on Windows).
data = pd.read_csv(
    "./resource/dataset/Porto/porto_sample.csv",
    on_bad_lines="warn",
    nrows=10,
)

# Running id handed out to every GPS point, across all trips.
point_counter = 0
traj_data = TrajectoryData()

# One {"traj_id": ...} record per trip that has at least one point. The records
# are collected here and added to traj_table in a single concat after the loop,
# instead of growing the DataFrame row by row through the private
# DataFrame._append.
traj_rows = []

# total= lets tqdm render a real progress bar; iterrows() alone has no length.
for _, row in tqdm(data.iterrows(), total=len(data)):
    # POLYLINE is a JSON array of coordinate pairs, read below as [lng, lat].
    gps_loc_list = json.loads(row["POLYLINE"])
    if not gps_loc_list:
        # A trip without points adds neither point rows nor a trajectory row.
        continue

    # Every point of the trip shares the trip's TRIP_ID.
    traj_id = row["TRIP_ID"]
    start_timestamp = row["TIMESTAMP"]

    # Consecutive points are spaced 15 seconds apart starting at TIMESTAMP.
    # The * 1000000000 scaling matches what the keplergl / tbd calls expect
    # (per the original author's note).
    data_1 = [
        {
            "point_id": point_counter + offset,
            "timestamp": (start_timestamp + 15 * offset) * 1000000000,
            "traj_id": traj_id,
        }
        for offset in range(len(gps_loc_list))
    ]
    # Longitude is the first element of each pair, latitude the second.
    data_2 = [{"lon": loc[0], "lat": loc[1]} for loc in gps_loc_list]
    point_counter += len(gps_loc_list)

    traj_data.batch_append_point_data(
        new_point_data_list=data_1, extra_attr_list=data_2
    )
    traj_rows.append({"traj_id": traj_id})

if traj_rows:
    traj_data.traj_table = pd.concat(
        [traj_data.traj_table, pd.DataFrame(traj_rows)], ignore_index=True
    )

  pd.concat([self.point_table, new_row_df], ignore_index=True)
10it [00:00, 430.19it/s]


In [7]:
import transbigdata as tbd


def clean_traj(
    point_table,
    iter_clean,
    dislimit=1000,
    anglelimit=30,
    speedlimit=180,
    method="oneside",
):
    """Run ``tbd.traj_clean_drift`` over ``point_table`` ``iter_clean`` times.

    Each pass receives the output of the previous pass, so the cleaning is
    applied cumulatively. With ``iter_clean <= 0`` no pass is run and
    ``point_table`` is returned unchanged.

    Args:
        point_table: Table passed as ``data`` to ``tbd.traj_clean_drift``; it
            is addressed through the columns ``traj_id``, ``timestamp``,
            ``lon`` and ``lat``.
        iter_clean: Number of cleaning passes to run.
        dislimit: Forwarded unchanged to ``tbd.traj_clean_drift``.
        anglelimit: Forwarded unchanged to ``tbd.traj_clean_drift``.
        speedlimit: Forwarded unchanged to ``tbd.traj_clean_drift``.
        method: Forwarded unchanged to ``tbd.traj_clean_drift``.

    Returns:
        The result of the last cleaning pass.
    """
    column_names = ("traj_id", "timestamp", "lon", "lat")
    drift_options = {
        "dislimit": dislimit,
        "anglelimit": anglelimit,
        "speedlimit": speedlimit,
        "method": method,
    }

    cleaned = point_table
    for _ in range(iter_clean):
        # A fresh column list is handed over on every pass.
        cleaned = tbd.traj_clean_drift(
            data=cleaned, col=list(column_names), **drift_options
        )
    return cleaned


# Replace the point table with the result of 10 drift-cleaning passes
# (all thresholds left at clean_traj's defaults).
cleaned_point_table = clean_traj(point_table=traj_data.point_table, iter_clean=10)
traj_data.point_table = cleaned_point_table

# Last expression of the cell: its return value is what the notebook displays.
tbd.visualization_trip(
    trajdata=traj_data.point_table,
    col=["lon", "lat", "traj_id", "timestamp"],
    zoom="auto",
    height=800,
)

Processing trajectory data...
Generate visualization...
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'hizm36i', 'type': …

In [8]:
import trajlib.data_processing.utils.map_match as mm
import importlib

# Re-execute the map-matching module so the names used below come from its
# current source.
importlib.reload(mm)

# Bounding box of all trajectory points, ordered
# [min lon, min lat, max lon, max lat].
lon_values = traj_data.point_table["lon"]
lat_values = traj_data.point_table["lat"]
road_bounds = [
    lon_values.min(),
    lat_values.min(),
    lon_values.max(),
    lat_values.max(),
]

map_con, nodes, edges = mm.get_roadnetwork(
    bounds=road_bounds, cache_dir="./", network_type="drive"
)
print("map network loaded")

# Note: traj_data is rebound to the first element of the returned triple.
traj_data, geo_data, geo_rel_data = mm.match_traj_data_with_roadnetwork(
    traj_data,
    map_con=map_con,
    nodes=nodes,
    edges=edges,
)

map network loaded
Starting trajectory matching...


100%|██████████| 10/10 [00:04<00:00,  2.02it/s]
  matched_edge = edges.loc[edge]


Trajectory matching completed, time taken: 4.949145078659058 seconds
Starting processing matching states...
Processing matching states completed, time taken: 0.012656450271606445 seconds
Starting updating point table...
Updating point table completed, time taken: 0.052945613861083984 seconds
Starting creating geo data...
Creating geo data completed, time taken: 0.2606618404388428 seconds
Starting creating geo relation data...
Creating geo relation data completed, time taken: 0.0014929771423339844 seconds
All phases completed


In [9]:
# Information on all road segments; geo_id is the id of the road.
geo_data.info_table

Unnamed: 0,geo_id,type,coord
0,0,road,"LINESTRING (-8.6208752 41.1496431, -8.6209479 ..."
1,1,road,"LINESTRING (-8.6082129 41.1429961, -8.6083272 ..."
2,2,road,"LINESTRING (-8.662957 41.1636419, -8.6631404 4..."
3,3,road,"LINESTRING (-8.6041052 41.1427685, -8.6046067 ..."
4,4,road,"LINESTRING (-8.5870529 41.147268, -8.5870685 4..."
...,...,...,...
344,344,road,"LINESTRING (-8.5902558 41.149964, -8.5909309 4..."
345,345,road,"LINESTRING (-8.5950103 41.14896, -8.5956944 41..."
346,346,road,"LINESTRING (-8.5776128 41.1447768, -8.5776345 ..."
347,347,road,"LINESTRING (-8.5990569 41.1491025, -8.5989745 ..."


In [10]:
# Information on all trajectory points; the additional road_id column gives the
# geo_id of the road segment each point belongs to. Show the last 30 rows.
traj_data.point_table.tail(30)

Unnamed: 0,point_id,traj_id,timestamp,lon,lat,road_id
30,30,1372637303620000596,1372637408000000000,-8.656434,41.16258,195.0
31,31,1372637303620000596,1372637423000000000,-8.660178,41.163192,14.0
32,32,1372637303620000596,1372637438000000000,-8.663112,41.163687,310.0
33,33,1372637303620000596,1372637453000000000,-8.666235,41.1642,298.0
34,34,1372637303620000596,1372637468000000000,-8.669169,41.164704,203.0
35,35,1372637303620000596,1372637483000000000,-8.670852,41.165136,233.0
36,36,1372637303620000596,1372637498000000000,-8.670942,41.166576,97.0
37,37,1372637303620000596,1372637513000000000,-8.66961,41.167962,31.0
38,38,1372637303620000596,1372637528000000000,-8.668098,41.168988,186.0
39,39,1372637303620000596,1372637543000000000,-8.66664,41.170005,125.0
