In [None]:
# Data settings for the Chengdu dataset (source path lives under didi/chengdu).
# The cache file name repeats data_size (100K) and window (5_10000_1), so the
# three values have to be edited together.
chengdu_data_config = dict(
    data_name="chengdu",
    data_path="/data/hetianran/didi/chengdu/gps_20161101",
    data_size=100000,  # None for all data
    grid_step=100,  # 100 for 0.1 gps range, 200 for 0.2 gps range
    road_type="drive",  # TODO: how should this be set, and how do the options differ?
    window=(5, 10000, 1),  # NOTE(review): meaning of the three values is defined by the data factory, not visible here
    cache_path="/data/hetianran/cache/chengdu-100K-5_10000_1.data",
)

# Data settings for the Geolife dataset, read from a CSV inside the repository's
# resource directory. The cache file name repeats data_size (100K) and
# window (5_256_256).
geolife_data_config = dict(
    data_name="geolife",
    data_path="./resource/dataset/Geolife/geolife_small.csv",
    data_size=100000,
    grid_step=200,
    road_type="all",
    window=(5, 256, 256),
    cache_path="/data/hetianran/cache/geolife-100K-5_256_256.data",
)

# Data settings for the BJ dataset (shuffled CSV).
# NOTE(review): unlike the other data configs in this cell there is no
# "road_type" entry and cache_path is an empty string — confirm the data
# factory accepts both before selecting this config.
bj_data_config = dict(
    data_name="bj",
    data_path="/data/hetianran/BJ/BJ_shuffled.csv",
    data_size=1000000,
    grid_step=250,
    window=(5, 128, 1),
    cache_path="",
)

# Settings for the "prediction" task.
prediction_task_config = dict(
    task_name="prediction",
    train_mode="pre-train",  # one of: pre-train, fine-tune, test-only
    dataset_prop=(0.8, 0.1, 0.1),
    input_len=10,
    output_len=1,  # only 1
    token="grid",  # one of: traj, gps, grid, roadnet
)

# Settings for the "similarity" task; runs in test-only mode with the whole
# dataset assigned to the last split of dataset_prop.
similarity_task_config = dict(
    task_name="similarity",
    train_mode="test-only",
    dataset_prop=(0, 0, 1),
    variant="original",  # one of: cropped, distorted, original
)
# "sub-task" contains a hyphen, so it cannot be passed as a keyword argument;
# adding it last keeps the original key order.
similarity_task_config["sub-task"] = "MSS"  # one of: MSS, CDD, kNN

# Settings for the "filling" task; the last entry of dataset_prop is 0.
filling_task_config = dict(
    task_name="filling",
    train_mode="pre-train",
    dataset_prop=(0.9, 0.1, 0),
)
# "sub-task" contains a hyphen, so it cannot be passed as a keyword argument;
# the remaining keys are assigned in their original order.
filling_task_config["sub-task"] = "mlm"  # one of: mlm, autoregressive
filling_task_config["token"] = "grid"

# Settings for the "classification" task, classifying on the "mode" attribute.
classification_task_config = dict(
    task_name="classification",
    train_mode="fine-tune",
    dataset_prop=(0.8, 0.1, 0.1),
    class_attr="mode",
    num_classes=0,  # set by dataset factory
)

# Embedding settings: a shared dimension, the list of active token types, and
# one sub-section per token type ("gps", "grid", "roadnet").
# The "grid" and "roadnet" sections stay as literals because their
# "pre-trained" key is not a valid keyword-argument name.
embedding_config = dict(
    emb_dim=256,
    tokens=["grid"],
    gps=dict(emb_name="linear"),
    grid={
        "emb_name": "node2vec",
        "vocab_size": 0,  # set by data factory
        "pre-trained": True,
        # File name refers to the Chengdu 100K grid; revisit when switching datasets.
        "embs_path": "./resource/embedding/chengdu_100K_grid_node2vec.pth",
    },
    roadnet={
        "emb_name": "embedding",
        "vocab_size": 0,  # set by data factory
        "pre-trained": False,
        "embs_path": "",
    },
)

# Encoder settings for the "transformer" encoder.
# d_model is read from embedding_config["emb_dim"] when this statement runs, so
# the two agree at definition time; a later change to emb_dim only propagates
# if this cell is executed again.
encoder_config = dict(
    encoder_name="transformer",
    num_layers=6,
    d_model=embedding_config["emb_dim"],
    num_heads=8,
    d_ff=2048,
    dropout=0.1,
)

# Training settings: where the backbone weights live plus the optimisation
# hyper-parameters.
trainer_config = dict(
    model_path="./resource/backbone/backbone.pth",
    batch_size=32,
    learning_rate=1e-4,
    num_epochs=10,
    optimizer="adam",
    loss_function="cross_entropy",  # one of: cross_entropy, mse
    lr_scheduler="step_lr",
)

# Top-level configuration handed to the data factory and the runner.
# Currently selects the Chengdu data and the prediction task; point
# data_config / task_config at one of the other dicts defined in this cell to
# switch. The sub-dicts are referenced, not copied, so mutating them later also
# changes what `config` exposes.
config = dict(
    data_config=chengdu_data_config,
    task_config=prediction_task_config,
    embedding_config=embedding_config,
    encoder_config=encoder_config,
    trainer_config=trainer_config,
)

In [None]:
from trajlib.data.data_factory import create_data

# create_data returns the dataset together with grid_graph_data and
# road_graph_data (the latter is not used further in this cell).
# NOTE(review): overwrite=False presumably reuses data_config["cache_path"]
# when it exists — confirm in create_data.
data, grid_graph_data, road_graph_data = create_data(config, overwrite=False)

# Size summary, one value per line: len(data.grid), the road network's
# edge_num, and len(data).
print(len(data.grid), data.roadnet.edge_num, len(data), sep="\n")

# Grid-graph node count next to the node count reported by its to_geo_data()
# form, printed on one line.
print(
    len(grid_graph_data.nodes),
    grid_graph_data.to_geo_data().num_nodes,
)

In [None]:
from accelerate import notebook_launcher

from trajlib.runner.base_runner import BaseRunner


def accelerate_run(config):
    """Entry point executed by each launched process.

    Builds a BaseRunner from ``config`` and calls its ``run()`` method.
    The runner is created inside this function, so every process constructs
    its own instance.

    Args:
        config: The configuration dict passed straight to ``BaseRunner``.
    """
    BaseRunner(config).run()


# Alternative kept for reference: restrict the visible GPUs and call
# accelerate_run directly instead of going through the launcher.
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"
# accelerate_run(config)

# Launch accelerate_run in 4 processes from inside the notebook.
notebook_launcher(
    accelerate_run,
    args=(config,),
    num_processes=4,
    use_port="29502",
)
# Check what is holding a port with: lsof -i :29500
# NOTE(review): the hint names 29500 while the launch uses 29502 — presumably
# 29500 (the launcher's default) was already taken; confirm.