#  GTFSデータからバスの動きをkepler.glで可視化する

## 概要

- GTFSの停留所の座標(stops.txt)と通過時刻(stop_times.txt)のデータから、タイムスタンプを紐付けたGeoJSONデータを生成する。
- GeoJSONデータをkepler.glで読み込ませてアニメーション表示の可視化を行う

### 注

経路の形状データ(shapes.txt)を利用していないため、バスは停留所間を道路に沿わず直線的に移動する。

### 利用したGTFSデータ

- [室蘭市オープンデータ 道南バスGTFS](http://www.city.muroran.lg.jp/main/org2260/odlib.php)
- [裾野市自主運行バス　すそのーる　GTFS](http://www.city.susono.shizuoka.jp/shisei/9/1/3254.html)
- [北海道拓殖バス オープンデータ](https://www.takubus.com/%E3%82%AA%E3%83%BC%E3%83%97%E3%83%B3%E3%83%87%E3%83%BC%E3%82%BF/)

### 必要なパッケージのインポート

In [1]:
import os
import zipfile
import pandas as pd
import json

### ディレクトリの構成

dataフォルダにはGTFSのZIPファイルを格納しておく

In [2]:
# Working directories used by the notebook (joined onto the current working
# directory wherever they are used).
data_path, tmp_path, output_path = (
    "data",    # input: the GTFS ZIP archives are stored here
    "tmp",     # scratch: files extracted from the ZIP archive go here
    "output",  # result: the generated GeoJSON file is written here
)

### GTFSのZIPファイルから必要なファイルを展開

In [3]:
# GTFS archive to process (the sample is the Muroran city feed).
gtfs_file = "GTFS_Donanbus_Muroran.zip"

# Only the stop coordinates and the stop times are needed, so extract just
# those two members into the scratch directory.
with zipfile.ZipFile(os.path.join(os.getcwd(), f"{data_path}/{gtfs_file}")) as _zip:
    for _member in ("stops.txt", "stop_times.txt"):
        _zip.extract(_member, tmp_path)

### 停留所の座標データの読み込み

In [4]:
# Load the stop coordinates from the extracted stops.txt.
df_stops = (
    pd.read_csv(os.path.join(os.getcwd(), f"{tmp_path}/stops.txt"))
    .dropna(axis=1, how="all")  # drop columns in which every value is NaN
    .assign(stop_z=0)  # dummy elevation, needed later when writing GeoJSON coordinates
)
df_stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,zone_id,location_type,parent_station,platform_code,stop_z
0,0001,絵鞆団地,42.332400,140.936739,,1,,,0
1,0002,絵鞆2丁目,42.334350,140.940358,,1,,,0
2,0003,絵鞆中央,42.335863,140.939057,,1,,,0
3,0004,絵鞆公園前,42.336349,140.934134,,1,,,0
4,0005,公営住宅前,42.334403,140.933924,,1,,,0
...,...,...,...,...,...,...,...,...,...
712,0983_B,幡守神社入口,42.381665,140.927368,0983_B,0,983.0,,0
713,0984_B,石川町,42.381350,140.918422,0984_B,0,984.0,,0
714,0990_A,みたら・水族館前,42.341609,140.944495,0990_A,0,990.0,,0
715,0990_B,みたら・水族館前,42.341482,140.944402,0990_B,0,990.0,,0


### 停留所の通過時刻データの読み込み

In [5]:
# Load the per-stop passing times from the extracted stop_times.txt.
# The unfiltered frame stays available as `df_`.
df_ = pd.read_csv(os.path.join(os.getcwd(), f"{tmp_path}/stop_times.txt"))
df_stop_times = df_.dropna(axis=1, how="all")  # drop columns in which every value is NaN
df_stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
0,100310_weekday_1,06:55:00,06:55:00,0391_A,1,3,1
1,100310_weekday_1,06:55:00,06:55:00,0384_A,2,3,3
2,100310_weekday_1,06:56:00,06:56:00,0383_A,3,3,3
3,100310_weekday_1,06:56:00,06:56:00,0382_A,4,3,3
4,100310_weekday_1,06:58:00,06:58:00,0381_A,5,3,3
...,...,...,...,...,...,...,...
24660,132610_weekend_1,06:43:00,06:43:00,0334_B,13,3,3
24661,132610_weekend_1,06:44:00,06:44:00,0333_B,14,3,3
24662,132610_weekend_1,06:45:00,06:45:00,0332_B,15,3,3
24663,132610_weekend_1,06:46:00,06:46:00,0331_B,16,3,3


### 到着時刻をエポック秒に変換

In [6]:
from datetime import datetime

# GTFS times are HH:MM:SS measured from the start of the service day, so a
# trip that runs past midnight carries values such as "25:10:00". Parsing
# those as clock times raises an error, so each value is parsed as a duration
# and added to today's midnight instead. For times below 24:00:00 this yields
# the same datetimes as parsing the clock time against today's date.
_service_day_start = pd.Timestamp.today().normalize()
_arrival_datetime = _service_day_start + pd.to_timedelta(df_stop_times['arrival_time'])

# `time` holds the arrival time as epoch seconds (float). The date part is
# simply "today"; the next step only uses differences between these values.
df_stop_times = df_stop_times.assign(time=_arrival_datetime.map(datetime.timestamp))

df_stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,time
0,100310_weekday_1,06:55:00,06:55:00,0391_A,1,3,1,1581285000.0
1,100310_weekday_1,06:55:00,06:55:00,0384_A,2,3,3,1581285000.0
2,100310_weekday_1,06:56:00,06:56:00,0383_A,3,3,3,1581285000.0
3,100310_weekday_1,06:56:00,06:56:00,0382_A,4,3,3,1581285000.0
4,100310_weekday_1,06:58:00,06:58:00,0381_A,5,3,3,1581285000.0


### 一番早い時刻を0としてタイムスタンプ（オフセット）を計算

In [7]:
# Express every arrival as whole seconds elapsed since the earliest arrival
# in the feed, so the earliest one gets timestamp 0.
df_stop_times = df_stop_times.assign(
    timestamp=lambda frame: frame['time'].sub(frame['time'].min()).astype(int)
)
df_stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,time,timestamp
0,100310_weekday_1,06:55:00,06:55:00,0391_A,1,3,1,1581285000.0,3300
1,100310_weekday_1,06:55:00,06:55:00,0384_A,2,3,3,1581285000.0,3300
2,100310_weekday_1,06:56:00,06:56:00,0383_A,3,3,3,1581285000.0,3360
3,100310_weekday_1,06:56:00,06:56:00,0382_A,4,3,3,1581285000.0,3360
4,100310_weekday_1,06:58:00,06:58:00,0381_A,5,3,3,1581285000.0,3480


### 停留所の通過時刻と座標のデータをマージ

In [8]:
# Attach each stop's attributes (name, coordinates, ...) to its stop-time rows.
# A left join keeps every stop-time row even when no matching stop exists.
df = pd.merge(df_stop_times, df_stops, on='stop_id', how='left')
df

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,time,timestamp,stop_name,stop_lat,stop_lon,zone_id,location_type,parent_station,platform_code,stop_z
0,100310_weekday_1,06:55:00,06:55:00,0391_A,1,3,1,1.581285e+09,3300,工大,42.375895,141.035128,0391_A,0,391.0,,0
1,100310_weekday_1,06:55:00,06:55:00,0384_A,2,3,3,1.581285e+09,3300,高砂5丁目,42.373476,141.035426,0384_A,0,384.0,,0
2,100310_weekday_1,06:56:00,06:56:00,0383_A,3,3,3,1.581285e+09,3360,東翔高校前,42.370671,141.035758,0383_A,0,383.0,,0
3,100310_weekday_1,06:56:00,06:56:00,0382_A,4,3,3,1.581285e+09,3360,高砂4丁目,42.368055,141.036063,0382_A,0,382.0,,0
4,100310_weekday_1,06:58:00,06:58:00,0381_A,5,3,3,1.581285e+09,3480,高砂3丁目,42.365754,141.036281,0381_A,0,381.0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24660,132610_weekend_1,06:43:00,06:43:00,0334_B,13,3,3,1.581285e+09,2580,日の出町2丁目,42.352480,141.043950,0334_B,0,334.0,,0
24661,132610_weekend_1,06:44:00,06:44:00,0333_B,14,3,3,1.581285e+09,2640,日の出町1丁目,42.350824,141.040117,0333_B,0,333.0,,0
24662,132610_weekend_1,06:45:00,06:45:00,0332_B,15,3,3,1.581285e+09,2700,寿町2丁目,42.349028,141.036768,0332_B,0,332.0,,0
24663,132610_weekend_1,06:46:00,06:46:00,0331_B,16,3,3,1.581285e+09,2760,寿町3丁目,42.347344,141.033897,0331_B,0,331.0,,0


### GeoJSON形式のデータをファイルに出力

In [10]:
featureCollection = {
    "type": "FeatureCollection",
    "features": []
}

# Build one LineString feature per trip_id.
# Each coordinate is a [lon, lat, z, timestamp] list.
for trip, trip_stops in df.groupby('trip_id'):
    # The row order inside stop_times.txt is not guaranteed to follow the
    # travel order, so order the vertices by stop_sequence explicitly.
    # A stable sort keeps the file order for rows with equal sequence values.
    ordered_stops = trip_stops.sort_values('stop_sequence', kind='stable')

    # Serialise through pandas so missing values become JSON null (not NaN).
    data = ordered_stops.loc[:, ['stop_lon', 'stop_lat', 'stop_z', 'timestamp']].to_json(orient='values')

    feature = {
        "type": "Feature",
        "properties": {
            "trip_id": trip
        },
        "geometry": {
            "type": "LineString",
            "coordinates": json.loads(data)
        }
    }
    featureCollection["features"].append(feature)

# Make sure the output directory exists before writing into it.
os.makedirs(os.path.join(os.getcwd(), output_path), exist_ok=True)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is fixed to UTF-8 rather than left to the platform default.
with open(os.path.join(os.getcwd(), f"{output_path}/GTFS-trips.geojson"), "w", encoding="utf-8") as fp:
    json.dump(featureCollection, fp, ensure_ascii=False, indent=1)