In [None]:
# Download the YouBike 1.0 history dump from Google Drive by file ID;
# it is saved as "Youbike 1.0 History.json" in the working directory.
!gdown --id '1vekmayQiCjQnkQSHbkbeGUpA9xFV4-7Y'

Downloading...
From: https://drive.google.com/uc?id=1vekmayQiCjQnkQSHbkbeGUpA9xFV4-7Y
To: /content/Youbike 1.0 History.json
254MB [00:01, 157MB/s]


In [None]:
import re
import json

from bson import json_util
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


def read_mongoextjson_file(filename):
    """Load a MongoDB shell-style ("TenGen") JSON dump into Python objects.

    The dump may contain shell constructors such as ``ObjectId("...")``,
    ``ISODate(...)`` and ``NumberInt(...)`` which are not valid JSON. They are
    rewritten to their Extended JSON equivalents (``$oid``, ``$date``,
    ``$numberInt``) and the result is parsed with ``bson.json_util``'s
    object hook.

    Parameters
    ----------
    filename : str or path-like
        Path of the dump file. It is read as UTF-8.

    Returns
    -------
    The decoded JSON document (whatever top-level value the file holds).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the text is still not valid JSON after the rewrites.
    """
    # Read the entire input; in a real application you would want to read a
    # chunk at a time. The encoding is explicit so that non-ASCII text is
    # decoded the same way regardless of the platform's locale.
    with open(filename, "r", encoding="utf-8") as f:
        bsondata = f.read()

    # Convert the TenGen JSON to Strict JSON. Only the ObjectId, ISODate and
    # NumberInt structures are handled, but it is easy to extend to the other
    # structures listed at
    # http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON
    #
    # The captures are deliberately non-greedy / quote-bounded: a greedy
    # ``\S+`` would run to the *last* closing parenthesis of a whitespace-free
    # stretch and merge several constructors into one match on compact dumps.
    jsondata = re.sub(
        r'ObjectId\s*\(\s*"([^"\s]+)"\s*\)', r'{"$oid": "\1"}', bsondata
    )
    jsondata = re.sub(r"ISODate\s*\(\s*(\S+?)\s*\)", r'{"$date": \1}', jsondata)
    jsondata = re.sub(
        r"NumberInt\s*\(\s*(\S+?)\s*\)", r'{"$numberInt": "\1"}', jsondata
    )

    # Now this parses as JSON; MongoDB's object_hook turns the Extended JSON
    # wrappers into rich Python data structures.
    return json.loads(jsondata, object_hook=json_util.object_hook)

In [None]:
# Parse the downloaded dump and load the resulting records straight into a
# DataFrame, so `data` only ever refers to the table.
data = pd.DataFrame(read_mongoextjson_file("Youbike 1.0 History.json"))

In [None]:
# Preview the loaded table; the `bikes` column holds a list of timestamped
# records for each row.
data

Unnamed: 0,_id,status,station_no,name_tw,district_tw,address_tw,lat,lng,bikes
0,60bc37a23ffb89b975dd9e81,1,2389,南順七街32巷口,蘆竹區,南順七街35號旁人行道,25.039407,121.290934,"[{'updated_at': '2021-06-06 10:48:05', 'parkin..."
1,60bc37a23ffb89b975dd9e83,2,2254,大潤發中壢店,中壢區,中北路二段468號南側人行道,24.95524533,121.2348423,"[{'updated_at': '2021-06-06 10:48:05', 'parkin..."
2,60bc37a23ffb89b975dd9e85,1,2390,復興路朝陽街口,桃園區,復興路51號旁停車場,24.990843,121.31611,"[{'updated_at': '2021-06-06 10:48:05', 'parkin..."
3,60bc37a33ffb89b975dd9e87,1,1746,矽谷園區,汐止區,中興路45-1號(旁),25.062270,121.634470,"[{'updated_at': '2021-06-06 10:48:05', 'parkin..."
4,60bc37a33ffb89b975dd9e89,1,1170,延和社區公園,土城區,延和路180號(旁),24.990270,121.468910,"[{'updated_at': '2021-06-06 10:48:05', 'parkin..."
...,...,...,...,...,...,...,...,...,...
1910,60bc3dfd753a95aee7289abc,1,0274,扶輪親恩公園,內湖區,民權東路六段13之15號對面人行道(民權大橋)(鄰近華生水水體驗館),25.066997,121.579833,"[{'updated_at': '2021-06-06 11:02:07', 'parkin..."
1911,60bc3dfd753a95aee7289abd,1,0275,西本願寺廣場,萬華區,中華路一段/長沙街二段路口西南側人行道(西本願寺)(鄰近國軍歷史文物館/中山堂/西門紅樓),25.040988,121.507688,"[{'updated_at': '2021-06-06 11:02:07', 'parkin..."
1912,60bc3dfe753a95aee7289abe,1,0276,三興公園,信義區,吳興街118巷35弄28號前方(三興公園),25.028679,121.559320,"[{'updated_at': '2021-06-06 11:02:07', 'parkin..."
1913,60bc3dfe753a95aee7289abf,1,0277,中山堂,中正區,延平南路/武昌街一段東南角人行道(延平武昌街口)(鄰近中山堂/西門町),25.044091,121.510250,"[{'updated_at': '2021-06-06 11:02:07', 'parkin..."


In [None]:
# Drop the identifier columns that the rest of the notebook does not use.
# `drop(..., errors="ignore")` keeps this cell re-runnable: `del data[col]`
# raises KeyError the second time the cell is executed.
data = data.drop(columns=["_id", "station_no"], errors="ignore")

In [None]:
def _station_traffic(history):
    """Return the total absolute change in `available_spaces` across a history.

    `history` is the list of records stored in one cell of the `bikes` column.
    Consecutive records are compared pairwise, so an empty or single-record
    history yields 0 instead of raising IndexError.
    """
    spaces = [record["available_spaces"] for record in history]
    return sum(abs(curr - prev) for prev, curr in zip(spaces, spaces[1:]))


# One traffic figure per row, in the same order as the `bikes` column.
data["traffic"] = [_station_traffic(history) for history in data["bikes"]]

In [None]:
# Summary statistics of the computed traffic column.
data["traffic"].describe()

count    1915.000000
mean       30.502350
std        42.176724
min         0.000000
25%         9.000000
50%        19.000000
75%        36.000000
max       462.000000
Name: traffic, dtype: float64

In [None]:
# Show the row(s) with the highest traffic. The maximum is computed rather
# than hard-coded so the cell stays correct if the data changes.
data[data["traffic"] == data["traffic"].max()]

Unnamed: 0,status,name_tw,district_tw,address_tw,lat,lng,bikes,traffic
156,1,彰化火車站前站,彰化市,中正路一段/光復路口,24.08116289,120.539128,"[{'updated_at': '2021-06-06 10:52:07', 'parkin...",462


In [None]:
import plotly.express as px

# Bubble map of the stations: marker size follows `traffic`, colour follows
# the district, and hovering shows the station name.
fig = px.scatter_geo(
    data_frame=data,
    lat="lat",
    lon="lng",
    size="traffic",
    color="district_tw",
    hover_name="name_tw",
    scope="asia",
)
fig.show()

In [None]:
import folium
def generateBaseMap(default_location=[25.039407, 121.290934], default_zoom_start=12):
    """Create an empty folium map with a scale bar.

    Parameters
    ----------
    default_location : [lat, lng]
        Map centre. The default matches the coordinates of the first row of
        the station table.
    default_zoom_start : int
        Initial zoom level passed to folium.

    Returns
    -------
    folium.Map
    """
    return folium.Map(
        location=default_location,
        zoom_start=default_zoom_start,
        control_scale=True,
    )

In [None]:
# Work on a copy so the numeric conversions that follow leave `data` untouched.
# The copy is created here because this is the first cell that uses `df_copy`;
# without it the cell raises NameError on a fresh kernel.
df_copy = data.copy()
df_copy

Unnamed: 0,status,name_tw,district_tw,address_tw,lat,lng,bikes,traffic
0,1,南順七街32巷口,蘆竹區,南順七街35號旁人行道,25.039407,121.290934,"[{'updated_at': '2021-06-06 10:48:05', 'parkin...",60
1,2,大潤發中壢店,中壢區,中北路二段468號南側人行道,24.95524533,121.2348423,"[{'updated_at': '2021-06-06 10:48:05', 'parkin...",0
2,1,復興路朝陽街口,桃園區,復興路51號旁停車場,24.990843,121.31611,"[{'updated_at': '2021-06-06 10:48:05', 'parkin...",30
3,1,矽谷園區,汐止區,中興路45-1號(旁),25.062270,121.634470,"[{'updated_at': '2021-06-06 10:48:05', 'parkin...",74
4,1,延和社區公園,土城區,延和路180號(旁),24.990270,121.468910,"[{'updated_at': '2021-06-06 10:48:05', 'parkin...",103
...,...,...,...,...,...,...,...,...
1910,1,扶輪親恩公園,內湖區,民權東路六段13之15號對面人行道(民權大橋)(鄰近華生水水體驗館),25.066997,121.579833,"[{'updated_at': '2021-06-06 11:02:07', 'parkin...",7
1911,1,西本願寺廣場,萬華區,中華路一段/長沙街二段路口西南側人行道(西本願寺)(鄰近國軍歷史文物館/中山堂/西門紅樓),25.040988,121.507688,"[{'updated_at': '2021-06-06 11:02:07', 'parkin...",15
1912,1,三興公園,信義區,吳興街118巷35弄28號前方(三興公園),25.028679,121.559320,"[{'updated_at': '2021-06-06 11:02:07', 'parkin...",20
1913,1,中山堂,中正區,延平南路/武昌街一段東南角人行道(延平武昌街口)(鄰近中山堂/西門町),25.044091,121.510250,"[{'updated_at': '2021-06-06 11:02:07', 'parkin...",12


In [None]:
# The coordinates are loaded as strings; convert both columns to numbers.
for coord in ("lat", "lng"):
    df_copy[coord] = pd.to_numeric(df_copy[coord])

In [None]:
# Inspect the column dtypes of the working copy.
df_copy.dtypes

status          int64
name_tw        object
district_tw    object
address_tw     object
lat            object
lng            object
bikes          object
traffic         int64
dtype: object

In [None]:
from folium.plugins import HeatMap
# Start from a fresh copy of the table and an empty base map.
df_copy = data.copy()
base_map = generateBaseMap()

# Each heat-map point is (lat, lng, weight), with the traffic total as the weight.
heat_points = list(
    zip(
        pd.to_numeric(df_copy["lat"]),
        pd.to_numeric(df_copy["lng"]),
        df_copy["traffic"],
    )
)
HeatMap(data=heat_points).add_to(base_map)

<folium.plugins.heat_map.HeatMap at 0x7fede4e8ce10>

In [None]:
# Render the folium map, including the heat-map layer added to it, inline.
base_map