In [6]:
import datetime
import gzip
import json
from pathlib import Path
from io import SEEK_END
import struct
import polars as pl
import zipfile

def parse_filename(fname):
    """Split a snapshot file name into its timestamp and part number.

    The base name is expected to look like
    ``YYYY-MM-DD_HH_MM_SS-<part>.<extensions>``; any leading directory
    components of ``fname`` are ignored.

    Args:
        fname: Path or file name of the snapshot.

    Returns:
        tuple: ``(datetime.datetime, int)`` — the parsed timestamp and the
        integer part number that follows the last ``-``.

    Raises:
        ValueError: if the name has no ``-``, the part is not an integer, or
            the timestamp does not match the expected format.
    """
    basename = Path(fname).name
    # The date itself contains dashes, so split on the *last* one only.
    stamp_text, tail = basename.rsplit("-", 1)
    # Everything before the first dot of the tail is the part number.
    part_text = tail.partition(".")[0]
    part = int(part_text)
    ts = datetime.datetime.strptime(stamp_text, "%Y-%m-%d_%H_%M_%S")
    return ts, part
  
def get_uncompressed_size(gzf):
    """Return the uncompressed size recorded in a gzip stream's trailer.

    Reads the last 4 bytes of ``gzf`` (the gzip ISIZE field) and then rewinds
    ``gzf`` to offset 0 so it can be read from the start afterwards.

    Args:
        gzf: Seekable binary file object holding gzip data.

    Returns:
        int: The ISIZE value, i.e. the uncompressed length modulo 2**32.

    Raises:
        struct.error: if fewer than 4 bytes could be read from the end.
    """
    # !!! this is brittle: ISIZE is the size modulo 2**32, so it is only
    # correct for payloads < 4GB, which should be ok for now.
    gzf.seek(-4, SEEK_END)
    trailer = gzf.read(4)
    # Rewind before unpacking so the stream is reset even if the trailer is bad.
    gzf.seek(0)
    # The gzip format stores ISIZE little-endian; "<I" makes that explicit
    # instead of relying on the host's native byte order.
    return struct.unpack("<I", trailer)[0]


def get_num_stations(gzf):
    """Count the entries under ``data.supply.stations`` in a gzipped JSON file.

    The stream is rewound to offset 0 first (matching ``get_data``), so the
    result does not depend on where a previous reader left the position.

    Args:
        gzf: Seekable binary file object holding gzip-compressed JSON.

    Returns:
        int: ``len()`` of the ``stations`` value, or 0 when any of the
        ``data`` / ``supply`` / ``stations`` levels is missing, is not a
        JSON object, or when ``stations`` itself is null.
    """
    gzf.seek(0)
    with gzip.GzipFile(fileobj=gzf) as f:
        d = json.load(f)

    # Walk the nested keys defensively: a level may be absent or null, in
    # which case a plain `"key" in value` test would raise TypeError.
    node = d
    for key in ("data", "supply", "stations"):
        if not isinstance(node, dict) or key not in node:
            return 0
        node = node[key]
    return 0 if node is None else len(node)

def get_data(gzf):
    """Decode the whole gzip-compressed JSON document held in ``gzf``.

    Args:
        gzf: Seekable binary file object holding gzip-compressed JSON. It is
            rewound to offset 0 before reading.

    Returns:
        The parsed JSON value (whatever ``json`` produces for the document).
    """
    gzf.seek(0)
    with gzip.GzipFile(fileobj=gzf) as stream:
        raw = stream.read()
    return json.loads(raw)


# Sanity-check the first few members of the archive: print each member's
# parsed timestamp / part number, its gzip size vs. declared uncompressed
# size, and the number of stations found in its JSON payload.
ARCHIVE_PATH = "2024-11-08-compressed.zip"
# Scanning stops after the entry at this index in the archive listing.
# Directory entries are skipped but still consume an index.
LAST_ENTRY_INDEX = 10

# The context manager closes the archive handle when the scan finishes
# (or fails); reopen the archive in any later cell that needs it.
with zipfile.ZipFile(ARCHIVE_PATH, "r") as zf:
    for i, entry in enumerate(zf.infolist()):
        if entry.is_dir():
            continue
        ts, part_num = parse_filename(entry.filename)
        print(entry.filename)
        print(" ", ts, part_num)
        with zf.open(entry.filename) as f:
            # Size of the stored .json.gz member, i.e. the gzip-compressed size.
            compressed_sz = entry.file_size
            uncompressed_sz = get_uncompressed_size(f)
            print(" ", compressed_sz, uncompressed_sz)

            num_stations = get_num_stations(f)
            # `d` is intentionally left bound at notebook level: after the
            # loop it holds the payload of the last member processed, which
            # the DataFrame cell further down reads.
            d = get_data(f)
            print(" ", num_stations)
        if i >= LAST_ENTRY_INDEX:
            break


2024-11-08/2024-11-08_03_56_29-1.json.gz
  2024-11-08 03:56:29 1
  18918 262073
  0
2024-11-08/2024-11-08_03_57_29-1.json.gz
  2024-11-08 03:57:29 1
  18994 261615
  0
2024-11-08/2024-11-08_03_58_29-1.json.gz
  2024-11-08 03:58:29 1
  18943 261447
  0
2024-11-08/2024-11-08_03_59_30-1.json.gz
  2024-11-08 03:59:30 1
  19057 262503
  0
2024-11-08/2024-11-08_04_00_30-1.json.gz
  2024-11-08 04:00:30 1
  18943 261230
  0
2024-11-08/2024-11-08_04_01_30-4.json.gz
  2024-11-08 04:01:30 4
  12151 163608
  0
2024-11-08/2024-11-08_04_02_30-4.json.gz
  2024-11-08 04:02:30 4
  12116 163168
  0
2024-11-08/2024-11-08_04_03_30-4.json.gz
  2024-11-08 04:03:30 4
  12026 161819
  0
2024-11-08/2024-11-08_04_04_30-4.json.gz
  2024-11-08 04:04:30 4
  12019 161831
  0
2024-11-08/2024-11-08_04_05_30-4.json.gz
  2024-11-08 04:05:30 4
  12060 162275
  0


In [12]:
# NOTE(review): `d` is not defined in this cell — presumably it is the payload
# left over from the archive-scan loop cell; run that cell first on a fresh kernel.
df = pl.DataFrame(data=d['data']['supply']['rideables'])
df

rideableId,rideableName,location,rideableType,photoUrl,batteryStatus,__typename
str,str,struct[3],str,str,struct[3],str
"""1891146379260463558""","""···-3706""","{41.903084,-87.757459,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{15,""miles"",""Distance""},52,""BatteryStatus""}","""Rideable"""
"""1891146515095258276""","""···-5342""","{41.922262,-87.687727,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{8,""miles"",""Distance""},34,""BatteryStatus""}","""Rideable"""
"""1891146518565821062""","""···-6795""","{41.958865,-87.784335,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{8,""miles"",""Distance""},36,""BatteryStatus""}","""Rideable"""
"""1891146518668050936""","""···-2966""","{41.914121,-87.627849,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{1,""miles"",""Distance""},18,""BatteryStatus""}","""Rideable"""
"""1891146519062372058""","""···-9172""","{41.998183,-87.656597,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{19,""miles"",""Distance""},61,""BatteryStatus""}","""Rideable"""
…,…,…,…,…,…,…
"""1927385027798625932""","""···-0036""","{41.902288,-87.699462,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{8,""miles"",""Distance""},36,""BatteryStatus""}","""Rideable"""
"""1927385126065828480""","""···-2564""","{41.739154,-87.554049,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{4,""miles"",""Distance""},26,""BatteryStatus""}","""Rideable"""
"""1927385146149317198""","""···-5516""","{41.913645,-87.71165,""Location""}","""ELECTRIC_BIKE""","""https://cdn.lyft.com/static/la…","{{12,""miles"",""Distance""},44,""BatteryStatus""}","""Rideable"""
"""1947382333507678226""","""···-940""","{41.865063,-87.71458,""Location""}","""ELECTRIC_SCOOTER""","""https://cdn.lyft.com/static/la…","{{8,""miles"",""Distance""},44,""BatteryStatus""}","""Rideable"""
