In [None]:
import pandas as pd
import json
import os

from lib.git_history import iterate_file_versions

In [None]:
input_file = os.path.join("data", "ch.bfe.ladestellen-elektromobilitaet-status.json")
output = os.path.join("data_cache")
BATCH_SIZE = 10  # number of file versions collected per cached pickle

# DataFrame.to_pickle does not create missing directories.
os.makedirs(output, exist_ok=True)


def _save_batch(records, cache_dir, version_index, commit_hash):
    """Write one batch of status records to a pickle file.

    Args:
        records: list of dicts with the keys Timestamp, OperatorID,
            OperatorName, EvseID and EVSEStatus.
        cache_dir: directory the pickle file is written to.
        version_index: index of the last file version contained in the batch.
        commit_hash: git hash of that version; together with the index it
            forms the file name "<index>_<hash>.pks".

    Returns:
        The path of the written pickle file.
    """
    df_rec = pd.DataFrame(records)
    # Timestamps are collected as ISO strings; store them as UTC datetimes.
    df_rec["Timestamp"] = pd.to_datetime(df_rec["Timestamp"], utc=True)
    pickle_path = os.path.join(cache_dir, f"{version_index}_{commit_hash}.pks")
    df_rec.to_pickle(pickle_path)
    print(f"Saved pickle at {pickle_path}")
    return pickle_path


# Walk through every git version of the status file and flatten the nested
# operator -> EVSE status structure into one record per EVSE and version.
status_recs = []
for i, git_commit_at, git_hash, content in iterate_file_versions(input_file):
    versioned_json = json.loads(content)
    # NOTE(review): assumes git_commit_at is a datetime-like object -- confirm.
    timestamp = git_commit_at.isoformat()

    for operator in versioned_json.get("EVSEStatuses", []):
        operator_id = operator.get("OperatorID", "")
        operator_name = operator.get("OperatorName", "")

        for evse in operator.get("EVSEStatusRecord", []):
            status_recs.append(
                {
                    "Timestamp": timestamp,
                    "OperatorID": operator_id,
                    "OperatorName": operator_name,
                    "EvseID": evse.get("EvseID", ""),
                    "EVSEStatus": evse.get("EVSEStatus", ""),
                }
            )

    # Save every BATCH_SIZE versions as a pickle file to keep memory bounded.
    # An empty batch is skipped: a DataFrame built from no records has no
    # "Timestamp" column and the conversion would raise a KeyError.
    if (i % BATCH_SIZE) == 0 and status_recs:
        _save_batch(status_recs, output, i, git_hash)
        status_recs = []

# Flush the versions collected after the last full batch; without this they
# would be dropped. `i` and `git_hash` still refer to the last iterated
# version, and status_recs is only non-empty if the loop ran at least once.
if status_recs:
    _save_batch(status_recs, output, i, git_hash)
    status_recs = []

In [None]:
# Combine the cached pickle files into one dataframe.
# NOTE: unpickling can execute arbitrary code -- only read cache files that
# were produced by this notebook.
cache_dir = "data_cache"


def _batch_number(file_name):
    """Return the numeric prefix of "<index>_<hash>.pks", or -1 if there is none."""
    prefix = file_name.split("_", 1)[0]
    return int(prefix) if prefix.isdigit() else -1


# os.listdir() returns entries in arbitrary order; sort by the numeric prefix
# (then by name) so the row order of the result is reproducible.
pickle_names = sorted(
    (x for x in os.listdir(cache_dir) if x.endswith(".pks")),
    key=lambda name: (_batch_number(name), name),
)
if not pickle_names:
    # pd.concat would raise an opaque "No objects to concatenate" ValueError.
    raise ValueError(f"No .pks files found in {cache_dir!r}; run the extraction cell first")

# ignore_index=True gives the combined frame one continuous index instead of
# repeating each batch's 0..n-1 labels.
df_records = pd.concat(
    [pd.read_pickle(os.path.join(cache_dir, x)) for x in pickle_names],
    ignore_index=True,
)

df_records