In [39]:
from pymongo import MongoClient
from tqdm import tqdm_notebook
from datetime import time, datetime, timedelta
import pandas as pd

# MongoClient() is called without arguments, so PyMongo's default connection settings apply.
client = MongoClient()
# Database "sphere_telemetry", collection "from_onboard_logs"; bracket access is
# equivalent to attribute access in PyMongo.
onboard_telemetry_collection = client["sphere_telemetry"]["from_onboard_logs"]

In [2]:
def gps_stamp_to_timedelta(stamp: int) -> timedelta:
    """Convert an ``hhmmss`` GPS stamp into the time elapsed since midnight.

    The stamp is rendered as text and left-padded with zeros to six
    characters, so a value such as ``5`` is read as ``000005``.

    Args:
        stamp: time of day encoded as the decimal digits ``hhmmss``.

    Returns:
        A ``timedelta`` built from the hour, minute and second fields.
    """
    digits = str(stamp).rjust(6, '0')
    hh, mm, ss = digits[:2], digits[2:4], digits[4:]
    return timedelta(hours=int(hh), minutes=int(mm), seconds=int(ss))

In [8]:
# Largest backward step in the time of day that is still NOT treated as a day switch.
utc_tolerance = timedelta(minutes=1)


def gps_time_day_switch(utc_time, prev_utc_time) -> bool:
    """Tell whether the time of day dropped by more than ``utc_tolerance``.

    Args:
        utc_time: time of day of the current record, as a ``timedelta``.
        prev_utc_time: time of day of the previous record, as a ``timedelta``.

    Returns:
        ``True`` when ``utc_time`` lies more than ``utc_tolerance`` before
        ``prev_utc_time``; ``False`` otherwise (including a drop of exactly
        ``utc_tolerance``).
    """
    backward_step = prev_utc_time - utc_time
    return backward_step > utc_tolerance

In [13]:
def write_header():
    """Write the CSV header line to the module-level file object ``f``.

    ``f`` is not a parameter; it must already be bound at module level
    (the ``with open(...) as f`` block that calls this function does that).
    """
    header = 'i,"switch_type","local_change","local_delta","utc_change","utc_delta","new_utc_stamp"'
    f.write(header + '\n')

def write_row(local_flag, utc_flag):
    """Append one CSV row describing a detected switch, if there is one.

    Besides its arguments, the function reads these module-level names:
    ``f``, ``row_count``, ``prev_local_dt``, ``local_dt``, ``prev_utc_time``,
    ``utc_time`` and ``utc_timestamp``.

    Args:
        local_flag: truthy when the local timestamp indicates a switch.
        utc_flag: truthy when the UTC time of day indicates a switch.

    Returns:
        ``False`` (and nothing is written) when both flags are falsy;
        ``True`` after a row has been written.
    """
    if not local_flag and not utc_flag:
        return False

    # Label the row by which of the two detectors fired.
    if local_flag and utc_flag:
        type_ = 'both'
    elif local_flag:
        type_ = 'local'
    else:
        type_ = 'utc'

    local_part = f'"{prev_local_dt} -> {local_dt}","{local_dt - prev_local_dt}"'
    utc_part = f'"{prev_utc_time} -> {utc_time}","{utc_time-prev_utc_time}","{utc_timestamp}"'
    f.write(f'{row_count},{type_},{local_part},{utc_part}\n')
    return True


# Only documents that carry a GPS time stamp take part in day-switch detection.
query = {"GPS_stamp": {"$exists": True}}
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# count_documents() is the supported way to size the progress bar.
total = onboard_telemetry_collection.count_documents(query)

# Sentinel "previous" values for the first iteration: the epoch date differs
# from any real record date, and 23:59:59 makes almost any first UTC time
# look like a backward jump, so the first document is reported as a switch.
prev_local_dt = datetime.fromtimestamp(0)
prev_utc_time = timedelta(hours=23, minutes=59, seconds=59)

# Number of rows written so far; write_row() also uses it as the row index.
row_count = 0

with open('day_switches.csv', 'w') as f:
    write_header()
    # Walk the records in local-time order, comparing each with its predecessor.
    for doc in tqdm_notebook(
        onboard_telemetry_collection.find(query).sort("local_dt"),
        total=total
    ):
        local_dt = doc['local_dt']
        utc_timestamp = doc['GPS_stamp']
        utc_time = gps_stamp_to_timedelta(utc_timestamp)

        # Compare full calendar dates: comparing only the day of month would
        # miss a switch across a gap that lands on the same day number.
        local_day_switch = local_dt.date() != prev_local_dt.date()
        utc_time_switch = gps_time_day_switch(utc_time, prev_utc_time)
        if write_row(local_day_switch, utc_time_switch):
            row_count += 1

        prev_local_dt = local_dt
        prev_utc_time = utc_time

# row_count is the number of switches written; the previously printed name
# `switch_count` is never assigned in this notebook and fails on a fresh kernel.
print(f'{row_count} day switches found')


HBox(children=(FloatProgress(value=0.0, max=299250.0), HTML(value='')))


132 day switches found


Результат работы предыдущей ячейки выгружен [сюда](https://docs.google.com/spreadsheets/d/1wIniLV3XItgZoUVZ_wDRf4xhny8OmiRFmMkAfX3oLfE/edit#gid=1486757713&fvid=2114670170). В этом документе вручную размечены правильные времена смены дат по UTC; затем первый лист того же документа экспортирован в .csv и обрабатывается ниже.

In [48]:
# Load the manually labelled table. The columns used are 'local_dt' (local time
# at which an interval starts) and 'new utc date' (UTC date for that interval).
# A forward slash keeps the relative path valid on every OS, Windows included.
df = pd.read_csv('data/utc-dates-restored.csv')
df_dict = df.to_dict('list')
# Explicit %H:%M:%S instead of %X, whose meaning depends on the active locale.
switch_local_dt = [
    datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S') for dt_str in df_dict['local_dt']
]
new_utc_date = df_dict['new utc date']

# Each interval runs from one labelled local time (inclusive) to the next one
# (exclusive); the last interval is closed off with the current time.
for idx, (start_ldt, end_ldt) in enumerate(
    zip(switch_local_dt, [*switch_local_dt[1:], datetime.now()])
):
    utc_dt_base = datetime.strptime(new_utc_date[idx], '%Y-%m-%d')
    print(f'inserting UTC date {utc_dt_base}')

    utc_date_query = {"GPS_stamp": {"$exists": True}, "local_dt": {"$gte": start_ldt, "$lt": end_ldt}}

    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0.
    total = onboard_telemetry_collection.count_documents(utc_date_query)
    for doc in tqdm_notebook(
        onboard_telemetry_collection.find(utc_date_query),
        total=total
    ):
        # Full UTC datetime = labelled UTC date (midnight) + GPS time of day.
        utc_dt = utc_dt_base + gps_stamp_to_timedelta(doc['GPS_stamp'])
        # Address the document by _id so exactly the record that was read is
        # updated; filtering on local_dt would update the first document with
        # that timestamp, which may be a different one if timestamps repeat.
        onboard_telemetry_collection.update_one(
            filter={"_id": doc["_id"]},
            update={"$set": {"utc_dt": utc_dt}}
        )

inserting UTC date 2010-03-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=13975.0), HTML(value='')))


inserting UTC date 2010-03-19 00:00:00


HBox(children=(FloatProgress(value=0.0, max=1660.0), HTML(value='')))


inserting UTC date 2011-02-27 00:00:00


HBox(children=(FloatProgress(value=0.0, max=6898.0), HTML(value='')))


inserting UTC date 2011-02-28 00:00:00


HBox(children=(FloatProgress(value=0.0, max=1903.0), HTML(value='')))


inserting UTC date 2011-03-01 00:00:00


HBox(children=(FloatProgress(value=0.0, max=119.0), HTML(value='')))


inserting UTC date 2011-03-02 00:00:00


HBox(children=(FloatProgress(value=0.0, max=8677.0), HTML(value='')))


inserting UTC date 2011-03-03 00:00:00


HBox(children=(FloatProgress(value=0.0, max=10725.0), HTML(value='')))


inserting UTC date 2011-03-04 00:00:00


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


inserting UTC date 2011-03-05 00:00:00


HBox(children=(FloatProgress(value=0.0, max=21431.0), HTML(value='')))


inserting UTC date 2011-03-06 00:00:00


HBox(children=(FloatProgress(value=0.0, max=15351.0), HTML(value='')))


inserting UTC date 2011-03-07 00:00:00


HBox(children=(FloatProgress(value=0.0, max=13550.0), HTML(value='')))


inserting UTC date 2011-03-08 00:00:00


HBox(children=(FloatProgress(value=0.0, max=7172.0), HTML(value='')))


inserting UTC date 2012-03-10 00:00:00


HBox(children=(FloatProgress(value=0.0, max=130.0), HTML(value='')))


inserting UTC date 2012-03-11 00:00:00


HBox(children=(FloatProgress(value=0.0, max=244.0), HTML(value='')))


inserting UTC date 2012-03-12 00:00:00


HBox(children=(FloatProgress(value=0.0, max=1992.0), HTML(value='')))


inserting UTC date 2012-03-13 00:00:00


HBox(children=(FloatProgress(value=0.0, max=7404.0), HTML(value='')))


inserting UTC date 2012-03-14 00:00:00


HBox(children=(FloatProgress(value=0.0, max=2592.0), HTML(value='')))


inserting UTC date 2012-03-19 00:00:00


HBox(children=(FloatProgress(value=0.0, max=4312.0), HTML(value='')))


inserting UTC date 2012-03-24 00:00:00


HBox(children=(FloatProgress(value=0.0, max=3261.0), HTML(value='')))


inserting UTC date 2012-03-25 00:00:00


HBox(children=(FloatProgress(value=0.0, max=26333.0), HTML(value='')))


inserting UTC date 2012-03-26 00:00:00


HBox(children=(FloatProgress(value=0.0, max=16186.0), HTML(value='')))


inserting UTC date 2013-02-16 00:00:00


HBox(children=(FloatProgress(value=0.0, max=140.0), HTML(value='')))


inserting UTC date 2013-02-17 00:00:00


HBox(children=(FloatProgress(value=0.0, max=197.0), HTML(value='')))


inserting UTC date 2013-02-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=1915.0), HTML(value='')))


inserting UTC date 2013-02-19 00:00:00


HBox(children=(FloatProgress(value=0.0, max=774.0), HTML(value='')))


inserting UTC date 2013-03-09 00:00:00


HBox(children=(FloatProgress(value=0.0, max=8197.0), HTML(value='')))


inserting UTC date 2013-03-10 00:00:00


HBox(children=(FloatProgress(value=0.0, max=25419.0), HTML(value='')))


inserting UTC date 2013-03-11 00:00:00


HBox(children=(FloatProgress(value=0.0, max=16080.0), HTML(value='')))


inserting UTC date 2013-03-12 00:00:00


HBox(children=(FloatProgress(value=0.0, max=24935.0), HTML(value='')))


inserting UTC date 2013-03-13 00:00:00


HBox(children=(FloatProgress(value=0.0, max=19961.0), HTML(value='')))


inserting UTC date 2013-03-14 00:00:00


HBox(children=(FloatProgress(value=0.0, max=15494.0), HTML(value='')))


inserting UTC date 2013-03-15 00:00:00


HBox(children=(FloatProgress(value=0.0, max=15632.0), HTML(value='')))


inserting UTC date 2013-03-16 00:00:00


HBox(children=(FloatProgress(value=0.0, max=6429.0), HTML(value='')))




In [32]:
# Scratch check: parsing a date-only string with a date-only format gives
# midnight of that day (see the cell output below).
datetime.strptime('2010-03-18', r'%Y-%m-%d')

datetime.datetime(2010, 3, 18, 0, 0)