In [15]:
from pymongo import MongoClient
from tqdm import tqdm
from datetime import time, datetime, timedelta
import pandas as pd

# MongoClient() is called with no arguments, i.e. with the driver's default
# connection settings.
client = MongoClient()

# Collection "from_ground_logs" of database "sphere_telemetry"; every cell
# below reads from and writes to this collection.
ground_telemetry_collection = client["sphere_telemetry"]["from_ground_logs"]

In [16]:
def gps_stamp_to_timedelta(stamp: int) -> timedelta:
    """Turn a numeric GPS stamp into an offset from midnight.

    The stamp is rendered as text and left-padded with zeros to at least six
    characters. The first two characters are read as hours, the next two as
    minutes and everything after that as seconds.

    Args:
        stamp: GPS time stamp, e.g. ``123456`` for 12:34:56.

    Returns:
        A ``timedelta`` built from the parsed hours, minutes and seconds.

    Raises:
        ValueError: if any of the three slices is not a valid integer literal.
    """
    digits = str(stamp).rjust(6, '0')
    hh, mm, ss = digits[:2], digits[2:4], digits[4:]
    return timedelta(hours=int(hh), minutes=int(mm), seconds=int(ss))

In [22]:
# Default tolerance for gps_time_day_switch: a backward step in UTC
# time-of-day of up to this size is not reported as a day switch.
utc_tolerance = timedelta(seconds=120)


def gps_time_day_switch(utc_time, prev_utc_time, tolerance=None) -> bool:
    """Tell whether the UTC time-of-day stepped back far enough to mean a new day.

    Args:
        utc_time: time of day of the current record, as a ``timedelta``.
        prev_utc_time: time of day of the previous record, as a ``timedelta``.
        tolerance: optional ``timedelta``; backward steps no larger than this
            are ignored. When omitted, the module-level ``utc_tolerance`` is
            looked up at call time.

    Returns:
        True when ``utc_time`` is strictly earlier than
        ``prev_utc_time - tolerance``, otherwise False.
    """
    if tolerance is None:
        # Resolve the default lazily so that re-assigning utc_tolerance in a
        # later cell still takes effect without redefining this function.
        tolerance = utc_tolerance
    return utc_time < prev_utc_time - tolerance

In [23]:
def write_header():
    """Write the CSV column-name line to the module-level file handle ``f``.

    ``f`` is not a parameter: it must already be bound to an open, writable
    text file when this function is called.
    """
    header = 'i,"switch_type","local_change","local_delta","utc_change","utc_delta","new_utc_stamp"'
    f.write(header + '\n')

def write_row(local_flag, utc_flag):
    """Append one day-switch line to the CSV if either flag is set.

    Besides its two arguments, the function reads the module-level names
    ``row_count``, ``prev_local_dt``, ``local_dt``, ``prev_utc_time``,
    ``utc_time``, ``utc_timestamp`` and the open file handle ``f``; all of
    them must be bound before a call that has a flag set.

    Args:
        local_flag: truthy when the local date changed.
        utc_flag: truthy when a UTC day switch was detected.

    Returns:
        False (nothing written) when both flags are falsy, True after a line
        has been written.
    """
    if not local_flag and not utc_flag:
        return False

    # Label the switch by which of the two clocks reported it.
    if local_flag and utc_flag:
        kind = 'both'
    elif local_flag:
        kind = 'local'
    else:
        kind = 'utc'

    local_part = f'{row_count},{kind},"{prev_local_dt} -> {local_dt}","{local_dt - prev_local_dt}",'
    utc_part = f'"{prev_utc_time} -> {utc_time}","{utc_time-prev_utc_time}","{utc_timestamp}"'
    f.write(local_part + utc_part + '\n')
    return True


# Only records that carry a GPS time stamp take part in the scan.
query = {"GPS_stamp": {"$exists": True}}
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# Collection.count_documents() is the supported way to get the progress total.
total = ground_telemetry_collection.count_documents(query)

# Starting values for the "previous record" state that the loop compares
# against: the epoch in local time and the last second of a day.
prev_local_dt = datetime.fromtimestamp(0)
prev_utc_time = timedelta(hours=23, minutes=59, seconds=59)

row_count = 0

with open('day_switches.csv', 'w') as f:
    write_header()
    for doc in tqdm(
        ground_telemetry_collection.find(query).sort("local_dt"),
        total=total
    ):
        # write_row() reads local_dt, utc_timestamp, utc_time, the prev_*
        # values, row_count and f as module-level names, so these exact names
        # must stay bound here.
        local_dt = doc['local_dt']
        utc_timestamp = doc['GPS_stamp']
        utc_time = gps_stamp_to_timedelta(utc_timestamp)

        # Compare full calendar dates, not just the day of month: two
        # consecutive records a whole number of months apart share `.day`
        # and would otherwise not be reported as a switch.
        local_day_switch = local_dt.date() != prev_local_dt.date()
        utc_time_switch = gps_time_day_switch(utc_time, prev_utc_time)
        if write_row(local_day_switch, utc_time_switch):
            row_count += 1

        prev_local_dt = local_dt
        prev_utc_time = utc_time

print(f'\n\n{row_count} day switches found')


100%|██████████| 426293/426293 [00:05<00:00, 78840.33it/s]


39 day switches found


Результат работы предыдущей ячейки выгружен [сюда](https://docs.google.com/spreadsheets/d/1wIniLV3XItgZoUVZ_wDRf4xhny8OmiRFmMkAfX3oLfE/edit#gid=1486757713&fvid=2114670170); в этой таблице вручную размечены правильные времена смены дат по UTC. Далее первый лист того же документа экспортирован в .csv и обрабатывается ниже.

In [27]:
# Hand-annotated day-switch table. Two columns are used: 'local_dt' (local
# timestamp at which a new UTC date starts) and 'new utc date' (that date).
df = pd.read_csv('../data/sphere-telemetry-day-switches - utc-dates-restored-2.csv')
df_dict = df.to_dict('list')
# NOTE(review): %X is the locale's time representation (HH:MM:SS under the
# C locale). If the CSV always holds 24-hour times, '%H:%M:%S' would be the
# locale-independent spelling; confirm against the file before changing.
switch_local_dt = [datetime.strptime(dt_str, r'%Y-%m-%d %X') for dt_str in df_dict['local_dt']]
new_utc_date = df_dict['new utc date']

# Every switch opens a half-open local-time interval [start, next switch);
# the interval of the last switch is closed by the current time.
interval_ends = [*switch_local_dt[1:], datetime.now()]

for start_ldt, end_ldt, utc_date_str in zip(switch_local_dt, interval_ends, new_utc_date):
    utc_dt_base = datetime.strptime(utc_date_str, r'%Y-%m-%d')
    print(f'inserting UTC date {utc_dt_base}')

    utc_date_query = {"GPS_stamp": {"$exists": True}, "local_dt": {"$gte": start_ldt, "$lt": end_ldt}}

    # Count in the same collection that is iterated below so the progress bar
    # total matches. count_documents() replaces Cursor.count(), which was
    # removed in PyMongo 4.
    total = ground_telemetry_collection.count_documents(utc_date_query)
    for doc in tqdm(
        ground_telemetry_collection.find(utc_date_query),
        total=total
    ):
        utc_dt = utc_dt_base + gps_stamp_to_timedelta(doc['GPS_stamp'])
        # Address the record by its unique _id: filtering on local_dt would
        # update only the first of several records sharing a timestamp.
        ground_telemetry_collection.update_one(
            filter={"_id": doc["_id"]},
            update={"$set": {"utc_dt": utc_dt}}
        )

102it [00:00, 622.03it/s]inserting UTC date 2012-03-13 00:00:00
21332it [00:09, 2222.62it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-14 00:00:00
26180it [00:10, 2568.92it/s]
102it [00:00, 604.98it/s]inserting UTC date 2012-03-15 00:00:00
22807it [00:10, 2234.78it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-16 00:00:00
24542it [00:18, 1350.84it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-17 00:00:00
25630it [00:15, 1676.15it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-18 00:00:00
26172it [00:13, 1915.74it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-19 00:00:00
23595it [00:12, 1858.97it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-20 00:00:00
26196it [00:15, 1740.41it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-21 00:00:00
25444it [00:14, 1799.34it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-22 00:00:00
26182it [00:14, 1811.32it/s]
0it [00:00, ?it/s]inserting UTC date 2012-03-23 00:00:00
25936it [00:12, 2078.20it/s]
0it [00:00, ?it/s]inserting UTC date 201