In [4]:
import pandas as pd
import numpy as np

##### r1tvf1v1r

In [116]:
# Load the run exported before the append and the run exported after it.
partial = pd.read_csv("r1tvf1v1r-pre-append.csv")
appended = pd.read_csv("r1tvf1v1r-post-append.csv")

# Parse with utc=True so both columns are guaranteed to be tz-aware UTC.
# Without it, values that lack an offset parse as naive timestamps (which
# cannot be compared against a tz-aware cutoff) and mixed offsets are not
# parsed into a single tz-aware datetime column.
partial['date'] = pd.to_datetime(partial['date'], utc=True)
appended['date'] = pd.to_datetime(appended['date'], utc=True)

# NOTE: "First"/"Last" are positional (first and last CSV row), not min/max.
print("\nPartial run:")
print("First date:", partial['date'].iloc[0])
print("Last date:", partial['date'].iloc[-1])

print("\nAppended:")
print("First date:", appended['date'].iloc[0])
print("Last date:", appended['date'].iloc[-1])


Partial run:
First date: 1986-08-26 23:36:37+00:00
Last date: 2022-05-10 00:15:18+00:00

Appended:
First date: 1986-08-26 23:36:37+00:00
Last date: 2025-03-31 00:15:04+00:00


In [117]:
# Cutoff = latest date present in the pre-append run. It is derived from the
# data instead of being hard-coded so it cannot go stale when the CSVs are
# regenerated. The saved output of the loading cell shows the last row at
# 2022-05-10 00:15:18+00:00, which is the value that used to be hard-coded
# here (this assumes the rows are in chronological order).
cutoff = partial['date'].max()

# Keep only post-append rows that fall inside the period covered by the
# pre-append run, so rows added by the append are not reported as differences.
appended_filtered = appended[appended['date'] <= cutoff]

# Unique timestamps on each side of the comparison
dates_partial = set(partial['date'].unique())
dates_appended_filtered = set(appended_filtered['date'].unique())

# Convert timestamps to ISO 8601 strings with Z suffix
def to_iso_z(timestamps):
    """Format timestamps as a set of ISO 8601 UTC strings (``...THH:MM:SSZ``).

    Parameters
    ----------
    timestamps : iterable
        Items accepted by ``pd.Timestamp`` (``pd.Timestamp``,
        ``numpy.datetime64``, ``datetime``, ...).

    Returns
    -------
    set of str
        One ``YYYY-MM-DDTHH:MM:SSZ`` string per distinct instant, at second
        resolution.

    Notes
    -----
    * Timezone-aware values are converted to UTC before formatting, so the
      ``Z`` suffix is always truthful.
    * Timezone-naive values are formatted unchanged, i.e. they are assumed
      to already be expressed in UTC.
    * Missing values (``NaT``) are skipped because they have no string form.
    """
    iso_strings = set()
    for raw in timestamps:
        # Coerce first: Series.unique() on a naive column yields
        # numpy.datetime64 items, which have no strftime().
        ts = pd.Timestamp(raw)
        if pd.isna(ts):
            continue
        if ts.tzinfo is not None:
            ts = ts.tz_convert('UTC')
        iso_strings.add(ts.strftime('%Y-%m-%dT%H:%M:%SZ'))
    return iso_strings

# Normalise both date sets to ISO 8601 strings so they compare textually.
dates_partial_iso = to_iso_z(dates_partial)
dates_appended_iso = to_iso_z(dates_appended_filtered)

# One-sided differences between the pre-append and filtered post-append sets.
dates_only_in_partial = dates_partial_iso.difference(dates_appended_iso)
dates_only_in_appended = dates_appended_iso.difference(dates_partial_iso)

# Order chronologically; ISO 8601 strings sort lexicographically by time.
dates_only_in_partial_sorted = list(dates_only_in_partial)
dates_only_in_partial_sorted.sort()
dates_only_in_appended_sorted = list(dates_only_in_appended)
dates_only_in_appended_sorted.sort()

# Report: a count line followed by one date per line, for each direction.
print(f"Dates in pre-append but not in post-append (filtered): {len(dates_only_in_partial_sorted)} found")
for d in dates_only_in_partial_sorted:
    print(d)

print(f"\nDates in post-append (filtered) but not in pre-append: {len(dates_only_in_appended_sorted)} found")
for d in dates_only_in_appended_sorted:
    print(d)

Dates in pre-append but not in post-append (filtered): 4 found
2021-08-09T23:21:49Z
2021-09-10T23:19:25Z
2021-09-26T23:17:35Z
2022-03-05T23:02:42Z

Dates in post-append (filtered) but not in pre-append: 4 found
2021-08-09T23:22:13Z
2021-09-10T23:19:01Z
2021-09-26T23:17:59Z
2022-03-05T23:02:18Z


In [89]:
# Load the partial run and the run produced after appending to it.
partial = pd.read_csv("r1tvf1v1r-partial.csv")
appended = pd.read_csv("r1tvf1v1r-appended.csv")

# Parse with utc=True so both columns are guaranteed to be tz-aware UTC.
# Without it, values that lack an offset parse as naive timestamps and mixed
# offsets are not parsed into a single tz-aware datetime column, which would
# make the string forms of equal instants differ between the two frames.
partial['date'] = pd.to_datetime(partial['date'], utc=True)
appended['date'] = pd.to_datetime(appended['date'], utc=True)

# NOTE: "First"/"Last" are positional (first and last CSV row), not min/max.
print("\nPartial run:")
print("First date:", partial['date'].iloc[0])
print("Last date:", partial['date'].iloc[-1])

print("\nAppended:")
print("First date:", appended['date'].iloc[0])
print("Last date:", appended['date'].iloc[-1])


Partial run:
First date: 1986-08-26 23:36:37+00:00
Last date: 2022-05-10 00:15:18+00:00

Appended:
First date: 1986-08-26 23:36:37+00:00
Last date: 2025-03-31 00:15:04+00:00


In [90]:
# Distinct date strings per run: each value is stringified and trimmed of
# surrounding whitespace so the comparison below is purely textual.
partial_dates = {text for text in partial['date'].astype(str).str.strip()}
appended_dates = {text for text in appended['date'].astype(str).str.strip()}

# Overlap and one-sided differences between the two runs
common_dates = partial_dates & appended_dates
only_in_partial = partial_dates.difference(appended_dates)
only_in_appended = appended_dates.difference(partial_dates)

# Summary counts
print(f"Total unique dates in partial: {len(partial_dates)}")
print(f"Total unique dates in appended: {len(appended_dates)}")
print(f"Dates common to both: {len(common_dates)}")

# Detail: every unmatched date, sorted, grouped by the run it belongs to
for heading, leftover in (
    ("\nDates only in partial:", only_in_partial),
    ("\nDates only in appended:", only_in_appended),
):
    print(heading)
    for d in sorted(leftover):
        print(f"  {d}")

Total unique dates in partial: 892
Total unique dates in appended: 946
Dates common to both: 888

Dates only in partial:
  2021-08-09 23:21:49+00:00
  2021-09-10 23:19:25+00:00
  2021-09-26 23:17:35+00:00
  2022-03-05 23:02:42+00:00

Dates only in appended:
  2021-08-09 23:22:13+00:00
  2021-09-10 23:19:01+00:00
  2021-09-26 23:17:59+00:00
  2022-03-05 23:02:18+00:00
  2022-06-11 00:15:32+00:00
  2022-06-27 00:15:39+00:00
  2022-07-13 00:15:38+00:00
  2022-07-29 00:15:48+00:00
  2022-08-14 00:15:56+00:00
  2022-09-15 00:16:02+00:00
  2022-10-01 00:16:03+00:00
  2022-10-17 00:15:57+00:00
  2022-11-02 00:16:03+00:00
  2022-11-18 00:15:55+00:00
  2022-12-04 00:15:58+00:00
  2022-12-20 00:15:50+00:00
  2023-01-05 00:15:49+00:00
  2023-01-21 00:15:44+00:00
  2023-02-06 00:15:47+00:00
  2023-02-22 00:15:32+00:00
  2023-03-10 00:15:23+00:00
  2023-03-26 00:15:14+00:00
  2023-04-11 00:14:58+00:00
  2023-05-13 00:14:42+00:00
  2023-06-14 00:14:52+00:00
  2023-07-16 00:15:07+00:00
  2023-08-01 0

##### r1tvf15w8

In [109]:
# Load the partial run and the run produced after appending to it.
partial = pd.read_csv("r1tvf15w8-partial.csv")
appended = pd.read_csv("r1tvf15w8-appended.csv")

# Parse with utc=True so both columns are guaranteed to be tz-aware UTC.
# Without it, values that lack an offset parse as naive timestamps (which
# cannot be compared against a tz-aware cutoff) and mixed offsets are not
# parsed into a single tz-aware datetime column.
partial['date'] = pd.to_datetime(partial['date'], utc=True)
appended['date'] = pd.to_datetime(appended['date'], utc=True)

# NOTE: "First"/"Last" are positional (first and last CSV row), not min/max.
print("\nPartial run:")
print("First date:", partial['date'].iloc[0])
print("Last date:", partial['date'].iloc[-1])

print("\nAppended:")
print("First date:", appended['date'].iloc[0])
print("Last date:", appended['date'].iloc[-1])


Partial run:
First date: 1986-08-26 23:36:37+00:00
Last date: 2023-12-23 00:15:38+00:00

Appended:
First date: 1986-08-26 23:36:37+00:00
Last date: 2025-03-31 00:14:40+00:00


In [112]:
# Cutoff = latest date present in the partial run. It is derived from the
# data instead of being hard-coded so it cannot go stale when the CSVs are
# regenerated. The saved output of the loading cell shows the last row at
# 2023-12-23 00:15:38+00:00, which is the value that used to be hard-coded
# here (this assumes the rows are in chronological order).
cutoff = partial['date'].max()

# Keep only appended rows that fall inside the period covered by the partial
# run, so rows added by the append are not reported as differences.
appended_filtered = appended[appended['date'] <= cutoff]

# Unique timestamps on each side of the comparison
dates_partial = set(partial['date'].unique())
dates_appended_filtered = set(appended_filtered['date'].unique())

# Convert timestamps to ISO 8601 strings with Z suffix
def to_iso_z(timestamps):
    """Format timestamps as a set of ISO 8601 UTC strings (``...THH:MM:SSZ``).

    Parameters
    ----------
    timestamps : iterable
        Items accepted by ``pd.Timestamp`` (``pd.Timestamp``,
        ``numpy.datetime64``, ``datetime``, ...).

    Returns
    -------
    set of str
        One ``YYYY-MM-DDTHH:MM:SSZ`` string per distinct instant, at second
        resolution.

    Notes
    -----
    * Timezone-aware values are converted to UTC before formatting, so the
      ``Z`` suffix is always truthful.
    * Timezone-naive values are formatted unchanged, i.e. they are assumed
      to already be expressed in UTC.
    * Missing values (``NaT``) are skipped because they have no string form.
    """
    iso_strings = set()
    for raw in timestamps:
        # Coerce first: Series.unique() on a naive column yields
        # numpy.datetime64 items, which have no strftime().
        ts = pd.Timestamp(raw)
        if pd.isna(ts):
            continue
        if ts.tzinfo is not None:
            ts = ts.tz_convert('UTC')
        iso_strings.add(ts.strftime('%Y-%m-%dT%H:%M:%SZ'))
    return iso_strings

# Normalise both date sets to ISO 8601 strings so they compare textually.
dates_partial_iso = to_iso_z(dates_partial)
dates_appended_iso = to_iso_z(dates_appended_filtered)

# One-sided differences between the partial and filtered appended sets.
dates_only_in_partial = dates_partial_iso.difference(dates_appended_iso)
dates_only_in_appended = dates_appended_iso.difference(dates_partial_iso)

# Order chronologically; ISO 8601 strings sort lexicographically by time.
dates_only_in_partial_sorted = list(dates_only_in_partial)
dates_only_in_partial_sorted.sort()
dates_only_in_appended_sorted = list(dates_only_in_appended)
dates_only_in_appended_sorted.sort()

# Report: a count line followed by one date per line, for each direction.
print(f"Dates in pre-append but not in post-append (filtered): {len(dates_only_in_partial_sorted)} found")
for d in dates_only_in_partial_sorted:
    print(d)

print(f"\nDates in post-append (filtered) but not in pre-append: {len(dates_only_in_appended_sorted)} found")
for d in dates_only_in_appended_sorted:
    print(d)

Dates in pre-append but not in post-append (filtered): 519 found
1988-02-05T23:44:53Z
1988-10-02T23:46:03Z
1988-11-19T23:45:36Z
1988-12-21T23:45:19Z
1989-03-27T23:44:22Z
1989-07-01T23:42:40Z
1989-07-17T23:42:20Z
1989-09-19T23:40:47Z
1989-10-05T23:41:01Z
1989-12-24T23:38:12Z
1990-03-30T23:35:29Z
1990-04-15T23:35:15Z
1990-05-17T23:35:40Z
1990-06-18T23:35:34Z
1990-07-20T23:35:28Z
1990-08-05T23:35:25Z
1990-08-21T23:35:20Z
1990-09-06T23:34:49Z
1990-09-22T23:35:11Z
1990-10-24T23:34:32Z
1991-01-12T23:35:35Z
1991-01-28T23:35:35Z
1991-03-01T23:36:05Z
1991-04-02T23:37:04Z
1991-09-25T23:39:20Z
1991-10-11T23:39:30Z
1991-11-28T23:39:23Z
1992-01-31T23:39:21Z
1992-04-04T23:39:27Z
1992-04-20T23:38:55Z
1992-05-06T23:39:10Z
1992-06-07T23:38:53Z
1992-08-10T23:37:39Z
1992-08-26T23:37:23Z
1992-09-27T23:36:51Z
1992-11-14T23:36:26Z
1993-02-02T23:36:52Z
1993-02-18T23:37:23Z
1993-04-07T23:37:39Z
1993-04-23T23:37:25Z
1993-06-26T23:37:28Z
1993-12-19T23:37:10Z
1994-02-21T23:35:56Z
1994-03-25T23:35:27Z
1994-04-10T