In [1]:
import csv
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

from IPython.display import clear_output
import pytz

In [2]:
# Open daa_snapshot.csv
# Contains daily daa/timestamps from genesis to Jan 24th 2024
# Used for estimating date via interpolation

@dataclass
class DAATimestamp:
    daa_score: int
    timestamp_ms: int
    datetime: datetime

daa_timestamps = []
with open('data/daa_timestamps.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader) # skip header row

    for row in reader:
        daa_timestamps.append(DAATimestamp(
            daa_score=int(row[0]), 
            timestamp_ms=int(row[1]), 
            datetime=row[2]
        ))

print(f'low snapshot daa: {daa_timestamps[0].daa_score} {daa_timestamps[-1].datetime}')
print(f'high snapshot daa: {daa_timestamps[-1].daa_score} {daa_timestamps[-1].datetime}')

low snapshot daa: 0 2024-01-24 08:59:58.71+00
high snapshot daa: 69694070 2024-01-24 08:59:58.71+00


In [3]:
def _interpolate_datetime(index, daa_score):
    current, next = daa_timestamps[index], daa_timestamps[index+1]
    score_difference = next.daa_score - current.daa_score
    
    if score_difference == 0:
        return None

    frac = (daa_score - current.daa_score) / score_difference
    interpolated_milliseconds = int(current.timestamp_ms + 
                                    (next.timestamp_ms - current.timestamp_ms) * frac)

    return pytz.utc.localize(datetime.fromtimestamp(interpolated_milliseconds / 1000))

In [4]:
def _binary_search_daa(daa_score):
    low, high = 0, len(daa_timestamps) - 1
    
    while low <= high:
        mid = (low + high) // 2
        mid_daa_score = daa_timestamps[mid].daa_score

        if mid_daa_score == daa_score:
            return mid

        if mid < len(daa_timestamps) - 1 and mid_daa_score <= daa_score < daa_timestamps[mid+1].daa_score:
            return mid

        if daa_score < mid_daa_score:
            high = mid - 1
        else:
            low = mid + 1

    return None

In [5]:
def estimate_datetime(daa_score):
    # TODO assuming your UTXO export contains DAAs higher than 
    # the most recent DAA in daa_timestamps.csv
    # Either get highest DAA from node or extrapolate
    
    # If daa_score = most recent daa_timestamp record, just return that datetime
    if daa_score == daa_timestamps[-1].daa_score:
        return daa_timestamps[-1].datetime

    index = _binary_search_daa(daa_score)
    if index is not None:
        return _interpolate_datetime(index, daa_score)

    return None

In [6]:
# Open, utxo_set.csv, iterate rows (utxos) and estimate date for each
# Csv fields are [address, daa_score, amount, is_coinbase]

# utxo_set.csv not included due to file size

# (lazily) storing results in memory

utxo_set = []
with open('data/utxo_set.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)

    for i, row in enumerate(reader):
        # Skip dust UTXOs
        if int(row[2]) <= 1000:
            continue
        
        utxo_daa = int(row[1])
        
        est_datetime = estimate_datetime(utxo_daa)
        
        utxo_set.append({
            # 'address': row[0] # removed to save memory
            'daa_score': int(row[1]),
            'amount': int(row[2]),
            # 'is_coinbase': row[3] # removed to save memory
            'estimated_datetime': est_datetime
        })

        if i % 1_000_000 == 0:
            clear_output()
            print(f'processed {i:2,} utxos')

processed 58,000,000 utxos


In [7]:
# failed to estimate timestamp for these records
# likely the result of utxo_daa being higher than the highest daa (daa_timestamps[-1].daa_score) in daa_timestamps.csv')
for utxo in utxo_set:
    if utxo['estimated_datetime'] is None:
        print(utxo['daa_score'])

69694073
63098762
69694077
69694077
69694071


In [9]:
for utxo in utxo_set[::100_000]:
    print(utxo['daa_score'], utxo['estimated_datetime'])

1490719 2021-11-27 20:20:54.490000+00:00
65640007 2023-12-08 22:03:20.825000+00:00
60059149 2023-10-05 08:42:16.325000+00:00
28382065 2022-10-03 21:48:03.729000+00:00
60456785 2023-10-09 23:04:29.184000+00:00
49172160 2023-06-01 10:17:11.675000+00:00
66009202 2023-12-13 04:32:05.844000+00:00
36001725 2022-12-31 01:26:09.444000+00:00
2328794 2021-12-07 09:56:34.260000+00:00
29313763 2022-10-14 16:32:34.578000+00:00
42367288 2023-03-14 16:46:19.505000+00:00
34661248 2022-12-15 13:16:37.866000+00:00
38711021 2023-01-31 09:35:36.124000+00:00
69175191 2024-01-18 11:59:29.344000+00:00
9302693 2022-02-25 17:04:57.972000+00:00
44152257 2023-04-04 08:13:59.833000+00:00
11382103 2022-03-21 15:05:59.725000+00:00
399001 2021-11-12 04:46:25.006000+00:00
27663675 2022-09-25 14:16:55.564000+00:00
66038638 2023-12-13 12:42:19.192000+00:00
67795237 2024-01-02 20:35:29.725000+00:00
55288051 2023-08-11 04:18:23.597000+00:00
61769907 2023-10-25 03:42:21.055000+00:00
65977370 2023-12-12 19:42:22.020000+00: