In [1]:
import csv
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

from IPython.display import clear_output
import pytz

In [14]:
# Open daa_snapshot.csv
# Contains daily daa/timestamps from genesis to Jan 24th 2024
# Used for estimating date via interpolation

@dataclass
class DAATimestamp:
    daa_score: int
    timestamp_ms: int
    datetime: datetime

daa_timestamps = []
with open('data/daa_timestamps.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader) # skip header row

    for row in reader:
        daa_timestamps.append(DAATimestamp(
            daa_score=int(row[0]), 
            timestamp_ms=int(row[1]), 
            datetime=row[2]
        ))

print(f'low snapshot daa: {daa_timestamps[0].daa_score} {daa_timestamps[-1].datetime}')
print(f'high snapshot daa: {daa_timestamps[-1].daa_score} {daa_timestamps[-1].datetime}')

low snapshot daa: 0 2024-01-24 08:59:58.71+00
high snapshot daa: 69694070 2024-01-24 08:59:58.71+00


In [3]:
def _interpolate_datetime(index, daa_score):
    current, next = daa_timestamps[index], daa_timestamps[index+1]
    score_difference = next.daa_score - current.daa_score
    
    if score_difference == 0:
        return None

    frac = (daa_score - current.daa_score) / score_difference
    interpolated_milliseconds = int(current.timestamp_ms + 
                                    (next.timestamp_ms - current.timestamp_ms) * frac)

    return pytz.utc.localize(datetime.fromtimestamp(interpolated_milliseconds / 1000))

In [4]:
def _binary_search_daa(daa_score):
    low, high = 0, len(daa_timestamps) - 1
    
    while low <= high:
        mid = (low + high) // 2
        mid_daa_score = daa_timestamps[mid].daa_score

        if mid_daa_score == daa_score:
            return mid

        if mid < len(daa_timestamps) - 1 and mid_daa_score <= daa_score < daa_timestamps[mid+1].daa_score:
            return mid

        if daa_score < mid_daa_score:
            high = mid - 1
        else:
            low = mid + 1

    return None

In [5]:
def estimate_datetime(daa_score):
    # TODO assuming your UTXO export contains DAAs higher than 
    # the most recent DAA in daa_timestamps.csv
    # Either get highest DAA from node or extrapolate
    
    # If daa_score = most recent daa_timestamp record, just return that datetime
    if daa_score == daa_timestamps[-1].daa_score:
        return daa_timestamps[-1].datetime

    index = _binary_search_daa(daa_score)
    if index is not None:
        return _interpolate_datetime(index, daa_score)

    return None

In [6]:
# Open, utxo_set.csv, iterate rows (utxos) and estimate date for each
# Csv fields are [address, daa_score, amount, is_coinbase]

# utxo_set.csv not included due to file size

# (lazily) storing results in memory

utxo_set = []
with open('data/utxo_set.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)

    for i, row in enumerate(reader):
        utxo_daa = int(row[1])
        
        est_datetime = estimate_datetime(utxo_daa)
        
        utxo_set.append({
            # 'address': row[0] # removed to save memory
            'daa_score': int(row[1]),
            'amount': int(row[2]),
            # 'is_coinbase': row[3] # removed to save memory
            'estimated_datetime': est_datetime
        })

        if i % 1_000_000 == 0:
            clear_output()
            print(f'processed {i:2,} utxos')

processed 59,000,000 utxos


In [7]:
# failed to estimate timestamp for these records
# likely the result of utxo_daa being higher than the highest daa (daa_timestamps[-1].daa_score) in daa_timestamps.csv')
for utxo in utxo_set:
    if utxo['estimated_datetime'] is None:
        print(utxo['daa_score'])

69694073
63098762
69694077
69694077
69694071


In [13]:
for utxo in utxo_set[::1_000_000]:
    print(utxo['daa_score'], utxo['estimated_datetime'])

1490719 2021-11-27 20:20:54.490000+00:00
59143774 2023-09-24 18:39:11.164000+00:00
59408899 2023-09-27 20:16:45.357000+00:00
59192064 2023-09-25 08:04:10.362000+00:00
51494933 2023-06-28 07:19:24.738000+00:00
59183506 2023-09-25 05:41:30.732000+00:00
59328603 2023-09-26 21:59:09.139000+00:00
59146898 2023-09-24 19:31:15.759000+00:00
59180188 2023-09-25 04:46:12.100000+00:00
68834888 2024-01-14 15:42:02.981000+00:00
59166138 2023-09-25 00:51:59.424000+00:00
59226428 2023-09-25 17:37:04.035000+00:00
59173764 2023-09-25 02:59:06.876000+00:00
31040333 2022-11-03 16:00:53.274000+00:00
59174728 2023-09-25 03:15:11.060000+00:00
59210338 2023-09-25 13:09:05.949000+00:00
59196980 2023-09-25 09:26:18.313000+00:00
57676565 2023-09-07 19:19:03.153000+00:00
59137465 2023-09-24 16:54:00.963000+00:00
59146013 2023-09-24 19:16:30.591000+00:00
59274110 2023-09-26 06:51:10.728000+00:00
57346665 2023-09-03 23:41:26.719000+00:00
59439416 2023-09-28 04:45:56.811000+00:00
59195543 2023-09-25 09:02:10.938000