# Miner signature analysis

Data from Neptune and the Monero Archival Project; analysis by Isthmus.

This notebook examines the information leaked by both the nonce search strategy and the way the coinbase `tx_extra` field is used.

## Import libraries

In [1]:
import isthmuslib
import pandas as pd
import pathlib
from typing import List, Set
from copy import deepcopy

## Load the data

In [2]:
# Locate the coinbase dataset relative to the current working directory and fail
# with an explicit message if it is not there, before pandas tries to read it.
data_path: pathlib.Path = pathlib.Path.cwd().joinpath('data', 'local_only', 'study_isthmus_coinbase_data.csv')
if data_path.exists():
    df = pd.read_csv(data_path)
else:
    raise ValueError(f"Specified file does not exist: {data_path}")

## Visualize

First, where are nonces sampled from?

In [None]:
# Block nonce against block height for the whole dataset, drawn with small
# markers on a large canvas.
isthmuslib.scatter(
    df,
    'block_height',
    'block_nonce',
    markersize=1,
    figsize=(20, 20),
);

What are the coinbase tx_extra length signatures?

In [None]:
# Length of the coinbase tx_extra against block height for the whole dataset.
isthmuslib.scatter(
    df,
    'block_height',
    'coinbase_len_tx_extra',
    markersize=1,
    figsize=(20, 20),
);

In [None]:
# Keep only blocks above the height cutoff; copy so the subset is independent of `df`.
df_recent: pd.DataFrame = df[df['block_height'] > 2210000].copy()

# Lowest height present in the subset. Assigned up front because it is reused in
# both plot titles here and read again by the signature summary cell.
m = min(df_recent.block_height)

# Histogram of tx_extra lengths over the recent blocks (log_axes='y').
isthmuslib.hist(
    df_recent, 'coinbase_len_tx_extra',
    bins=100, log_axes='y', figsize=(12, 7), xlim=(32, 100),
    title=f'Coinbase signatures since height {m}',
)

# tx_extra length against height, passing the block nonce as the color values.
isthmuslib.scatter(
    df_recent, 'coinbase_len_tx_extra', 'block_height',
    markersize=1, figsize=(12, 7), xlim=(32, 100),
    title=f'Coinbase signatures since height {m} (colored by nonce)',
    c=df_recent['block_nonce'], cmap='jet',
);

# Direct matplotlib scatter of the same columns: marker size 2, colored by nonce.
isthmuslib.plt.scatter(
    df_recent['coinbase_len_tx_extra'],
    df_recent['block_height'],
    s=2,
    c=df_recent['block_nonce'],
    cmap='jet',
);

How many signatures are active?

In [None]:
# Distinct coinbase tx_extra lengths among the recent blocks; each distinct
# length is counted as one signature.
signatures: Set[int] = set(df_recent['coinbase_len_tx_extra'].tolist())
active_lengths = sorted(signatures)
print(f"Currently {len(signatures)} signatures active (since height {m}):\n\nlengths: {active_lengths}")

Plot the recent signatures individually. Some of the search patterns are easy to see on linear y-axes, while others are easier to see on log y-axes, so we'll plot both here (along with a histogram of activity).

In [None]:
# For every signature (in ascending length order) draw three figures and record
# the heights of its blocks in `runs`, one inner list per signature.
runs: List[List[int]] = []
for length in sorted(signatures):
    # Boolean mask over df_recent: True where the tx_extra length equals this signature.
    df_filter: pd.Series = df_recent['coinbase_len_tx_extra'] == length
    blocks: pd.DataFrame = df_recent[df_filter]

    # Same nonce-vs-height scatter twice: first with log_axes='', then with log_axes='y'.
    for y_scale in ('', 'y'):
        isthmuslib.scatter(
            blocks, 'block_height', 'block_nonce',
            xlabel='height', ylabel='nonce',
            title=f"Nonce distribution for blocks with len(tx_extra) == {length}",
            markersize=2, log_axes=y_scale, figsize=(10, 5),
        )

    # Histogram of block heights for this signature; the title carries the block count.
    activity_title = (
        f"Activity of miners with len(tx_extra) == {length} signature"
        f"\n(total count = {sum(df_filter)})"
    )
    isthmuslib.hist(
        blocks, 'block_height',
        xlabel='height', ylabel='activity (block counts)',
        title=activity_title, markersize=2,
        log_axes='', figsize=(10, 5), bins=50,
    )

    runs.append(blocks['block_height'].to_list())

In [None]:
# Single histogram call over all of the per-signature height lists in `runs`.
# TODO: ugly histogram, these need to be stacked...
isthmuslib.hist(
    runs,
    bins=50,
    log_axes='y',
    xlabel='height',
    ylabel='activity',
);