# Logs

In [None]:
from os import environ
from pathlib import Path

import pandas as pd
import plotly.express as px

import prep
from utils import figs_labels

# Directory of the run whose logs are analysed by the cells below.
# Read from the `LOGS_DIR` environment variable, falling back to `./../runs/test/`.
_DEFAULT_RUN_DIR = './../runs/test/'
RUN_DIR_PATH = Path(environ.get('LOGS_DIR', _DEFAULT_RUN_DIR))

## Configurations

In [None]:
from deepdiff import DeepDiff

# Load the nodes' configurations and print how each one differs from the first row.
confs_df = prep.confs(RUN_DIR_PATH)

# The first row's `env` is the reference every other row is compared against.
base_env = confs_df['env'].iloc[0]
base_node = confs_df['node'].iloc[0]
print(f"Took {base_node} conf. as the base.")

other_confs_df = confs_df.iloc[1:]
for node, env in zip(other_confs_df['node'], other_confs_df['env']):
	# `ignore_order=True`: reordered collections are not reported as differences.
	env_diff = DeepDiff(base_env, env, ignore_order=True)
	print(f"Diff. {node}: {env_diff}.")

## Models

In [None]:
# Load the run's models records through the local `prep` helper module.
models_df = prep.models(RUN_DIR_PATH)

# TODO: check whether the models' initializations are homogeneous or heterogeneous; group by init. fingerprint.

## Initial Peers Topology

In [None]:
from pyvis.network import Network as PyVisNetwork
import networkx as nx

# Render the initial peers topology as an interactive PyVis network.
init_peers_df = prep.init_peers(RUN_DIR_PATH)
init_peers_graph = prep.init_peers_nx_graph(init_peers_df)

init_peers_net = PyVisNetwork(
	notebook=True,
	directed=init_peers_graph.is_directed(),
	cdn_resources='in_line',
)
# Fixed layout seed (presumably to keep the node placement stable between renders).
init_peers_net.set_options('{"layout": {"randomSeed": 42}}')
init_peers_net.from_nx(init_peers_graph)

# The generated HTML page is written inside the run directory.
init_peers_html_path = RUN_DIR_PATH / 'pyvis-initial-peers-topology-network.html'
init_peers_net.show(name=str(init_peers_html_path))

## Simulated Latencies

In [None]:
# Build and render the directed graph of simulated latencies: every row of
# `sim_lats_df` becomes a `node` -> `peer` edge.
sim_lats_df = prep.sim_lats(RUN_DIR_PATH)

sim_lats_graph = nx.DiGraph()
sim_lats_graph.add_nodes_from(pd.concat([sim_lats_df['node'], sim_lats_df['peer']]).unique())

# Build the edges column-wise rather than with a row-wise `DataFrame.apply`:
# on an empty frame `apply` hands back a DataFrame, whose iteration yields
# column names instead of edge tuples and makes `add_edges_from` fail.
sim_lats_sources = sim_lats_df['node'].tolist()
sim_lats_targets = sim_lats_df['peer'].tolist()
if 'latency_ms' in sim_lats_df.columns:
	# Keep the latency as an edge attribute so it can be turned into a label below.
	sim_lats_edges = [
		(source, target, {'latency_ms': latency_ms})
		for source, target, latency_ms
		in zip(sim_lats_sources, sim_lats_targets, sim_lats_df['latency_ms'].tolist())
	]
else:
	# No latency column at all: edges are added without attributes.
	sim_lats_edges = [(source, target, {}) for source, target in zip(sim_lats_sources, sim_lats_targets)]
sim_lats_graph.add_edges_from(sim_lats_edges)

sim_lats_net = PyVisNetwork(notebook=True, directed=sim_lats_graph.is_directed(), cdn_resources='in_line')
sim_lats_net.set_options('{"layout": {"randomSeed": 42}}')
sim_lats_net.from_nx(sim_lats_graph)

# Show the latency on every edge that carries one.
for edge in sim_lats_net.edges:
	lat = edge.get('latency_ms')
	if lat is not None:
		edge['label'] = f"{lat} ms"

sim_lats_net.show(name=str(RUN_DIR_PATH / 'pyvis-simulated-latencies.html'))

In [None]:
# Label every edge of the initial peers topology with the sum of the simulated
# latencies of both directions between its two endpoints.
for edge in init_peers_net.edges:
	source, target = edge['from'], edge['to']

	# `get_edge_data` with a default avoids a `KeyError` when the topology contains
	# a link (or a node) that has no simulated latency in one or both directions.
	forward_lat = sim_lats_graph.get_edge_data(source, target, default={}).get('latency_ms')
	backward_lat = sim_lats_graph.get_edge_data(target, source, default={}).get('latency_ms')

	# Leave the edge unlabeled when neither direction has a latency.
	if forward_lat is None and backward_lat is None:
		continue

	# A missing direction contributes 0 to the sum.
	lat = (forward_lat if forward_lat is not None else 0) \
	      + (backward_lat if backward_lat is not None else 0)
	edge['label'] = f"{lat} ms"

init_peers_net.show(name=str(RUN_DIR_PATH / 'pyvis-simulated-latencies-initial-peers.html'))

## Data Distribution

In [None]:
# Data distribution records, split into their `train` and `test` rows.
data_dist_df = prep.data_dist(RUN_DIR_PATH)

data_dist_type = data_dist_df['type']
data_dist_train_df = data_dist_df[data_dist_type == 'train']
data_dist_test_df = data_dist_df[data_dist_type == 'test']


def _nodes_by_index(df):
	"""Return the distinct `node` values of `df`, ordered by `node_index`."""
	return df[['node', 'node_index']].sort_values('node_index')['node'].unique()


def _sorted_targets(df):
	"""Return the distinct `target` values of `df` in ascending order."""
	return sorted(df['target'].unique())


# Counts per node, coloured by label.
fig_data_dist_train = px.bar(
	data_dist_train_df,
	x='node', y='count', color='target',
	title='Train Data Distribution - Labels per Node',
	labels=figs_labels,
	category_orders={
		'node': _nodes_by_index(data_dist_train_df),
		'target': _sorted_targets(data_dist_train_df),
	},
)
fig_data_dist_test = px.bar(
	data_dist_test_df,
	x='node', y='count', color='target',
	title='Test Data Distribution - Labels per Node',
	labels=figs_labels,
	category_orders={
		'node': _nodes_by_index(data_dist_test_df),
		'target': _sorted_targets(data_dist_test_df),
	},
)

for fig in (fig_data_dist_train, fig_data_dist_test):
	fig.show()

# Counts per label, coloured by node.
fig_data_dist_train = px.bar(
	data_dist_train_df,
	x='target', y='count', color='node',
	title='Train Data Distribution - Nodes per Label',
	labels=figs_labels,
	category_orders={'target': _sorted_targets(data_dist_train_df)},
)
fig_data_dist_test = px.bar(
	data_dist_test_df,
	x='target', y='count', color='node',
	title='Test Data Distribution - Nodes per Label',
	labels=figs_labels,
	category_orders={'target': _sorted_targets(data_dist_test_df)},
)

for fig in (fig_data_dist_train, fig_data_dist_test):
	fig.show()

## Accuracies

In [None]:
# Split the accuracy records by `type` and prepend each split's `prep.accs_mean` rows.
accs_df = prep.accs(RUN_DIR_PATH)

accs_type = accs_df['type']

# The pre-aggregation test split also takes the plain `test` rows of round -1.
is_test_pre_agg = accs_type == 'test-pre-agg'
is_test_round_minus_one = (accs_type == 'test') & (accs_df['round'] == -1)

# `rtrain` stands for `raw_train`.
accs_df_rtrain = accs_df[accs_type == 'raw-train'].drop(columns=['type'])
accs_df_train = accs_df[accs_type == 'train'].drop(columns=['type'])
accs_df_test = accs_df[accs_type == 'test'].drop(columns=['type'])
accs_df_test_pre_agg = accs_df[is_test_pre_agg | is_test_round_minus_one].drop(columns=['type'])

# For every split: compute its mean rows, then put them ahead of the original rows.
accs_df_rtrain_mean = prep.accs_mean(accs_df_rtrain)
accs_df_rtrain = pd.concat([accs_df_rtrain_mean, accs_df_rtrain])

accs_df_train_mean = prep.accs_mean(accs_df_train)
accs_df_train = pd.concat([accs_df_train_mean, accs_df_train])

accs_df_test_mean = prep.accs_mean(accs_df_test)
accs_df_test = pd.concat([accs_df_test_mean, accs_df_test])

accs_df_test_pre_agg_mean = prep.accs_mean(accs_df_test_pre_agg)
accs_df_test_pre_agg = pd.concat([accs_df_test_pre_agg_mean, accs_df_test_pre_agg])

In [None]:
# Test accuracy and mean loss over the rounds, one trace per `node` value.
for metric, title in (
	('accuracy', 'Test Accuracy per Round'),
	('mean_loss', 'Test Mean Loss per Round'),
):
	fig = px.line(
		accs_df_test, x='round', y=metric, color='node', markers=False,
		title=title, labels=figs_labels
	)
	fig.show()

In [None]:
# Pre-aggregation test accuracy and mean loss over the rounds, one trace per `node` value.
for metric, title in (
	('accuracy', 'Test (Pre-Agg.) Accuracy per Round'),
	('mean_loss', 'Test (Pre-Agg.) Mean Loss per Round'),
):
	fig = px.line(
		accs_df_test_pre_agg, x='round', y=metric, color='node', markers=False,
		title=title, labels=figs_labels
	)
	fig.show()

In [None]:
# Train accuracy and mean loss over the rounds, one trace per `node` value.
for metric, title in (
	('accuracy', 'Train Accuracy per Round'),
	('mean_loss', 'Train Mean Loss per Round'),
):
	fig = px.line(
		accs_df_train, x='round', y=metric, color='node', markers=False,
		title=title, labels=figs_labels
	)
	fig.show()

In [None]:
# Raw-train accuracy and mean loss over the rounds, one trace per `node` value.
for metric, title in (
	('accuracy', 'Raw Train Accuracy per Round'),
	('mean_loss', 'Raw Train Mean Loss per Round'),
):
	fig = px.line(
		accs_df_rtrain, x='round', y=metric, color='node', markers=False,
		title=title, labels=figs_labels
	)
	fig.show()

## Similarities

In [None]:
# Load the run's similarity records; the next cell reads their `round`, `peer`,
# `target_peer` and `sim` columns.
sims_df = prep.sims(RUN_DIR_PATH)

In [None]:
round_ = 0
normalized = False  # Set `True` to plot the normalized similarity.

import numpy as np


def _sim_without_self(row):
	"""Return the row's `sim`, or NaN when `peer` and `target_peer` are the same."""
	if row['peer'] == row['target_peer']:
		return np.nan
	return row['sim']


def _unordered_peers_pair(row):
	"""Return the row's `peer`/`target_peer` pair as an order-independent tuple."""
	return tuple(sorted([row['peer'], row['target_peer']]))


# Copy the selected round so the edits below never touch `sims_df`.
round_sims_df = sims_df[sims_df['round'] == round_].copy()

# Comment to include the main diagonal.
round_sims_df['sim'] = round_sims_df.apply(_sim_without_self, axis='columns')

if normalized:
	from sklearn.preprocessing import MinMaxScaler

	round_sims_df['norm_sim'] = MinMaxScaler().fit_transform(round_sims_df[['sim']])

# Comment these lines to keep the symmetric bottom triangle of the matrix.
round_sims_df['peer_target_peer'] = round_sims_df.apply(_unordered_peers_pair, axis='columns')
round_sims_df = round_sims_df.drop_duplicates('peer_target_peer').drop(columns='peer_target_peer')

# Column to plot and the matching suffix for the colour label and the title.
sims_values_column = 'norm_sim' if normalized else 'sim'
sims_title_suffix = ' (norm.)' if normalized else ''

fig_sims = px.imshow(
	round_sims_df.pivot(index='peer', columns='target_peer', values=sims_values_column),
	# zmin=0, zmax=1,
	text_auto='.2f',
	labels=figs_labels | {'y': 'Peer', 'x': 'Target Peer', 'color': f"Similarity{sims_title_suffix}"},
	title=f"Similarity{sims_title_suffix} Matrix Round #{round_}"
)
fig_sims.show()

### SON Similarities

In [None]:
# Load the run's SON similarity records; the next cell reads their `round`, `node`,
# `son_node` and `sim` columns.
son_sims_df = prep.son_sims(RUN_DIR_PATH)

In [None]:
round_ = 0
normalized = False  # Set `True` to plot the normalized similarity.

import numpy as np

# Copy the selected round so adding `norm_sim` never touches `son_sims_df`.
round_sims_df = son_sims_df[son_sims_df['round'] == round_].copy()

if normalized:
	from sklearn.preprocessing import MinMaxScaler

	round_sims_df['norm_sim'] = MinMaxScaler().fit_transform(round_sims_df[['sim']])

# Column to plot and the matching suffix for the colour label and the title.
son_sims_values_column = 'norm_sim' if normalized else 'sim'
son_sims_title_suffix = ' (norm.)' if normalized else ''

fig_son_sims = px.imshow(
	round_sims_df.pivot(index='node', columns='son_node', values=son_sims_values_column),
	# zmin=0, zmax=1,
	text_auto='.2f',
	labels=figs_labels | {'y': 'Node', 'x': 'SON Node', 'color': f"Similarity{son_sims_title_suffix}"},
	title=f"SON Similarity{son_sims_title_suffix} Matrix Round #{round_}"
)
fig_son_sims.show()

## Aggregation Peers

In [None]:
# Load the run's aggregation peers records; the next cell reads their `round`,
# `node` and `peer` columns.
agg_peers_df = prep.agg_peers(RUN_DIR_PATH)

In [None]:
# Count the rows of every (`node`, `peer`) pair up to and including `round_`.
round_ = agg_peers_df['round'].max()

is_up_to_round = agg_peers_df['round'] <= round_
r_agg_peers_df = (
	agg_peers_df[is_up_to_round]
	.groupby(['node', 'peer'])
	.size()
	.reset_index(name='count')
)
r_agg_peers_df = r_agg_peers_df[r_agg_peers_df['count'] != 0]

# FIXME: add `np.nan` for missing entries.

# Nodes on the rows, peers on the columns, counts as the cell values.
agg_peers_matrix = r_agg_peers_df.pivot(index='node', columns='peer', values='count')
fig_agg_peers = px.imshow(
	agg_peers_matrix,
	text_auto=True,
	labels=figs_labels | {'y': 'Node', 'x': 'Peer Node', 'color': 'Count'},
	title=f"Peers Agg. Count Up-to Round #{round_}"
)
fig_agg_peers.show()

## Zones

In [None]:
# Load the run's zones records; the next cell reads their `round`,
# `initiator_node_repr` and `peer_repr` columns.
zones_df = prep.zones(RUN_DIR_PATH)

In [None]:
import networkx as nx
from pyvis.network import Network as PyVisNetwork

round_ = 0

# Zones rows of the selected round.
r_zones_df = zones_df[(zones_df['round'] == round_)]

# Every initiator and every peer of the round becomes a node of the graph.
zones_initiators = set(r_zones_df['initiator_node_repr'].unique())
zones_peers = set(r_zones_df['peer_repr'].unique())

r_zones_graph = nx.DiGraph()
r_zones_graph.add_nodes_from(zones_initiators | zones_peers)

# One directed edge from each initiator to every peer listed for it.
grouped_r_zones_df = r_zones_df.groupby(['initiator_node_repr'])
for (initiator_node,), group_df in grouped_r_zones_df:
	r_zones_graph.add_edges_from((initiator_node, peer) for peer in group_df['peer_repr'])

r_zones_net = PyVisNetwork(notebook=True, directed=True, cdn_resources='in_line')
r_zones_net.set_options(
	'{"layout": {"randomSeed": 42, "hierarchical": {"direction": "UD", "sortMethod": "directed"}}}'
)
r_zones_net.from_nx(r_zones_graph)

# The generated HTML page is written inside the run directory.
r_zones_html_path = RUN_DIR_PATH / 'pyvis-zones.html'
r_zones_net.show(name=str(r_zones_html_path))

## Clusters

In [None]:
# Load the run's clusters records; the cells below read their `round`, `level`,
# `representative_node`, `representative_node_repr` and `peer_repr` columns.
clusters_df = prep.clusters(RUN_DIR_PATH)

In [None]:
super_clusters_representatives_df = prep.super_clusters_representatives(RUN_DIR_PATH)

# Sanity check: within a round, every row must report the same
# `super_clusters_representatives` value as the round's first row.
for (round_,), r_super_clusters_representatives_df in super_clusters_representatives_df.groupby(['round']):
	reported_representatives = r_super_clusters_representatives_df['super_clusters_representatives']
	base_super_clusters_representatives = reported_representatives.iloc[0]
	assert reported_representatives.apply(lambda s: s == base_super_clusters_representatives).all()

# Sanity check: the representative nodes of a round's highest cluster level must equal
# the super-clusters' representatives reported for that round.
for (round_,), r_clusters_df in clusters_df.groupby(['round']):
	top_level = r_clusters_df['level'].max()
	top_level_representatives = set(r_clusters_df[r_clusters_df['level'] == top_level]['representative_node'])

	round_representatives_df = \
		super_clusters_representatives_df[(super_clusters_representatives_df['round'] == round_)]
	assert top_level_representatives == round_representatives_df.iloc[0]['super_clusters_representatives']

In [None]:
import networkx as nx
from pyvis.network import Network as PyVisNetwork

round_ = 0

# Clusters rows of the selected round.
r_clusters_df = clusters_df[(clusters_df['round'] == round_)]

# Every representative and every peer of the round becomes a node of the graph.
clusters_representatives = set(r_clusters_df['representative_node_repr'].unique())
clusters_peers = set(r_clusters_df['peer_repr'].unique())

r_clusters_graph = nx.DiGraph()
r_clusters_graph.add_nodes_from(clusters_representatives | clusters_peers)

# One directed edge from each representative to every peer listed for it.
grouped_r_clusters_df = r_clusters_df.groupby(['representative_node_repr'])
for (representative_node,), group_df in grouped_r_clusters_df:
	r_clusters_graph.add_edges_from((representative_node, peer) for peer in group_df['peer_repr'])

r_clusters_net = PyVisNetwork(notebook=True, directed=True, cdn_resources='in_line')
r_clusters_net.set_options(
	'{"layout": {"randomSeed": 42, "hierarchical": {"direction": "UD", "sortMethod": "directed"}}}'
)
r_clusters_net.from_nx(r_clusters_graph)

# The generated HTML page is written inside the run directory.
r_clusters_html_path = RUN_DIR_PATH / 'pyvis-clusters.html'
r_clusters_net.show(name=str(r_clusters_html_path))

## Searches

In [None]:
# Load the run's searches records, evaluated against a baseline similarities frame.
# Exactly one of the two lines below should be active; swap the comment to switch the baseline.
searches_df = prep.searches(RUN_DIR_PATH, sims_df=prep.son_sims(RUN_DIR_PATH))  # Evaluate on last SON data baseline similarities.
# searches_df = prep.searches(RUN_DIR_PATH, sims_df=prep.sims(RUN_DIR_PATH)) # Evaluate on live SON data baseline similarities.

### Evaluations

In [None]:
def _searches_box(metric, title):
	"""Return a box plot of the `metric` column of `searches_df`, titled `title`."""
	return px.box(
		searches_df,
		y=metric,
		title=title, labels=figs_labels
	)


# One box plot per search evaluation metric.
fig_recall = _searches_box('recall', 'Recall')
fig_precision = _searches_box('precision', 'Precision')
fig_accuracy = _searches_box('accuracy', 'Accuracy')
fig_ndcg = _searches_box('ndcg', 'NDCG')

for fig in (fig_recall, fig_precision, fig_accuracy, fig_ndcg):
	fig.show()

### Adaptive Minimum Similarity Threshold

In [None]:
# `k_diff` of the searches over the rounds, one trace per node, with markers.
fig_k_diff = px.line(
	searches_df,
	x='round', y='k_diff', color='node',
	markers=True,
	title='K Diff. (return. res. - target k) over Rounds',
	labels=figs_labels | {'k_diff': 'K Diff.'},
)
fig_k_diff.show()

In [None]:
# `min_similarity` of the searches over the rounds, one trace per node, without markers.
fig_min_similarity = px.line(
	searches_df,
	x='round', y='min_similarity', color='node',
	markers=False,
	title='Search Min. Sim. per Round',
	labels=figs_labels,
)
fig_min_similarity.show()

### The Isotonic Method

In [None]:
# Plot the `isotonic_xys` points of every search row of the last round.
round_ = searches_df['round'].max()

round_searches_df = searches_df[searches_df['round'] == round_]

# Flatten each row's (x, y) pairs into one record per point, tagged with the row's node.
xys_df = pd.DataFrame([
	{'node': row['node'], 'x': x, 'y': y}
	for _, row in round_searches_df.iterrows()
	for x, y in row['isotonic_xys']
])

fig_isotonic = px.line(
	xys_df,
	x='x', y='y', color='node',
	title=f'The Fit. Isotonic Reg. Line - Round #{round_}',
	labels=figs_labels | {'x': 'Min. Sim.', 'y': 'Return. Res.'},
)
fig_isotonic.show()

## Durations

In [None]:
# Load the run's durations records; the cells below filter them by `type` and plot
# `duration_seconds` against `round`.
durs_df = prep.durs(RUN_DIR_PATH)

### Searches Readiness Durations

In [None]:
# Durations of type `search-ready`: grouped bars per node and a box plot over all rows.
is_search_ready = durs_df['type'] == 'search-ready'
search_ready_durs_df = durs_df[is_search_ready]

fig_search_ready = px.bar(
	search_ready_durs_df,
	x='round', y='duration_seconds',
	color='node', barmode='group',
	title='Search Readiness Duration per Round',
	labels=figs_labels,
)
fig_search_ready_agg = px.box(
	search_ready_durs_df,
	x='round', y='duration_seconds',
	title='Search Readiness Duration per Round',
	labels=figs_labels,
)

for fig in (fig_search_ready, fig_search_ready_agg):
	fig.show()

### Searches Durations

In [None]:
# Durations of type `search`: box plots per node and over all rows.
is_search = durs_df['type'] == 'search'
search_durs_df = durs_df[is_search]

fig_search_durs = px.box(
	search_durs_df,
	x='round', y='duration_seconds',
	color='node',
	title='Search Duration per Round',
	labels=figs_labels,
)
fig_search_durs_agg = px.box(
	search_durs_df,
	x='round', y='duration_seconds',
	title='Search Duration per Round',
	labels=figs_labels,
)

for fig in (fig_search_durs, fig_search_durs_agg):
	fig.show()

### Rounds' Durations

In [None]:
# Durations of type `round`, as one box per round.
is_round = durs_df['type'] == 'round'
rounds_durs_df = durs_df[is_round]

fig_rounds_durs = px.box(
	rounds_durs_df,
	x='round', y='duration_seconds',
	title="Rounds' Durations",
	labels=figs_labels,
)
fig_rounds_durs.show()