In [None]:
from os import environ
from pathlib import Path

# Directory the log files are read from. The DFL_LOGS_DIR environment variable
# takes precedence; otherwise the run directory below is used.
DEFAULT_LOGS_DIR = './runs/2025-02-17 21-58-59-275732258 +0330/logs/'
logs_dir = Path(environ.get('DFL_LOGS_DIR', DEFAULT_LOGS_DIR))

In [None]:
from jsonl import Jsonl
import pandas as pd
from datetime import timedelta

accs_df = pd.DataFrame(columns=('file', 'round', 'type', 'accuracy'))
durs_df = pd.DataFrame(columns=('file', 'round', 'type', 'duration'))

for file in logs_dir.iterdir():
	if not file.is_file():
		print(f"Skipping non-file {file}.")
		continue
	
	for log in Jsonl(file):
		log_type = log.get('type', None)
		match log_type:
			case 'train-accuracy' | 'test-accuracy':
				d = {
					'type': log_type.split('-')[0],
					'file': file.name,
					'round': log['round'],
					'accuracy': log['accuracy'],
				}

				accs_df.loc[len(accs_df)] = d
			case 'time-pull' | 'time-train':
				d = {
					'type': log_type.split('-')[1],
					'file': file.name,
					'round': log['round'],
					'duration': timedelta(seconds=log['time-seconds']),
				}

				durs_df.loc[len(durs_df)] = d
			case _:
				# NOP.
				pass

accs_df['node_index'] = accs_df['file'].str.extract('(\\d+)').astype(int)
durs_df['node_index'] = durs_df['file'].str.extract('(\\d+)').astype(int)

accs_df = accs_df.convert_dtypes()
durs_df = durs_df.convert_dtypes()

accs_df.sort_values(['node_index', 'round'], inplace=True)
durs_df.sort_values(['node_index', 'round'], inplace=True)


In [None]:
# Display the parsed accuracy records as this cell's output.
accs_df

In [None]:
# Display the parsed duration records as this cell's output.
durs_df

In [None]:
# Split the accuracy records by their 'type' value and keep only the columns
# used for plotting (round, accuracy, node_index).
train_accs_df = accs_df.loc[accs_df['type'] == 'train'].drop(columns=['type', 'file'])
test_accs_df = accs_df.loc[accs_df['type'] == 'test'].drop(columns=['type', 'file'])

# Add up all duration records that share the same node, round, type and file
# (presumably a round can log more than one pull - verify against the logger).
agg_durs_df = durs_df.groupby(['node_index', 'round', 'type', 'file'], as_index=False)['duration'].sum()

# Convert the summed timedeltas to float seconds with the vectorized accessor
# rather than a per-element Python lambda. `pd.to_timedelta` guards the `.dt`
# accessor against a column that is not (yet) timedelta-typed, e.g. when no
# duration records were loaded.
agg_durs_df['duration'] = pd.to_timedelta(agg_durs_df['duration']).dt.total_seconds()

# Same split as above, for the aggregated durations.
pull_durs_df = agg_durs_df.loc[agg_durs_df['type'] == 'pull'].drop(columns=['type', 'file'])
train_durs_df = agg_durs_df.loc[agg_durs_df['type'] == 'train'].drop(columns=['type', 'file'])

In [None]:
import plotly.express as px

# Captions used for the axes and the legend of both accuracy figures.
labels = dict(node_index='Node', round='Round', accuracy='Accuracy')

# Arguments common to both figures: accuracy against round, one line per node.
accuracy_line_args = dict(
	x='round', y='accuracy', color='node_index', markers=True, labels=labels,
)

fig_train = px.line(train_accs_df, title='Train Accuracies over Rounds', **accuracy_line_args)
fig_test = px.line(test_accs_df, title='Test Accuracies over Rounds', **accuracy_line_args)

# The test figure is rendered first, followed by the train figure.
fig_test.show()
fig_train.show()

In [None]:
import plotly.express as px

# Captions used for the axes and the legend of both duration figures.
labels = dict(node_index='Node', round='Round', duration='Duration (s)')

# Arguments common to both figures: duration against round, one line per node.
duration_line_args = dict(
	x='round', y='duration', color='node_index', markers=True, labels=labels,
)

fig_pull = px.line(pull_durs_df, title='Total Comm. Pull Duration over Rounds', **duration_line_args)

# NOTE(review): this rebinds `fig_train`, which the accuracy-plot cell also
# assigns; after this cell runs the name refers to the duration figure.
fig_train = px.line(train_durs_df, title='Train Duration over Rounds', **duration_line_args)

fig_pull.show()
fig_train.show()