In [1]:
import glob

# Print the name of every *.log file found in the current working directory,
# one per line, in the order the glob module yields them.
for file in glob.iglob('*.log'):
    print(file)

sl0.log
l1_t10.log
l0_t1.log
l2_t10_ct.log
sl1.log
l1_t1.log
l1_t5.log
l2_t10.log


In [34]:
# Name of a single log file, presumably selected for ad-hoc inspection.
# NOTE(review): this cell carries execution count 34, i.e. it was run out of
# order relative to the cells around it, and none of the code visible in this
# notebook reads `file` afterwards -- confirm whether it is still needed.
file = 'l2_t10.log'

In [6]:
import re
from collections import defaultdict
import numpy as np


# Pattern for one data row of the log: 21 whitespace-separated fields, one
# capture group per name in _LOG_COLUMNS. The TGID and TID fields may hold
# "-" instead of a number; every other field is an integer or a decimal.
datam = re.compile(
	r'(\d+)\s+(\d+)\s+(\d+|-)\s+(\d+|-)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+'+
	r'(\d+)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+)\s+(\d+)\s+(\d+\.?\d*)\s+(\d+)\s+(\d+)\s+(\d+\.?\d*)\s+(\d+\.?\d*)'+
	r'\s+(\d+\.?\d*)\s+(\d+)\s+'
)

# Header line of the log format split into its 21 column names. "Time" is the
# row key; the remaining 20 names label the values stored per row.
# NOTE(review): the header looks like pidstat output -- confirm the producer.
_LOG_COLUMNS = (
	"Time        UID      TGID       TID    %usr %system  %guest   %wait    %CPU   CPU  minflt/s  majflt/s     VSZ     RSS   %MEM StkSize  StkRef   kB_rd/s   kB_wr/s kB_ccwr/s iodelay"
).split()


def get_log_data(fname):
	"""Parse a statistics log into a time axis and per-column value series.

	Decimal commas are converted to dots, then every row matched by ``datam``
	is classified by its TGID/TID pair:

	* numeric TGID and ``-`` as TID: values go to the process-level series;
	* ``-`` as TGID and numeric TID: values go to the per-thread series,
	  keyed by the TID string;
	* any other combination: the row is ignored.

	Fields equal to ``-`` are skipped, all other fields are stored as floats.

	Args:
		fname: path of the log file to read.

	Returns:
		A tuple ``(time_data, main_t_data, t_data)`` where

		* ``time_data`` is an integer numpy array with one entry per distinct
		  timestamp, in order of first appearance, shifted so that the
		  smallest timestamp becomes 0;
		* ``main_t_data`` maps each column name (all columns except "Time")
		  to a list with one float per process-level row. Its length differs
		  from ``len(time_data)`` when a timestamp has zero or several such
		  rows;
		* ``t_data`` maps each column name to a ``defaultdict(list)`` from TID
		  string to the list of floats recorded for that thread.

	Raises:
		AssertionError: if no row of the file matches ``datam``.
	"""
	with open(fname, 'r') as f:
		text = f.read()

	# The float groups of ``datam`` only accept a dot as decimal separator.
	full_data = datam.findall(text.replace(',', '.'))
	if not full_data:
		# Raised explicitly (instead of via ``assert``) so the check survives
		# ``python -O``; the exception type is kept for existing callers. The
		# message names the file rather than dumping its whole content.
		raise AssertionError(
			'no data rows matching the expected log format in {}'.format(fname)
		)

	cols = _LOG_COLUMNS[1:]

	# Group rows by timestamp so that the distinct timestamps and the row
	# processing order are the same as grouping-by-first-appearance gives.
	rows_by_time = defaultdict(list)
	for stamp, *fields in full_data:
		rows_by_time[stamp].append(fields)

	main_t_data = {col: [] for col in cols}
	t_data = {col: defaultdict(list) for col in cols}
	for rows in rows_by_time.values():
		for fields in rows:
			# fields[0] is UID, so TGID and TID sit at positions 1 and 2.
			parent_id, child_id = fields[1], fields[2]
			is_main = parent_id != '-' and child_id == '-'
			is_thread = parent_id == '-' and child_id != '-'
			if not (is_main or is_thread):
				continue

			for col, value in zip(cols, fields):
				if value == '-':
					continue
				if is_main:
					main_t_data[col].append(float(value))
				else:
					t_data[col][child_id].append(float(value))

	time_data = np.array([int(stamp) for stamp in rows_by_time])
	time_data -= np.min(time_data)

	return time_data, main_t_data, t_data

In [7]:
from plotly import graph_objects as go

#

# One time axis per entry of `files`, appended in the same order by the
# loading loop that follows.
times = []
# Legend label -> log file name. Each file is parsed with get_log_data() and
# the label is used as the trace name in the figures.
files = {
	'scrapy': 'scrapy.log',
	'kw gdg 0 level 1 tabs': 'kl0_t1.log',
	'kw gdg 1 level 1 tabs': 'kl1_t1.log',
	'kw gdg 1 level 5 tabs': 'kl1_t5.log',
	'kw gdg 1 level 10 tabs': 'kl1_t10.log',
	'kw gdg 2 level 5 tabs': 'kl2_t5.log',
	'kw gdg 2 level 10 tabs': 'kl2_t10.log',
	'gdg 0 level 1 tabs': 'l0_t1.log',
	'gdg 1 level 1 tabs': 'l1_t1.log',
	'gdg 1 level 5 tabs': 'l1_t5.log',
	'gdg 1 level 10 tabs': 'l1_t10.log',
	'gdg 2 level 1 tabs': 'l2_t1.log',
	'gdg 2 level 5 tabs': 'l2_t5.2.log',
	'gdg 2 level 10 tabs': 'l2_t10.log',
}

# Parse every configured log once: keep its time axis in `times` and its
# process-level series in `raw_data`; the per-thread series are discarded.
raw_data = {}
for label, log_path in files.items():
	parsed = get_log_data(log_path)
	times.append(parsed[0])
	raw_data[label] = parsed[1]

# Pivot label -> column -> samples into column -> label -> samples, so that
# each column can be drawn as one figure with one trace per run.
joined_data = defaultdict(dict)
for run_label, per_column in raw_data.items():
	for column, samples in per_column.items():
		joined_data[column].update({run_label: samples})

# Shared x axis: the time axis of the run with the most timestamps.
axis_lengths = [len(axis) for axis in times]
time_data = times[np.argmax(axis_lengths)]

# Pair every run with the time axis parsed from its own log. `raw_data` and
# `times` are filled by the same loading loop, so their orders match.
own_times = dict(zip(raw_data, times))

# One figure per column, one trace per run.
for col, dls in joined_data.items():
	# Drop runs that have no samples for this column or whose series is
	# constant; skip the column entirely when nothing is left to draw.
	dls = {src: dl for src, dl in dls.items() if len(dl) and np.std(dl) != 0}
	if not dls:
		continue

	fig = go.Figure()

	for src, dl in dls.items():
		# Plot each run against its own timestamps instead of those of the
		# longest log. When the run's time axis does not line up with its
		# samples (different lengths), fall back to the shared axis.
		x_axis = own_times.get(src)
		if x_axis is None or len(x_axis) != len(dl):
			x_axis = time_data
		fig.add_scatter(x=x_axis, y=dl, name=src)
	fig.update_layout(
		title=dict(text=col, yref='paper'),
		xaxis_title='Time (offset from first sample)',
		yaxis_title=col,
	)
	fig.show()