In [None]:
import os
import subprocess
import glob
import pandas as pd
import io
import matplotlib.pyplot as plt
import json
import re

In [None]:
# Collect every measurement JSON of this run. sorted() makes the load order
# reproducible, since glob itself gives no ordering guarantee.
fileNames = sorted(glob.glob(os.path.join('..', 'measurements', '20250226-111240991', '*.json')))

# Raw JSON payloads, each tagged with the base name of the file it came from.
dcts = []
for fileName in fileNames:
    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(fileName, 'r', encoding='utf-8') as file:
        dct = json.load(file)
    dct['name'] = os.path.basename(fileName)
    dcts.append(dct)

# The list has to be created before it is appended to; without this line the
# cell raises NameError on a freshly started kernel.
dfsWithNames = []
for dct in dcts:
    # The 'output' entry of each payload is turned into a DataFrame.
    df = pd.DataFrame(dct['output'])
    dfsWithNames.append({'name': dct['name'], 'df': df})

In [None]:
# Axis ranges shared by all scatter plots so the figures are directly comparable.
# NOTE(review): 44 x 25 is presumably the extent of the point domain — confirm.
X_LIMITS = (0, 44)
Y_LIMITS = (0, 25)

# One small scatter plot per measurement file, with one scatter call per cluster.
for entry in dfsWithNames:
    df = entry['df']
    name = entry['name']
    clusters = df.groupby('cluster_id')

    fig, ax = plt.subplots(figsize=(4, 3))
    for clusterId, grp in clusters:
        # Each scatter call picks the next colour of the default colour cycle.
        ax.scatter(grp['x'], grp['y'], s=4)

    # Figure-level settings are applied once per figure (not once per cluster),
    # so they also take effect when a frame contains no clusters at all.
    ax.set_xlim(*X_LIMITS)
    ax.set_ylim(*Y_LIMITS)
    ax.set_title(name)
    # Render this figure now; the original argument-less plt.plot() drew nothing.
    plt.show()

In [None]:
def makeProfileDf(records=None):
    """Build a per-measurement summary table from the loaded measurement dicts.

    Each record's name is expected to end in
    ``-<kernel>-<nClusters>-<nPoints>-<idx>.json``. The kernel name and the two
    counts are parsed out of the name and paired with the record's
    ``profile['timeTotal']`` value.

    Parameters
    ----------
    records : iterable of dict, optional
        Measurement dicts carrying a ``'name'`` string and a ``'profile'``
        mapping with a ``'timeTotal'`` entry. When omitted, the notebook-level
        ``dcts`` list is used, which keeps ``makeProfileDf()`` working as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``kernel``, ``n_clusters`` (int), ``n_points`` (int)
        and ``time_total``; one row per record, in input order.

    Raises
    ------
    ValueError
        If a record's name does not follow the expected pattern.
    """
    if records is None:
        records = dcts

    # Compiled once and shared by every row. The dot before "json" is escaped
    # and fullmatch() is used so that only names truly ending in ".json" pass.
    namePattern = re.compile(r'.*-([^-]+)-(\d+)-(\d+)-(\d+)\.json')

    def parseName(s):
        """Return (kernel, nClusters, nPoints) parsed from a measurement file name."""
        match = namePattern.fullmatch(s)
        if match is None:
            raise ValueError(
                f'measurement name {s!r} does not match '
                '"*-<kernel>-<nClusters>-<nPoints>-<idx>.json"'
            )
        kernel, nClusters, nPoints, _idx = match.groups()
        return kernel, int(nClusters), int(nPoints)

    rows = []
    for dct in records:
        # Parse the name a single time per record instead of once per column.
        kernel, nClusters, nPoints = parseName(dct['name'])
        rows.append({
            'name': dct['name'],
            'kernel': kernel,
            'n_clusters': nClusters,
            'n_points': nPoints,
            'time_total': dct['profile']['timeTotal'],
        })

    # Passing columns explicitly keeps the schema stable even for empty input.
    return pd.DataFrame(
        rows,
        columns=['name', 'kernel', 'n_clusters', 'n_points', 'time_total'],
    )

# Build the summary table once at notebook level; the cells below read `profileDf`.
profileDf = makeProfileDf()

In [None]:
# Sanity check: print the summary table's columns, dtypes and non-null counts.
profileDf.info()

In [None]:
# Timing curves: one log-log figure per cluster count, one line per kernel.
for nClusters, grp in profileDf.groupby('n_clusters'):
    fig, ax = plt.subplots(figsize=(4, 3))
    for kernel, ggrp in grp.groupby('kernel'):
        # Order by point count so each line is drawn left to right.
        ggrp = ggrp.sort_values('n_points')
        ax.plot(ggrp['n_points'], ggrp['time_total'], label=kernel, marker='x')
    ax.set_title(f'{nClusters}')
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.legend()
    plt.show()