In [119]:
import ast
import os
import re
from fnmatch import fnmatch

import pandas as pd
from os.path import join


In [160]:
# Lift pandas' display truncation so frames are rendered with every row and column.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [61]:
# Hand-picked job logs; each entry is a path relative to `logdir`.
logdir = 'hawk-out'
logfiles = [
    'out-val-opt-fix-neos-1-1341545.hawk-pbs5-20210830T1612',
    'val-neos-1337146_1341988.hawk-pbs5/out-val-neos-1337146_1341988.hawk-pbs5-20210831T0038.txt',
    'out-val-opt-fix-neos-1-1341541.hawk-pbs5-20210830T1606',
    'out-val-opt-neos-32-1337147.hawk-pbs5-20210826T1653',
    'out-opt-neos-32-1337146.hawk-pbs5-20210826T1502',
    'val-neos-1337146._1341912.hawk-pbs5/out-val-neos-1337146._1341912.hawk-pbs5-20210830T2145.txt',
    'out-val-opt-fix-neos-32-1341529.hawk-pbs5-20210830T2000',
]
# Same entries, prefixed with the log directory.
files = [join(logdir, name) for name in logfiles]

In [386]:
def parse_file(filename):
    """Parse one job log file into a flat dict of run attributes.

    The job id is taken from the path: the first ``<digits>.hawk-pbs5`` that
    is preceded by ``_`` or ``-``. Every line of the file is then handed to
    ``parse_stats`` and the fields it recognises are merged into the result,
    with later lines overriding earlier ones (including ``'jobid'``, if a
    line reports one).

    Parameters
    ----------
    filename : str
        Path of the log file to read.

    Returns
    -------
    dict
        Always contains ``'jobid'``, ``'filename'`` (the base name) and
        ``'file'`` (the path as given), plus whatever ``parse_stats``
        extracted. ``'jobid'`` is ``None`` when the path carries no job id
        and no line of the file supplies one.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    jobid_match = re.search(r'[_-](\d+\.hawk-pbs5)', filename)
    res = {
        # A path without a job id used to crash with AttributeError on
        # `None.group`; fall back to None so one stray file does not abort
        # the whole batch.
        'jobid': jobid_match.group(1) if jobid_match else None,
        'filename': os.path.basename(filename),
        'file': filename,
    }
    with open(filename, 'r') as file:
        for line in file:
            res.update(parse_stats(line))
    return res
    
def parse_stats(s):
    """Extract the run statistics recognised in a single log line.

    Three kinds of content are recognised:

    * ``<n> features, <n> sources, <n> samples`` at the end of the line,
      stored as ``cfeatures`` / ``csources`` / ``csamples``;
    * ``Total Time Elapsed: <value>``, stored as ``totaltime``;
    * ``<name>: <value>`` for every name in ``std_fields``, converted with
      the paired converter. The match is not anchored at the start of the
      name, so e.g. ``nodes: `` is also found inside ``max_nodes: ``.

    In addition, a line containing ``args: <repr>`` has its payload merged
    into the result. The payload may be a dict literal or the ``repr`` of an
    ``argparse.Namespace``.

    Parameters
    ----------
    s : str
        One line of log text (a trailing newline is fine).

    Returns
    -------
    dict
        Field name -> converted value; empty when nothing is recognised.

    Raises
    ------
    ValueError
        If a recognised field's value cannot be converted (e.g. ``int`` of
        non-numeric text).

    Notes
    -----
    An ``args:`` payload that cannot be parsed is skipped with a warning.
    Previously such a line raised ``UnboundLocalError`` from the ``finally``
    clause, which hid the real parsing error.
    """
    import warnings
    from argparse import Namespace

    std_fields = (
        ('nodes', int),
        ('mpiprocs', int),
        ('walltime', str),
        ('jobid', str),
        ('date', str),
        ('topic', str),
        ('code_version', str),
        ('mpi_size', int),
        ('mae_mean_retweets', float),
        ('mape_mean_retweets', float),
        ('mae_retweet_probability', float),
        ('mape_retweet_probability', float),
        ('setuptime', float),
        ('runtime', float),
        ('totaltime', float),
        ('seed', int),
        ('grid', ast.literal_eval),
        ('opt', ast.literal_eval),
    )

    # (result key, regex with one capture group, converter).
    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    fields = [
        ('cfeatures', r'(\d+) features, \d+ sources, \d+ samples$', int),
        ('csources', r'\d+ features, (\d+) sources, \d+ samples$', int),
        ('csamples', r'\d+ features, \d+ sources, (\d+) samples$', int),
        ('totaltime', r'Total Time Elapsed: (.+)$', float),
    ]
    fields += [(name, f'{name}: (.+)$', convert) for name, convert in std_fields]

    res = {}
    for key, pattern, convert in fields:
        m = re.search(pattern, s)
        if m:
            res[key] = convert(m.group(1))

    # Require a non-empty payload; the old optional group could yield None.
    m = re.search(r'args: (.+)', s)
    if m:
        payload = m.group(1)
        args = None
        try:
            args = ast.literal_eval(payload)
        except (ValueError, SyntaxError):
            # Not a plain literal: try the repr of an argparse.Namespace.
            try:
                # NOTE(review): eval() executes text read from a log file.
                # It is confined to a namespace exposing only `Namespace`,
                # but this is still only acceptable for trusted logs.
                args = vars(eval(payload, {'__builtins__': {}, 'Namespace': Namespace}))
            except Exception as exc:
                warnings.warn(f'skipping unparseable args line ({exc!r}): {payload!r}')
        if isinstance(args, dict):
            res.update(args)

    return res

def parse_files(logfiles, cores_per_node=128):
    """Parse several log files into one DataFrame indexed by job id.

    Each path is parsed with ``parse_file``; the resulting dicts become the
    rows of an object-dtype frame. A ``nodes`` column is then derived as
    ``mpi_size // cores_per_node``, replacing any ``nodes`` value read from
    the logs. Rows without a ``topic`` are dropped.

    Parameters
    ----------
    logfiles : iterable of str
        Paths of the log files; may be a generator.
    cores_per_node : int, default 128
        Divisor used to turn ``mpi_size`` into a node count. The default is
        the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per parsed file that reported a topic, sorted by ``jobid``.
        Empty (with an index named ``jobid``) when ``logfiles`` is empty.

    Raises
    ------
    KeyError
        If no parsed file supplied an ``mpi_size`` or a ``topic`` field.
    """
    records = [parse_file(path) for path in logfiles]
    if not records:
        # Without rows there is no 'jobid' column and set_index would raise.
        return pd.DataFrame(index=pd.Index([], name='jobid'))

    runs = pd.DataFrame(records, dtype=object).set_index('jobid')
    runs['nodes'] = runs['mpi_size'] // cores_per_node
    return runs.sort_index().dropna(subset=['topic'])

def outfiles(directory):
    """Yield the paths of all non-hidden ``*out*`` files below ``directory``.

    Directories whose name starts with a dot are not descended into, and
    files whose name starts with a dot are never yielded.
    """
    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk skips dot-directories entirely.
        dirs[:] = [name for name in dirs if not name.startswith('.')]
        visible = (name for name in files if not fnmatch(name, '.*'))
        for name in visible:
            if fnmatch(name, '*out*'):
                yield join(root, name)

In [395]:
# Gather the logs from both locations into one table, sorted by job id, and
# keep a CSV copy of it.
hawk = outfiles('/Users/ian/duck/hawk/propagation/out')
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way.
r = pd.concat([parse_files(hawk), parse_files(outfiles('hawk-out'))]).sort_index()
r.to_csv('runs.csv')

In [388]:
# Runs without a total time are set aside as `aborted` and removed from `r`
# by job id (every row carrying one of those ids is dropped).
aborted = r.loc[r['totaltime'].isna()]
r.drop(index=aborted.index, inplace=True)
aborted

Unnamed: 0_level_0,filename,file,mpi_size,code_version,runid,graph,metis_zero_based,tweets,stats,params,discount,corr,source_map,indir,outdir,features,sources,samples,epsilon,max_depth,max_nodes,sample_split,seed,command,topic,setuptime,cfeatures,csources,csamples,mae_retweet_probability,mae_mean_retweets,mape_retweet_probability,mape_mean_retweets,runtime,totaltime,grid,opt,nodes,corrs,discounts
jobid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
1342941.hawk-pbs5,out-topt_1342941.hawk-pbs5-20210831T1428.txt,/Users/ian/duck/hawk/propagation/out/topt_1342...,1024,v0.2-33-g7d78804,topt_1342941.hawk-pbs5,data/anon_graph_inner_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/topt_1342941.hawk-pbs5,1,50,500,0.001,,,1,268489082187225627088439964171906665614,optimize,neos_20201110,3.737601,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",8,,
1347531.hawk-pbs5,out-opt-neos_1347531.hawk-pbs5-20210902T1944.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347531.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347531.hawk-pbs5,1,64,1000,0.001,,,1,130740515737049131951359262883379819377,optimize,neos,10.165222,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16,,
1347533.hawk-pbs5,out-opt-fpoe_1347533.hawk-pbs5-20210902T2003.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347533.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347533.hawk-pbs5,1,64,1000,0.001,,,1,307507379562727090365491967119568450911,optimize,fpoe,27.270164,,,,,,,,,,,,16,,
1347535.hawk-pbs5,out-opt-neos_1347535.hawk-pbs5-20210902T2023.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347535.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347535.hawk-pbs5,1,64,1000,0.001,,,1,88404763366101979176606411745585869751,optimize,neos,10.205761,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16,,
1347537.hawk-pbs5,out-opt-fpoe_1347537.hawk-pbs5-20210902T2035.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347537.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347537.hawk-pbs5,1,64,1000,0.001,,,1,46530326245742331375175525747901533172,optimize,fpoe,22.131627,,,,,,,,,,,,16,,
1347539.hawk-pbs5,out-opt-neos_1347539.hawk-pbs5-20210903T0310.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347539.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347539.hawk-pbs5,1,64,1000,0.001,,,1,258823136310150087660397586805628538947,optimize,neos,9.932124,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16,,
1347541.hawk-pbs5,out-opt-fpoe_1347541.hawk-pbs5-20210903T0315.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347541.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347541.hawk-pbs5,1,64,1000,0.001,,,1,244967997612630238084226287051867816632,optimize,fpoe,23.039546,,,,,,,,,,,,16,,
1347544.hawk-pbs5,out-opt-neos_1347544.hawk-pbs5-20210903T0320.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347544.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347544.hawk-pbs5,1,64,1000,0.001,,,1,196117843099847536431744318218990594153,optimize,neos,10.115012,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16,,
1347546.hawk-pbs5,out-opt-fpoe_1347546.hawk-pbs5-20210903T0325.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347546.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347546.hawk-pbs5,1,64,1000,0.001,,,1,110602438280087028168196482557985913302,optimize,fpoe,23.294054,,,,,,,,,,,,16,,
1347922.hawk-pbs5,out-val-learn-fpoe_1347922.hawk-pbs5-20210902T...,/Users/ian/duck/hawk/propagation/out/val-learn...,128,v0.2-35-gb726c4d,val-learn-fpoe_1347922.hawk-pbs5,data/anon_graph_inner_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,data/discount-fpoe-1336748.hawk-pbs5.csv,data/corr-fpoe-1336749.hawk-pbs5.csv,,data,out/val-learn-fpoe_1347922.hawk-pbs5,1,256,1000,0.001,,,1,160598436452704781754625972554107525042,val,fpoe_20201110,1.532338,,,,,,,,,,,,1,,


In [400]:
# Discard every run whose graph path contains 'inner'.
r = r.loc[~r['graph'].str.contains('inner')]
# 'optimize' runs with 64 sources on an fpoe topic: version, size and runtime.
r.loc[
    (r['command'] == 'optimize') & (r['sources'] == 64) & r['topic'].str.contains('fpoe'),
    ['code_version', 'topic', 'sources', 'samples', 'nodes', 'runtime'],
]

Unnamed: 0_level_0,code_version,topic,sources,samples,nodes,runtime
jobid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1336118.hawk-pbs5,v0.2-19-gd094223-dirty,fpoe_20201110,64,500,1,44575.961342
1336184.hawk-pbs5,v0.2-23-g5e9472f,fpoe,64,1000,16,6350.919478
1336200.hawk-pbs5,v0.2-24-g01c3ff5-dirty,fpoe,64,1000,16,15130.526506
1336904.hawk-pbs5,v0.2-25-gdf92be1-dirty,fpoe,64,1000,16,15447.69132
1341842.hawk-pbs5,v0.2-33-g7d78804,fpoe,64,1000,32,7527.317517
1347533.hawk-pbs5,v0.2-36-g06ec3af,fpoe,64,1000,16,
1347537.hawk-pbs5,v0.2-36-g06ec3af,fpoe,64,1000,16,
1347541.hawk-pbs5,v0.2-36-g06ec3af,fpoe,64,1000,16,
1347546.hawk-pbs5,v0.2-36-g06ec3af,fpoe,64,1000,16,


In [329]:
# Display the runs that were set aside as aborted (no total time recorded).
aborted

Unnamed: 0_level_0,filename,file,mpi_size,code_version,runid,graph,metis_zero_based,tweets,stats,params,discount,corr,source_map,indir,outdir,features,sources,samples,epsilon,max_depth,max_nodes,sample_split,seed,command,topic,setuptime,cfeatures,csources,csamples,mae_retweet_probability,mae_mean_retweets,mape_retweet_probability,mape_mean_retweets,runtime,totaltime,grid,opt,nodes
jobid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
1347531.hawk-pbs5,out-opt-neos_1347531.hawk-pbs5-20210902T1944.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347531.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347531.hawk-pbs5,1,64,1000,0.001,,,1,130740515737049131951359262883379819377,optimize,neos,10.165222,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16
1347533.hawk-pbs5,out-opt-fpoe_1347533.hawk-pbs5-20210902T2003.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347533.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347533.hawk-pbs5,1,64,1000,0.001,,,1,307507379562727090365491967119568450911,optimize,fpoe,27.270164,,,,,,,,,,,,16
1347535.hawk-pbs5,out-opt-neos_1347535.hawk-pbs5-20210902T2023.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347535.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347535.hawk-pbs5,1,64,1000,0.001,,,1,88404763366101979176606411745585869751,optimize,neos,10.205761,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16
1347537.hawk-pbs5,out-opt-fpoe_1347537.hawk-pbs5-20210902T2035.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347537.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347537.hawk-pbs5,1,64,1000,0.001,,,1,46530326245742331375175525747901533172,optimize,fpoe,22.131627,,,,,,,,,,,,16
1347539.hawk-pbs5,out-opt-neos_1347539.hawk-pbs5-20210903T0310.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347539.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347539.hawk-pbs5,1,64,1000,0.001,,,1,258823136310150087660397586805628538947,optimize,neos,9.932124,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16
1347541.hawk-pbs5,out-opt-fpoe_1347541.hawk-pbs5-20210903T0315.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347541.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347541.hawk-pbs5,1,64,1000,0.001,,,1,244967997612630238084226287051867816632,optimize,fpoe,23.039546,,,,,,,,,,,,16
1347544.hawk-pbs5,out-opt-neos_1347544.hawk-pbs5-20210903T0320.txt,/Users/ian/duck/hawk/propagation/out/opt-neos_...,2048,v0.2-36-g06ec3af,opt-neos_1347544.hawk-pbs5,data/anon_graph_outer_neos_20201110.npz,False,data/sim_features_neos_20201110.csv,,,,,,data,out/opt-neos_1347544.hawk-pbs5,1,64,1000,0.001,,,1,196117843099847536431744318218990594153,optimize,neos,10.115012,,,,,,,,,,"{'discount_factor': (0.0, 1.0, 0.1), 'corr': (...","{'discount_factor': (0.0, 1.0, 0.01), 'corr': ...",16
1347546.hawk-pbs5,out-opt-fpoe_1347546.hawk-pbs5-20210903T0325.txt,/Users/ian/duck/hawk/propagation/out/opt-fpoe_...,2048,v0.2-36-g06ec3af,opt-fpoe_1347546.hawk-pbs5,data/anon_graph_outer_fpoe_20201110.npz,False,data/sim_features_fpoe_20201110.csv,,,,,,data,out/opt-fpoe_1347546.hawk-pbs5,1,64,1000,0.001,,,1,110602438280087028168196482557985913302,optimize,fpoe,23.294054,,,,,,,,,,,,16


In [323]:
# Group the runs by the discount file they used; every remaining column is
# collected into a per-group list, and only the columns of interest are kept.
c = (
    r.groupby(['discount'])
    .agg(list)
    .loc[:, ['runid', 'file', 'command', 'nodes', 'topic',
             'mae_mean_retweets', 'mape_mean_retweets', 'runtime', 'totaltime']]
)
c

Unnamed: 0_level_0,runid,file,command,nodes,topic,mae_mean_retweets,mape_mean_retweets,runtime,totaltime
discount,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
data/discount-fpoe-1336748.hawk-pbs5.csv,"[val-learn-fpoe_1349513.hawk-pbs5, val-learn-f...",[/Users/ian/duck/hawk/propagation/out/val-lear...,"[val, val, val]","[8, 1, 1]","[fpoe_20201110, fpoe_20201110, fpoe_20201110]","[0.8520061136456212, 0.8303867884377327, 0.821...","[0.17138241205284352, 0.16703364971212645, 0.1...","[120.43971395492554, 478.8196804523468, 465.85...","[147.42072868347168, 483.84821677207947, 470.8..."
out/discount-fpoe_1348025.hawk-pbs5/discount-fpoe-discount-fpoe_1348025.hawk-pbs5.csv,[val-learn-discount-fpoe_1348025.hawk-pbs5_134...,[/Users/ian/duck/hawk/propagation/out/val-lear...,[val],[8],[fpoe],[0.7966782535157133],[0.16025312322390214],[123.55576133728027],[150.65532279014587]
out/discount-fpoe_1348031.hawk-pbs5/discount-fpoe-discount-fpoe_1348031.hawk-pbs5.csv,[val-learn-discount-fpoe_1348031.hawk-pbs5_134...,[/Users/ian/duck/hawk/propagation/out/val-lear...,[val],[8],[fpoe],[0.8064462978287787],[0.16221798118513095],[121.03461861610413],[151.31356930732727]
out/discount-neos_1348022.hawk-pbs5/discount-neos-discount-neos_1348022.hawk-pbs5.csv,[val-learn-discount-neos_1348022.hawk-pbs5_134...,[/Users/ian/duck/hawk/propagation/out/val-lear...,[val],[8],[neos],[0.5053378620289779],[0.14083118165946348],[22.848803758621216],[39.024643898010254]


In [335]:
# Graph file used by each 'optimize' run.
r.loc[r['command'] == 'optimize', 'graph']

jobid
1341840.hawk-pbs5    data/anon_graph_outer_neos_20201110.npz
1341842.hawk-pbs5    data/anon_graph_outer_fpoe_20201110.npz
Name: graph, dtype: object

In [None]:
# Runtime per node count (rows) and topic (columns), written to scaling.csv.
# DataFrame.pivot raises ValueError as soon as two runs share a
# (nodes, topic) pair, so duplicates are averaged with pivot_table instead.
# The value column is cast first because it is stored with object dtype.
p = (
    r.assign(runtime=pd.to_numeric(r['runtime']))
    .pivot_table(index='nodes', columns='topic', values='runtime', aggfunc='mean')
)
p.to_csv('scaling.csv')

In [None]:
# Mean-retweets MAE per node count (rows) and topic (columns), written to
# mae.csv. DataFrame.pivot raises ValueError as soon as two runs share a
# (nodes, topic) pair, so duplicates are averaged with pivot_table instead.
# The value column is cast first because it is stored with object dtype.
p = (
    r.assign(mae_mean_retweets=pd.to_numeric(r['mae_mean_retweets']))
    .pivot_table(index='nodes', columns='topic', values='mae_mean_retweets', aggfunc='mean')
)
p.to_csv('mae.csv')

In [None]:
# Display the run table in its current (filtered) state.
r