In [35]:
import re
from glob import glob

In [39]:
# Log files of the greedy ("g...") and heuristic ("h...") runs.
# NOTE(review): glob() returns paths in arbitrary order and nothing here sorts
# them; later cells index `records` by position, so the order matters.
g_fns = glob('logs/g*_*')
h_fns = list(reversed(glob('logs/h*_*')))


In [116]:
def copula_sort_value(cop):
  """Return an integer sort key for a copula name.

  The key is derived from the first two characters of the name (the family)
  and from the third character from the end (the tens digit of a rotation
  such as "90°", "180°" or "270°"):

    * "In..." (Independence)  -> 0
    * "Ga..." (Gaussian)      -> 1
    * "Gu..." (Gumbel)        -> 100 + rotation offset
    * "Cl..." (Clayton)       -> 200 + rotation offset
    * anything else           -> 0 + rotation offset

  The rotation offset is 1 for 90°, 2 for 180°, 3 for 270° and 0 otherwise.

  Args:
    cop: copula name, e.g. "Gumbel90°".

  Returns:
    int sort key; smaller values sort first.
  """
  sn = cop[:2]
  if sn == 'In':
    return 0
  # This branch used to repeat the 'In' test and could never be reached;
  # 'Ga' is assumed to be the intended family — TODO confirm.
  if sn == 'Ga':
    return 1
  family_offset = {'Gu': 100, 'Cl': 200}.get(sn, 0)
  # cop[-3:-2] is '9' / '8' / '7' for names ending in 90° / 180° / 270°.
  rotation_offset = {'9': 1, '8': 2, '7': 3}.get(cop[-3:-2], 0)
  return family_offset + rotation_offset


def short_copula_name(cop):
  """Abbreviate a copula name for the LaTeX table.

  The abbreviation is the first two characters of the name. When the name
  ends with a degree sign, the first run of digits found in the name is
  appended as a math-mode superscript.

  Args:
    cop: non-empty copula name, e.g. "Gumbel90°" or "Gaussian".

  Returns:
    The abbreviated name, e.g. "Gu$^{90}$" or "Ga".
  """
  abbreviation = cop[:2]
  if cop[-1] != '°':
    return abbreviation
  degrees = re.findall(r"\d+", cop)[0]
  return abbreviation + f"$^{{{degrees}}}$"

In [71]:
# Patterns for the individual lines of the search logs. Each one is applied
# with .match() to a single line (no trailing newline).

# First line of a test: "Sampling from <copula>".
SamplingFrom = re.compile(r"^Sampling from (?P<copula>[^\s]+)$")
# Heuristic log: "<copula> (WAIC = <waic>)" — a model that was tried.
HeuristicTry = re.compile(r"^(?P<copula>[^\s]+) \(WAIC = (?P<waic>[^\s]+)\)$")
# Greedy log: "Best added copula: <copula> (WAIC = <waic>)"; the copula may be
# written with a space before the rotation, e.g. "Gumbel 90°".
GreedyAdd = re.compile(r"^Best added copula: (?P<copula>[^\s]+(\s\d+°)?)\s+\(WAIC = (?P<waic>[^\s]+)\)$")
# Heuristic log: "Final model: <copula>".
FinalModel = re.compile(r"^Final model: (?P<copula>[^\s]+)$")
# Optional summary: "Correct WAIC: <correct>, best WAIC <best>, diff: <diff>".
CorrectWAIC = re.compile(r"^Correct WAIC: (?P<correct>[^\s]+), best WAIC (?P<best>[^\s]+), diff: (?P<diff>[^\s]+)$")
# Greedy log: "<copula> with WAIC = <waic>".
HistoryWAIC = re.compile(r"^(?P<copula>[^\s]+) with WAIC = (?P<waic>[^\s]+)$")
# Heuristic log: "Swap <copula1>-><copula2>".
SwapWAIC = re.compile(r"^Swap (?P<copula1>[^\s]+)->(?P<copula2>[^\s]+)$")

# Timing line: "Took <time> min".
TookTime = re.compile(r"^Took (?P<time>[^\s]+) min$")

# One element of a concatenated model name: an upper-case letter followed by
# one or more characters that are not upper-case ASCII letters.
CopulaName = re.compile(r"[A-Z][^A-Z]+")


def cname(s):
  """Split a concatenated model name into its element names.

  Args:
    s: model name such as "FrankClayton180°Gumbel270°".

  Returns:
    List of element names with spaces removed, e.g.
    ['Frank', 'Clayton180°', 'Gumbel270°']. Empty list if nothing matches.
  """
  return [c.replace(' ', '') for c in CopulaName.findall(s)]

In [65]:
def parse_h(fn):
  """Parse a heuristic-search log file into a list of record dicts.

  The file is split on blank lines into tests. Each test is expected to
  look like::

      Sampling from <copula>
      <copula> (WAIC = <waic>)                        (tried models)
      Swap <copula1>-><copula2>                       (swaps)
      Final model: <copula>
      Took <time> min
      Correct WAIC: <c>, best WAIC <b>, diff: <d>     (optional)
      <result>

  Lines before "Final model:" that match none of the patterns are ignored.
  Parsing stops at the first chunk whose first line is not a
  "Sampling from" line.

  Args:
    fn: path of the log file.

  Returns:
    List of dicts with keys 'from', 'tries', 'final', 'time', 'result',
    'correct', 'best', 'diff' and 'type' (always 'heuristic'). 'tries' is a
    list of {'copula': [names], 'waic': str} dicts; a WAIC that is not
    present in the log is recorded as '???'.

  Raises:
    ValueError: if a test contains no "Final model:" line.
    IndexError: if a swap or the final model precedes any tried model.
    StopIteration / AttributeError: if the "Took ... min" line or the result
      line is missing or malformed.
  """
  with open(fn) as f:
    text = f.read()

  records = []

  for test in text.split("\n\n"):
    lines = test.split("\n")
    it = iter(lines)
    header = SamplingFrom.match(next(it))
    if not header:
      # e.g. the empty chunk after the last test
      break

    c_from = cname(header.group('copula'))
    tries = []
    # Kept separate from the per-line matches so that a missing
    # "Final model:" line cannot be mistaken for some other match.
    final = None
    for line in it:
      final = FinalModel.match(line)
      if final:
        break
      match = HeuristicTry.match(line)
      if match:
        tries.append({
          'copula': cname(match.group('copula')),
          'waic': match.group('waic')
        })
        continue
      match = SwapWAIC.match(line)
      if match:
        # A swap replaces one element of the previous try; its WAIC is not
        # printed in the log.
        c1, c2 = match.group('copula1', 'copula2')
        tries.append({
          'copula': [c2 if c==c1 else c for c in tries[-1]['copula']],
          'waic': '???'
        })
    if final is None:
      raise ValueError(
        f"{fn}: no 'Final model:' line in test starting with {lines[0]!r}")

    c_final = cname(final.group('copula'))
    if c_final != tries[-1]['copula']:
      print("FINAL IS NOT EQUAL TO LAST")
      print("\n".join(lines))
      # Re-use the earlier try for the same model if there is one, so the
      # last entry of `tries` is always the final model.
      search = [t for t in tries if t['copula'] == c_final]
      if search:
        tries.append(search[0])
      else:
        tries.append({
          'copula': c_final,
          'waic': '???'
        })

    time = TookTime.match(next(it)).group('time')
    line = next(it)
    match = CorrectWAIC.match(line)
    if match:
      correct, best, diff = match.group('correct', 'best', 'diff')
      if tries[-1]['waic'] == '???':
        tries[-1]['waic'] = best
      line = next(it)
    else:
      # No summary line: the final try's WAIC stands in for both values.
      correct, best, diff = tries[-1]['waic'], tries[-1]['waic'], '0.0'
    result = line

    records.append({
      'from': c_from,
      'tries': tries,
      'final': c_final,
      'time': time,
      'result': result,
      'correct': correct,
      'best': best,
      'diff': diff,
      'type': 'heuristic'
    })
  return records

In [138]:
def parse_g(fn):
  """Parse a greedy-search log file into a list of record dicts.

  The file is split on blank lines into tests. Each test is expected to
  look like::

      Sampling from <copula>
      Best added copula: <copula> (WAIC = <waic>)     (one per greedy step)
      <copula> with WAIC = <waic>                     (final model)
      Took <time> min
      Correct WAIC: <c>, best WAIC <b>, diff: <d>     (optional)
      <result>

  Other lines may be interleaved. The final model is read from the last line
  before "Took ... min" that is not a "Best added copula" line. Parsing stops
  at the first chunk whose first line is not a "Sampling from" line.

  Args:
    fn: path of the log file.

  Returns:
    List of dicts with keys 'from', 'tries', 'final', 'time', 'result',
    'correct', 'best', 'diff' and 'type' (always 'greedy'). 'tries' holds the
    cumulative model after each greedy step, followed by the final model if
    it differs from the last step.

  Raises:
    ValueError: if a test has no "Took ... min" line, or no
      "<copula> with WAIC = <waic>" line directly usable as the final model.
    IndexError: if a test has no "Best added copula" line.
    StopIteration: if the result line is missing.
  """
  with open(fn) as f:
    text = f.read()

  records = []

  for test in text.split("\n\n"):
    lines = test.split("\n")
    it = iter(lines)
    header = SamplingFrom.match(next(it))
    if not header:
      # e.g. the empty chunk after the last test
      break

    c_from = cname(header.group('copula'))
    tries = []
    cur_copula = []
    # Reset for every test so a value from the previous test can never be
    # mistaken for this test's final-model line.
    prev_line = None
    took = None
    for line in it:
      took = TookTime.match(line)
      if took:
        break
      added = GreedyAdd.match(line)
      if not added:
        prev_line = line
        continue
      # Each greedy step extends the model by the newly added element(s).
      cur_copula += cname(added.group('copula'))
      tries.append({
        'copula': cur_copula[:],
        'waic': added.group('waic')
      })
    if took is None:
      raise ValueError(
        f"{fn}: no 'Took ... min' line in test starting with {lines[0]!r}")

    time = took.group('time')

    history = HistoryWAIC.match(prev_line) if prev_line is not None else None
    if history is None:
      raise ValueError(
        f"{fn}: no '<copula> with WAIC = <waic>' line before 'Took' "
        f"in test starting with {lines[0]!r}")
    c_final = cname(history.group('copula'))
    if c_final != tries[-1]['copula']:
      tries.append({
        'copula': c_final,
        'waic': history.group('waic')
      })

    line = next(it)
    match = CorrectWAIC.match(line)
    if match:
      correct, best, diff = match.group('correct', 'best', 'diff')
      line = next(it)
    else:
      # No summary line: the final try's WAIC stands in for both values.
      correct, best, diff = tries[-1]['waic'], tries[-1]['waic'], '0.0'

    result = line

    records.append({
      'from': c_from,
      'tries': tries,
      'final': c_final,
      'time': time,
      'result': result,
      'correct': correct,
      'best': best,
      'diff': diff,
      'type': 'greedy'
    })
  return records

In [139]:
# All parsed records: those of the heuristic logs first, then the greedy ones.
records = []
for fn in h_fns:
  records.extend(parse_h(fn))
for fn in g_fns:
  records.extend(parse_g(fn))

FINAL IS NOT EQUAL TO LAST
Sampling from Gaussian
Gaussian (WAIC = -0.1513)
IndependenceGaussianGumbel180°Gumbel270°Gumbel0°Gumbel90° (WAIC = -0.1499)
IndependenceGaussianClayton0°Clayton90°Clayton180°Clayton270° (WAIC = -0.1498)
Final model: Gaussian
Took 3.0 min
Pass
FINAL IS NOT EQUAL TO LAST
Sampling from FrankClayton180°Gumbel270°
Gaussian (WAIC = -0.0185)
IndependenceGaussianGumbel180°Gumbel270°Gumbel0°Gumbel90° (WAIC = -0.1309)
IndependenceGaussianClayton0°Clayton90°Clayton180°Clayton270° (WAIC = -0.1326)
Swap Clayton90°->Gumbel270°
Swap Clayton180°->Gumbel0°
Nothing to reduce
Trying to substitute 2 elements (0 and 1) with a Gauss...
Trying to substitute 2 elements (0 and 2) with a Gauss...
Trying to substitute 2 elements (1 and 2) with a Gauss...
Final model: IndependenceGumbel270°Gumbel0°
Took 6.0 min
Correct WAIC: -0.1414, best WAIC -0.1326, diff: 0.0088
Pass


In [140]:
# Scratch check: range() with a negative stop yields no items.
[i for i in range(-1)]

[]

In [159]:
# Plain-text alternative: sep, eof = '\t', '\n'
# LaTeX table output: column separator and two row terminators
# (eof1 is "\\*" + newline, eof2 is "\\" + newline).
sep = ' & '
eof1 = '\\\\*\n'
eof2 = '\\\\\n'
# Blank entry used to pad the shorter of two `tries` lists.
empty_try = dict(copula='', waic='')

def N(copula):
    """Format a model for the LaTeX table.

    Args:
        copula: iterable of element names (an empty string or list gives '').

    Returns:
        The short name of every element, joined by a backslash-hyphen.
    """
    # Raw string: a plain '\-' is an invalid escape sequence in Python.
    return r'\-'.join(short_copula_name(c) for c in copula)
def bf(st,i,l):
    """Wrap `st` in a LaTeX textbf command when `i` is the last index.

    Args:
        st: text of the table cell.
        i: index of the current row.
        l: number of rows; the row with index l-1 is set in bold.

    Returns:
        `st` wrapped in textbf{...} if i == l-1, otherwise `st` unchanged.
    """
    is_last = i == (l - 1)
    return f"\\textbf{{{st}}}" if is_last else st
def Q(st,diff):
    """Return `st`, or an empty string when `diff` is the string '0.0'.

    Args:
        st: text to show.
        diff: WAIC difference as it appears in the parsed record.
    """
    return st if diff != '0.0' else ''
# Write the LaTeX comparison table: one group of rows per pair of records,
# with the columns of r2 first and those of r1 after them.
with open('test_table.dat', 'w') as f:
    # The records are paired by position around a hard-coded split index;
    # presumably records[:28] are the heuristic runs and records[28:] the
    # greedy runs of the same tests — TODO confirm.
    for r1, r2 in zip(records[:28], records[28:]):
        tries1 = r1['tries'].copy()
        tries2 = r2['tries'].copy()
        len1 = len(tries1)
        len2 = len(tries2)
        # Pad the shorter list with blank entries so both have the same length
        # (a negative repeat count yields an empty list).
        tries2.extend([empty_try] * (len1 - len2))
        tries1.extend([empty_try] * (len2 - len1))

        diff = 0  # set to 1 when either record reports a non-zero WAIC diff
        diff1 = diff2 = '0.0'
        for i, (c1, c2) in enumerate(zip(tries1, tries2)):
            # Source model and time are printed on the first row only.
            c_from1, time1 = '', ''
            c_from2, time2 = '', ''
            if i == 0:
                # [:-2] drops the last two characters of the time string
                # (e.g. the ".0" of "25.0").
                c_from1, diff1, time1 = r1['from'], r1['diff'], (r1['time'][:-2] + r'\,min')
                c_from2, diff2, time2 = r2['from'], r2['diff'], (r2['time'][:-2] + r'\,min')
                if (diff1 != '0.0') or (diff2 != '0.0'):
                    diff = 1
            assert c_from1 == c_from2, "paired records were sampled from different models"
            line = sep.join([
                "%-15s" % N(c_from1),
                "%-30s" % bf(N(c2['copula']), i, len2),
                bf(c2['waic'], i, len2),
                "%5s" % time2,
                "%-30s" % bf(N(c1['copula']), i, len1),
                bf(c1['waic'], i, len1),
                "%5s" % time1,
            ])
            # eof2 ends the last row of the group; when a WAIC-difference row
            # follows (diff == 1) the condition is never true, so every row
            # here ends with eof1.
            _eof = eof2 if (i - diff) == max(len1 - 1, len2 - 1) else eof1
            f.write(line + _eof)
        if diff:
            # Extra row with the WAIC differences; a side whose diff is '0.0'
            # is left blank.
            line = sep.join([
                "%-15s" % "",
                "%-30s" % Q(r'\hfill $\WAIC_{best} - \WAIC_{true}$:', diff2),
                Q(bf(diff2, 0, 1), diff2),
                "%5s" % "",
                "%-30s" % Q(r'\hfill $\WAIC_{best} - \WAIC_{true}$:', diff1),
                Q(bf(diff1, 0, 1), diff1),
                "%5s" % "",
            ])
            f.write(line + eof2)
        f.write("\n\\midrule\n")
#   for r in records:
#     for i, c in enumerate(r['tries']):
#       c_from, diff, time, res = ['']*4
#       if i == 0:
#         c_from, diff, time, res = r['from'], r['diff'], r['time'], r['result']
#       line = sep.join([
#         "%-15s" % N(c_from),
#         "%-29s" % N(c['copula']),
#         c['waic'],
#         "%7s" % diff,
#         "%5s" % time,
#         res
#       ])
#       f.write(line+eof)
#     f.write("\n")

In [76]:
records[28]

{'from': ['Gaussian'],
 'tries': [{'copula': ['Gaussian'], 'waic': '-0.1619'},
  {'copula': ['Gaussian', 'Frank'], 'waic': '-0.1610'},
  {'copula': ['Gaussian'], 'waic': '-0.1619'}],
 'final': ['Gaussian'],
 'time': '25.0',
 'result': 'Pass',
 'correct': '-0.1619',
 'best': '-0.1619',
 'diff': '0.0',
 'type': 'greedy'}