In [None]:
import csv
import glob
import os

def load_freq_from_perf(perf_text: str):
  """Build a function-name -> sample-count map from a CSV file.

  Each usable row has at least five columns. Column 1 is parsed as a float
  sample count. Column 4 supplies the symbol: the text after the first ']'
  is taken, everything from '.llvm.' onward is dropped, and surrounding
  whitespace is stripped. Rows that are too short, or whose symbol or count
  cannot be parsed, are skipped.

  Reading stops at the first row whose sample count is zero (the zero value
  is printed first). Presumably the input is sorted by descending sample
  count, so nothing useful follows -- TODO confirm.

  Args:
    perf_text: path of the CSV file to read.

  Returns:
    dict mapping function name to sample count. If a name appears more than
    once, the last row read wins.
  """
  f_to_freq = {}
  with open(perf_text) as f:
    for row in csv.reader(f):
      if len(row) < 5:
        continue
      try:
        fname = row[4].split(']')[1].split('.llvm.')[0].strip()
        samples = float(row[1])
      except (IndexError, ValueError):
        # No ']' in the symbol column, or a non-numeric sample count.
        # Only these are swallowed so that Ctrl-C still interrupts the load.
        continue
      if not samples:
        print(samples)
        break
      f_to_freq[fname] = samples
  return f_to_freq

# Bind the result of load_freq_from_perf for '/tmp/i2' to f_to_freq.
# NOTE: a later cell rebinds f_to_freq from load_freqs_from_stats, so which
# source is in effect depends on which cell ran last.
f_to_freq = load_freq_from_perf('/tmp/i2')

In [9]:
import csv
import glob

def load_freqs_from_stats(dirname: str):
  """Collect per-function frequencies from every '.stats' file under a directory.

  The directory is walked recursively. Each '.stats' file is read as CSV with
  exactly three columns per row; the names used here suggest
  (function, md5, frequency) -- TODO confirm the meaning of the middle column,
  which is not used. Anything from '.llvm.' onward is dropped from the
  function name before it is used as the key.

  Args:
    dirname: root directory to search.

  Returns:
    dict mapping function name to frequency as a float. If a name appears
    more than once, the last row read wins.

  Raises:
    ValueError: if a non-blank row does not have exactly three columns or its
      last column is not a valid float.
  """
  ret: dict[str, float] = {}
  for filename in glob.iglob(os.path.join(dirname, '**'), recursive=True):
    if not filename.endswith('.stats'):
      continue
    with open(filename) as f:
      for row in csv.reader(f):
        if not row:
          # csv.reader yields an empty list for a blank line; skip it rather
          # than failing the three-way unpack below.
          continue
        fct, _md5, freq = row
        ret[fct.split('.llvm.')[0]] = float(freq)
  return ret

# Rebind f_to_freq to the frequencies gathered from the '.stats' files under
# /work/muppet_corpus_samples_head; the cells below read this name.
f_to_freq = load_freqs_from_stats('/work/muppet_corpus_samples_head')

In [10]:
# Rank (frequency, name) pairs from highest to lowest; equal frequencies are
# ordered by name, also descending, because whole tuples are compared.
rev_freqs = sorted(((freq, fn) for fn, freq in f_to_freq.items()), reverse=True)
total = sum(f_to_freq.values())

# Walk the ranking and keep entries until the running frequency sum has
# passed 99% of the overall total. The check happens before each entry is
# added, so the entry that crosses the threshold is kept.
important = []
t = 0.0
for freq, fn in rev_freqs:
  if t > 0.99 * total:
    break
  important.append((freq, fn))
  t += freq


In [2]:
import dataclasses
import os

@dataclasses.dataclass(frozen=True)
class Score:
  """Immutable record of six float counters for a single function.

  load_raw builds one instance per row of a '.stats' file, and get_score
  collapses an instance into a single number.
  """
  # NOTE(review): the field names suggest counts of register copies, loads,
  # stores, two kinds of rematerialization ("ch"/"ex") and combined
  # load+store instructions -- confirm against whatever produces the files.
  copy:float
  load:float
  store:float
  ch_remat:float
  ex_remat:float
  loadstore:float

def load_raw(dirname: str):
  """Read every '.stats' file under `dirname` into a name -> Score mapping.

  The directory is searched recursively. For each CSV row the final column
  is discarded and the remaining seven are read, in file order, as
  (name, copy, load, store, loadstore, ch_remat, ex_remat). A row that does
  not have seven columns after trimming is printed and then fails the
  unpacking with ValueError. Anything from '.llvm.' onward is dropped from
  the name; a repeated name keeps the last row read.

  Args:
    dirname: root directory to search.

  Returns:
    dict mapping function name to its Score.
  """
  scores: dict[str, Score] = {}
  pattern = os.path.join(dirname, '**')
  for path in glob.iglob(pattern, recursive=True):
    if path.endswith('.stats'):
      with open(path) as stats_file:
        for row in csv.reader(stats_file):
          # Drop the last column before validating the width.
          fields = row[:-1]
          if len(fields) != 7:
            print(fields)
          name, copy, load, store, loadstore, ch_remat, ex_remat = fields
          key = name.split('.llvm.')[0]
          # The file's column order differs from the order of Score's fields,
          # so every value is passed by keyword.
          scores[key] = Score(
              copy=float(copy),
              load=float(load),
              store=float(store),
              ch_remat=float(ch_remat),
              ex_remat=float(ex_remat),
              loadstore=float(loadstore))
  return scores

# Load one name -> Score mapping per corpus directory. Each call walks its
# directory recursively and parses every '.stats' file found there.
raw_base = load_raw('/work/muppet_corpus_raw')
raw_shipped = load_raw('/work/muppet_corpus_mlgo_raw')
raw_9K = load_raw('/work/muppet_corpus_9K_raw')
raw_300K = load_raw('/work/muppet_corpus_300K_raw')
neg_262K = load_raw('/work/muppet_corpus_neg_262K_raw')


In [23]:
def shipped_reward(p, d):
  """Return 1.0 - (p + 0.01) / (d + 0.01).

  Both values are offset by 0.01 before dividing, so the ratio is still
  defined when d is 0. The result is 0.0 when p equals d and positive when
  p is smaller than d (for a positive denominator).
  """
  numerator = p + 0.01
  denominator = d + 0.01
  return 1.0 - numerator / denominator

def diff_reward(p, d):
  """Return d - p, i.e. how far p falls below d (negative if p is larger)."""
  difference = d - p
  return difference

def load_compiler_values(fname):
  """Read a four-column CSV and compute diff_reward for each function.

  Every row must have exactly four columns: the first is ignored, the second
  is the function name (anything from '.llvm.' onward is dropped), and the
  third and fourth are parsed as floats and passed to diff_reward as `d` and
  `p` respectively. A repeated name keeps the last row read.

  Args:
    fname: path of the CSV file to read.

  Returns:
    A pair (ranked, by_name): `by_name` maps function name to its reward and
    `ranked` is a list of (reward, name) tuples sorted in descending order.
  """
  mlgo = {}
  with open(fname) as csv_file:
    for _, name, d_text, p_text in csv.reader(csv_file):
      key = name.split('.llvm.')[0]
      d_value = float(d_text)
      p_value = float(p_text)
      mlgo[key] = diff_reward(p_value, d_value)
  ranked = [(value, name) for name, value in mlgo.items()]
  ranked.sort(reverse=True)
  return ranked, mlgo

# Load the ranked (reward, name) list and the name -> reward dict for two
# CSV files.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so in
# the recorded session these names may never have been bound.
mlgo_values, mlgo = load_compiler_values('/work/muppet_shipped.csv')
experiment_values, experiment = load_compiler_values('/work/muppet_shipped_important_303K.csv')

KeyboardInterrupt: 

In [None]:
# Names of the functions kept in `important`, as a set for fast membership
# tests and duplicate-free iteration.
important_set = {fname for _, fname in important}

def get_score(s: Score) -> float:
  """Collapse a Score into a single number.

  load, store, loadstore and copy each contribute with weight 1, and
  ex_remat is subtracted with weight 3. ch_remat does not contribute.
  """
  load_part = 1 * s.load
  store_part = 1 * s.store
  loadstore_part = 1 * s.loadstore
  ex_remat_part = 3 * s.ex_remat
  copy_part = 1 * s.copy
  return load_part + store_part + loadstore_part - ex_remat_part + copy_part

def report(scores: dict[str, Score]):
  """Sum get_score over every important function that appears in `scores`.

  Functions in the module-level `important_set` that are missing from
  `scores` are ignored.

  Args:
    scores: mapping from function name to its Score.

  Returns:
    The unweighted sum of the per-function scores, as a float (0.0 when no
    important function is present).
  """
  # NOTE(review): an earlier version computed a per-function frequency weight
  # (f_to_freq[f] / total) and a count of matched functions here, but neither
  # was ever used -- the sum has always been unweighted. The dead locals were
  # removed; confirm whether frequency weighting was actually intended.
  total_score = 0.0
  for f in important_set:
    if f in scores:
      total_score += get_score(scores[f])
  return total_score

# Aggregate score over the important functions for each loaded corpus.
nine=report(raw_9K)
base=report(raw_base)
threeK = report(raw_300K)
shipped = report(raw_shipped)
n = report(neg_262K)
# Print each total's reduction relative to the base total as a percentage,
# 100 * (1 - variant / base): positive means the variant's total is lower
# than base. This divides by `base`, so it fails if the base total is 0.
print(f'{100 * (1-nine/base)} {100 * (1-threeK/base)} {100 * (1-shipped/base)}')
print(f'{100 * (1 - n/base)}')
