### Goal: Generate the runtime and compile-time configurations for xz

##### Libraries

In [1]:
import os, time 
import numpy as np
import pandas as pd
from scipy import stats

##### Runtime configs

In [2]:
# Candidate values of every xz runtime option, keyed by the flag exactly as it
# is typed on the command line. The single-space key holds the bare switches
# (-3, -6, -9) that are passed without an option name.
dico = {
    "--memory": ["50%", "75%", "100%"],
    "--format": ["xz", "lzma"],
    " ": ["-3", "-6", "-9"],
    "--threads": ["1", "16", "32"],
}

# Option order used when a command line is assembled (same order as dico).
cols = list(dico)

In [3]:
# Show the candidate values of each option, one list per line.
for choices in dico.values():
    print(choices)

['50%', '75%', '100%']
['xz', 'lzma']
['-3', '-6', '-9']
['1', '16', '32']


In [4]:
# Fixed seed so that Restart & Run All regenerates the same configurations
# (and therefore the same scripts and CSV).
runtime_seed = 0
runtime_rng = np.random.default_rng(runtime_seed)

# Number of random draws. Draws are independent, so the same configuration
# can come up more than once.
nb_conf = 60

# One column per option; each cell is an index into that option's list of
# candidate values in dico (upper bound is exclusive).
df = pd.DataFrame(
    {d: runtime_rng.integers(0, len(dico[d]), nb_conf) for d in dico}
)

In [5]:
# Keep one row per distinct configuration and renumber the rows 0..n-1.
# Without the renumbering the index keeps gaps where duplicates were removed,
# so the row labels written to the CSV no longer match the script numbers,
# which address rows by position (df.iloc[j]).
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,--memory,--format,Unnamed: 3,--threads
0,1,0,0,1
1,0,0,0,0
2,1,0,2,1
3,2,0,1,2
4,0,1,0,0
5,1,0,0,0
6,2,1,1,1
7,1,0,1,2
8,0,0,0,2
9,1,0,2,2


In [6]:
# Number of benchmark scripts to generate (one per runtime configuration).
nb_scripts = 30
scripts_dir = "./scripts"

# Check up front that enough configurations exist: indexing past the end of df
# inside the loop would raise IndexError only after the script file had already
# been opened and partly written.
if len(df) < nb_scripts:
    raise ValueError(
        "need %d runtime configurations but df only has %d rows"
        % (nb_scripts, len(df))
    )

# open() does not create missing directories.
os.makedirs(scripts_dir, exist_ok=True)

for j in range(nb_scripts):
    # Row j stores, for each option, the index of the selected value in dico.
    vals = df.iloc[j]
    # Resolve the indices by column label rather than with integer keys
    # (vals[1], vals[i]): integer keys on a string-labelled Series only work
    # through a deprecated positional fallback.
    chosen = [dico[option][vals[option]] for option in cols]
    # Suffix of the compressed file the script looks for
    # ($inputlocation.xz or $inputlocation.lzma).
    format_compression = dico["--format"][vals["--format"]]

    # Timed xz invocation; stderr (where `time` reports) goes to the log file.
    cmd_line = "{ time ./xz/src/xz/xz "
    for option, value in zip(cols, chosen):
        cmd_line += option + " " + value + " "
    cmd_line += " -k $inputlocation ; } 2> $logfilename\n"

    # Comma-terminated option values, emitted as the leading CSV fields.
    list_conf = "".join(value + "," for value in chosen)

    # The file is opened only once every lookup above has succeeded.
    with open(scripts_dir + "/" + str(j) + ".sh", "w") as f:
        f.write('#!/bin/bash\nnumb="' + str(j))
        f.write('"\nlogfilename="./logs/$numb.log"\ninputlocation="$1"\n')
        f.write("# execute the configuration and measure its time\n")
        f.write(cmd_line)
        f.write("# extract compressed images sizes\n")
        f.write("size=`ls -lrt $inputlocation." + format_compression + " | awk '{print $5}'`\n")
        f.write("# analyze log to extract relevant timing information and CPU usage\n")
        # Not a raw string on purpose: \t is written to the script as a real tab.
        f.write("""time=`grep "real" $logfilename | sed 's/real//;s/\t//' | cut -d "%" -f 1`\n""")
        f.write("# clean\n")
        f.write("eval `rm $inputlocation." + format_compression + "`\n")
        # The script prints one CSV line: id, option values, size, time.
        f.write('''csvLine="$numb,''')
        f.write(list_conf + '''"\n''')
        f.write('''csvLine="$csvLine$size,$time"\n''')
        f.write('''echo $csvLine''')

In [7]:
# Persist the runtime configuration table (row index included) for later use.
runtime_csv_path = "runtime_configs.csv"
df.to_csv(runtime_csv_path)

##### Compile configs

In [8]:
nb_copt = 6            # number of on/off compile-time options
nb_compile_conf = 40   # number of distinct configurations to draw

# Fixed seed so that Restart & Run All regenerates the same configurations.
compile_seed = 0
compile_rng = np.random.default_rng(compile_seed)

# Each integer in [0, 2**nb_copt) encodes one on/off combination of the
# options. Drawing the integers without replacement yields distinct rows
# directly. Drawing 0/1 cells independently and dropping duplicates left only
# about 30 of the 40 rows on average, so reading the first 30 rows downstream
# frequently failed, and the surviving rows kept gaps in their index.
codes = compile_rng.choice(2 ** nb_copt, size=nb_compile_conf, replace=False)
# Unpack bit k of every code into column k.
bits = (codes[:, None] >> np.arange(nb_copt)) & 1
df = pd.DataFrame(bits)
df

Unnamed: 0,0,1,2,3,4,5
0,0,1,0,0,1,0
1,0,1,1,1,0,0
2,1,0,0,0,0,0
3,1,0,0,1,1,0
4,1,0,1,0,0,0
5,0,1,0,0,1,1
6,0,1,0,1,0,1
7,1,0,1,0,1,0
8,1,1,1,1,1,1
9,0,1,1,0,0,1


In [12]:
# ./configure switches under study; entry k is the switch controlled by
# column k of the 0/1 compile configuration table.
comp_cols = [
    "--disable-scripts",
    "--disable-decoders",
    "--enable-small",
    "--disable-threads",
    "--disable-doc",
    "--enable-debug",
]

In [13]:
# Number of ./configure command lines to write (one per compile configuration).
nb_cmd_lines = 30

# Check before opening the file: running past the end of df inside the loop
# would raise IndexError and leave a partially written cmd_lines.txt behind.
if len(df) < nb_cmd_lines:
    raise ValueError(
        "need %d compile configurations but df only has %d rows"
        % (nb_cmd_lines, len(df))
    )

with open("cmd_lines.txt", "w") as f:
    for i in range(nb_cmd_lines):
        val_cols = df.iloc[i]
        # Positional access (.iloc) keeps this cell working whether the
        # columns are still numbered 0..5 or already renamed to the flag names.
        enabled = [
            flag for j, flag in enumerate(comp_cols) if val_cols.iloc[j] == 1
        ]
        # "./configure" followed by every switch whose cell is 1.
        f.write(" ".join(["./configure"] + enabled) + "\n")

In [14]:
# Label every column with the configure switch it toggles, then persist the
# table (row index included).
ctime_csv_path = "ctime_configs.csv"
df.columns = pd.Index(comp_cols)
df.to_csv(ctime_csv_path)