### Goal: Generate the runtime and compile-time configurations for poppler

##### Libraries

In [1]:
import os, time 
import numpy as np
import pandas as pd
from scipy import stats

##### Runtime configs

In [50]:
# Column names of the runtime configuration table.
# "format" selects the image format (truthy -> -png, falsy -> -tiff in the
# script generator below); the remaining names are passed verbatim as
# pdfimages command-line flags whenever their value is truthy.
cols = [
    "format",
    "-j",
    "-jp2",
    "-jbig2",
    "-ccitt",
]

In [51]:
# The same eight option combinations (four on/off switches each) are listed
# once and then paired with both values of the leading format column:
# first all rows with format 0, then all rows with format 1.
option_rows = [
    [1, 1, 1, 1],
    [1, 1, 0, 1],
    [1, 0, 1, 1],
    [1, 0, 0, 1],
    [0, 1, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 1],
    [0, 1, 1, 0],
]
df = pd.DataFrame(
    [[fmt, *options] for fmt in (0, 1) for options in option_rows]
)

In [52]:
# Replace the default integer column labels with the names listed in cols.
df.columns = cols
# Bare expression: evaluates df so the notebook renders the table as the
# cell output.
df

Unnamed: 0,format,-j,-jp2,-jbig2,-ccitt
0,0,1,1,1,1
1,0,1,1,0,1
2,0,1,0,1,1
3,0,1,0,0,1
4,0,0,1,0,0
5,0,0,0,0,0
6,0,0,0,0,1
7,0,0,1,1,0
8,1,1,1,1,1
9,1,1,1,0,1


In [53]:
# Generate one benchmark shell script per runtime configuration (one per row
# of df) as ./scripts/<row number>.sh.
# Each script runs pdfimages on "$1" with the row's options under `time`,
# archives ./output/, and echoes one CSV line:
#   <config id>,<format>,<option values...>,<archive size>,<real time>
os.makedirs("./scripts", exist_ok=True)  # open(..., "w") does not create missing directories
for j in range(len(df)):  # follow the table size instead of a hard-coded 16
    # A plain list gives purely positional access; integer keys on a Series
    # with string labels (vals[0]) rely on a deprecated pandas fallback.
    vals = df.iloc[j].tolist()
    cmd_line = "{ time ./poppler/build/utils/pdfimages $inputlocation "
    # First column selects the image format: truthy -> PNG, falsy -> TIFF.
    if vals[0]:
        cmd_line += "-png "
        list_conf = "png,"
    else:
        cmd_line += "-tiff "
        list_conf = "tiff,"
    # Remaining columns are on/off switches named by cols. zip stops at the
    # shorter sequence, so any extra column in df beyond cols is ignored
    # instead of indexing past the end of cols.
    for flag, enabled in zip(cols[1:], vals[1:]):
        if enabled:
            cmd_line += flag + " "
        list_conf += str(enabled) + ","
    cmd_line += " -q ./output/ ; } 2> $logfilename\n"
    with open("./scripts/" + str(j) + ".sh", "w") as f:
        f.write('#!/bin/bash\nnumb="' + str(j))
        f.write('"\nlogfilename="./logs/$numb.log"\ninputlocation="$1"\n')
        f.write("# execute the configuration and measure its time\n")
        f.write(cmd_line)
        f.write("# compress the images\n")
        f.write("eval `tar czf output.tar.gz ./output/`\n")
        f.write("# extract compressed images sizes\n")
        f.write("size=`ls -lrt output.tar.gz | awk '{print $5}'`\n")
        f.write("# analyze log to extract relevant timing information and CPU usage\n")
        # NOTE: the string is deliberately not raw, so \t is written to the
        # script as a literal tab character inside the sed expression.
        f.write("""time=`grep "real" $logfilename | sed 's/real//;s/\t//' | cut -d "%" -f 1`\n""")
        f.write("# clean\n")
        f.write("eval `rm output.tar.gz`\n")
        f.write("eval `rm ./output/*`\n")
        # The CSV line starts with the configuration id and its settings...
        f.write('''csvLine="$numb,''')
        f.write(list_conf + '''"\n''')
        # ...and is completed with the measured size and time.
        f.write('''csvLine="$csvLine$size,$time"\n''')
        f.write('''echo $csvLine''')

In [54]:
# Add a readable label for the format column (truthy -> "png", falsy ->
# "tiff") and save the runtime configuration table to runtime_configs.csv.
expl = ["png" if is_png else "tiff" for is_png in df["format"]]
df["explicit"] = expl
df.to_csv("runtime_configs.csv")

##### Compile configs

In [42]:
# Draw 40 random rows of 7 binary (0/1) settings and keep only the distinct
# rows. The draw is not seeded, so the table differs between runs, and the
# index keeps the original row numbers of the surviving rows.
random_bits = np.random.randint(0, 2, size=(40, 7))
df = pd.DataFrame(random_bits).drop_duplicates()
# Bare expression: evaluates df so the notebook renders the table.
df

Unnamed: 0,0,1,2,3,4,5,6
0,0,0,0,0,0,0,0
1,1,1,0,1,0,0,0
2,0,0,0,0,1,1,1
3,0,0,0,0,0,1,0
4,1,1,0,0,1,0,1
5,0,0,1,0,1,0,0
6,0,0,1,0,0,0,1
7,1,0,0,1,1,1,1
8,1,0,1,0,0,1,1
9,0,0,1,0,0,1,1


In [43]:
# Names of the compile-time options, in the same order as the columns of the
# random configuration table. Each name is emitted as a -D<name>=... argument
# on the generated cmake command lines.
comp_cols = [
    "ENABLE_LIBOPENJPEG",
    "ENABLE_LIBJPEG",
    "ENABLE_ZLIB",
    "ENABLE_LIBTIFF",
    "ENABLE_NSS3",
    "HAVE_CAIRO",
    "WORDS_BIGENDIAN",
]

In [47]:
# Write one cmake command line per compile-time configuration (row of df) to
# cmd_lines.txt, for at most the first 30 rows.
# df holds however many distinct rows survived drop_duplicates(), which can be
# fewer than 30; capping at len(df) avoids an IndexError from df.iloc in that
# case while keeping the 30-line limit otherwise.
max_cmd_lines = 30
with open("cmd_lines.txt", "w") as f:
    for i in range(min(max_cmd_lines, len(df))):
        # Plain list => positional access that works whether the columns are
        # still the default integers or already renamed to comp_cols.
        val_cols = df.iloc[i].tolist()
        cmd_line = "cmake"
        for j, (option, enabled) in enumerate(zip(comp_cols, val_cols)):
            if j == 0:
                # The first option takes a named value rather than ON/OFF.
                value = "openjpeg2" if enabled else "unmaintained"
                cmd_line += " -DENABLE_LIBOPENJPEG=" + value
            else:
                value = "ON" if enabled else "OFF"
                cmd_line += " -D" + option + "=" + value
        # The trailing ".." is the path argument handed to cmake.
        f.write(cmd_line + " ..\n")

In [48]:
# Label the columns with the compile-time option names and save the table to
# ctime_configs.csv.
# NOTE(review): every row of df is written here, whereas cmd_lines.txt is
# limited to the first 30 rows -- confirm the two files are meant to differ
# when df has more than 30 rows.
df.columns =comp_cols
df.to_csv("ctime_configs.csv")