In [1]:
import pandas as pd
import numpy as np
import fileinput
import shutil # For file copy
from os import listdir
from os.path import isfile, join

# Formatting the files

We specify the path where the data files are stored:

In [2]:
# Directory that holds the raw measurement files (relative to the notebook).
path = '2016-01-02_odroid_tests/'

# Name of the CSV file, created inside `path`, that collects the lines
# containing 'stage1' when the inversion files are reformatted.
file_stages = 'unrecorded_single_threaded_inversion.csv'

### Make a copy of the files

Make a copy of the files with a csv extension instead of txt

In [3]:
# Raw logs: regular files in `path` whose name ends in '.txt' and mentions
# 'inversion' or 'multiplication'. The listing is sorted because listdir()
# returns entries in arbitrary order and the files are concatenated later on.
files_txt = [join(path, f) for f in sorted(listdir(path))
             if f.endswith('.txt')
             and ('inversion' in f or 'multiplication' in f)
             and isfile(join(path, f))]

# Same paths with only the trailing '.txt' extension swapped for '.csv'
# (a '.txt' occurring elsewhere in the path is left untouched).
files = [f[:-len('.txt')] + '.csv' for f in files_txt]

# Work on copies so the original .txt logs are never modified.
for src, dst in zip(files_txt, files):
    shutil.copy(src, dst)

### Lists of files depending on the type of test

In [4]:
# First split on whether the file name mentions 'power' ...
power_files = [f for f in files if 'power' in f]
speed_files = [f for f in files if 'power' not in f]

# ... then on the kind of test named in the file name.
power_multiply_files = [f for f in power_files if 'multiplication' in f]

power_inversion_files = [f for f in power_files if 'inversion' in f]

multiply_files = [f for f in speed_files if 'multiplication' in f]

inversion_files = [f for f in speed_files if 'inversion' in f]

### Reformat the files to CSV.

* We ignore the lines starting with # because those are the warmup measurements
* We also ignore any line containing the word sorry

In [5]:
# Rewrite every CSV copy in place, turning spaces into commas.
#   * blank lines, lines starting with '#' and lines containing 'sorry' are removed
#   * lines containing 'stage1' are moved to the separate `file_stages` file
#   * every other line is kept, comma separated
# NOTE: the cell is not idempotent on its own. The 'stage1' lines are removed
# from the CSV copies, and `file_stages` is truncated on every run, so re-run
# the copy cell before re-running this one.
with open(join(path, file_stages), 'w') as new_file:
    # An empty file list would make fileinput read from standard input instead.
    if files:
        # With inplace=True, standard output is redirected into the file being
        # read, so print() writes the replacement line. The context manager
        # restores stdout and closes the files even if an error occurs.
        with fileinput.FileInput(files, inplace=True) as reader:
            for line in reader:
                if not line.rstrip():
                    continue
                if line.startswith('#') or 'sorry' in line:
                    continue
                if 'stage1' in line:
                    new_file.write(line.replace(' ', ','))
                else:
                    print(line.replace(' ', ','), end='')

The inversion files contain special lines for single-threaded, unrecorded inversions. Because those lines do not fit the format of the other measurements, the cell above writes them to a separate file.

# Making the dataframes

### Multiply - Speed files

In [6]:
# Column layout of the multiply speed files. The numbered names are
# placeholders for columns that are dropped right after loading.
multiply_columns = ["1","Threads","2","Gen_Size","3","m","4","Block_Size","Strat","Act_1","5","Act_2"]

# Read every multiply speed file and concatenate once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop is quadratic.
multiply_frames = [pd.read_csv(f, header=None, names=multiply_columns)
                   for f in multiply_files]

# pd.concat rejects an empty list, so fall back to a columns-only skeleton.
if multiply_frames:
    multiply_data = pd.concat(multiply_frames, ignore_index=True)
else:
    multiply_data = pd.DataFrame(columns=multiply_columns)

# Drop extra columns
multiply_data = multiply_data.drop(["1","2","3","4","5"], axis=1)

### Inversion - Speed files

In [7]:
# Column layout of the inversion speed files. The numbered names are
# placeholders for columns that are dropped right after loading.
inversion_columns = ["1","Threads","2","Gen_Size","3","Block_Size","4","Act_1","5","Act_2"]

# Read every inversion speed file and concatenate once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop is quadratic.
inversion_frames = [pd.read_csv(f, header=None, index_col=False, names=inversion_columns)
                    for f in inversion_files]

# pd.concat rejects an empty list, so fall back to a columns-only skeleton.
if inversion_frames:
    inversion_data = pd.concat(inversion_frames, ignore_index=True)
else:
    inversion_data = pd.DataFrame(columns=inversion_columns)

# Drop extra columns
inversion_data = inversion_data.drop(["1","2","3","4","5"], axis=1)

#### Inversion, special case (Single threaded not recorded)

In [8]:
# Column layout of the file holding the 'stage1' lines; the numbered names
# are placeholders for the columns removed below.
special_columns = ["1","Threads","2","Gen_Size","3","Block_Size","4",
                   "Stage_1","5","Stage_2","6","Stage_3","7","lu_inv_blocked"]

# Load the file and keep only the named measurement columns.
inversion_data_special = pd.read_csv(
    join(path, file_stages),
    header=None,
    index_col=False,
    names=special_columns,
).drop(["1","2","3","4","5","6","7"], axis=1)

### Power files

Multiply power

In [9]:
# Column layout of the multiply power files. The numbered names are
# placeholders for columns that are dropped right after loading.
power_multiply_columns = ["1","Threads","2","Gen_Size","3","m","4","Block_Size",
                          "Strat","Act_1","5","Act_2","6","Power_Wattsecs","7",
                          "8","Power_Avg(W)","9"]

# Read every multiply power file and concatenate once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop is quadratic.
power_multiply_frames = [pd.read_csv(f, header=None, names=power_multiply_columns)
                         for f in power_multiply_files]

# pd.concat rejects an empty list, so fall back to a columns-only skeleton.
if power_multiply_frames:
    power_multiply_data = pd.concat(power_multiply_frames, ignore_index=True)
else:
    power_multiply_data = pd.DataFrame(columns=power_multiply_columns)

# Drop extra columns
power_multiply_data = power_multiply_data.drop(
    ["1","2","3","4","5","6","7","8","9"], axis=1)

Inversion power

In [10]:
# Column layout of the inversion power files. The numbered names are
# placeholders for columns that are dropped right after loading.
# The placeholders must be unique and must match the drop list below: the
# previous layout used "5" twice (read_csv rejects duplicate names) and had
# no "3" (so dropping "3" raised a KeyError). The number and position of the
# columns are unchanged.
power_inversion_columns = ["1","Threads","2","Gen_Size","3","Block_Size",
                           "4","Act_1","5","Act_2","6","Power_Wattsecs","7",
                           "8","Power_Avg(W)","9"]

# Read every inversion power file and concatenate once. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop is quadratic.
power_inversion_frames = [pd.read_csv(f, header=None, names=power_inversion_columns)
                          for f in power_inversion_files]

# pd.concat rejects an empty list, so fall back to a columns-only skeleton.
if power_inversion_frames:
    power_inversion_data = pd.concat(power_inversion_frames, ignore_index=True)
else:
    power_inversion_data = pd.DataFrame(columns=power_inversion_columns)

# Drop extra columns
power_inversion_data = power_inversion_data.drop(
    ["1","2","3","4","5","6","7","8","9"], axis=1)

# Save dataframes as pickles

Map each output file name to the dataframe that will be saved under it:

In [11]:
# Pickle file name -> dataframe stored under that name.
data_dic = dict([
    ('multiply_speed.p', multiply_data),
    ('inversion_speed.p', inversion_data),
    ('inversion_special.p', inversion_data_special),
    ('power_multiply.p', power_multiply_data),
    ('power_inversion.p', power_inversion_data),
])

Save dataframes

In [12]:
# Write each dataframe to a pickle file inside `path`, named after its key.
for name, df in data_dic.items():
    target = join(path, name)
    df.to_pickle(target)