In [87]:
import os
import json
import numpy as np
import pandas as pd
import re
import statistics

# Root directory of the data repository.  The functions in this file list
# this directory and read/write files under `repo/<run>/`.  The value is a
# placeholder and must be replaced with a real path before running.
repo = '<path to data repo>'

# Model checking plan data

In [89]:
def extract_plan_data(run):
    """Parse a run's ``autoctrl.txt`` log into a table of planning results.

    The log is scanned line by line and one value is collected per matching
    line, in file order:

    * lines whose first ``:``-separated field is ``ACCEPT``, ``PATH`` or
      ``PLAN`` contribute the stripped text after the last ``:``;
    * lines ending in ``ms`` contribute, as a float, the text that follows
      the first ``!`` with the trailing ``ms`` removed.

    The collected values are then grouped four at a time into rows.

    Args:
        run: Name of the run directory inside ``repo``.

    Returns:
        pandas.DataFrame with columns ``accept``, ``path``, ``plan``,
        ``latency`` and ``steps``.  ``steps`` is the number of
        space-separated tokens in ``path`` (0 when ``path`` is missing).

    Raises:
        OSError: If the log file cannot be opened.
        IndexError: If a line ending in ``ms`` contains no ``!``.
        ValueError: If the latency text is not a valid float.
    """
    labels = ('ACCEPT', 'PATH', 'PLAN')
    data = []

    # The context manager closes the file even when parsing a line raises.
    with open(repo+'/{}/autoctrl.txt'.format(run), 'r') as file:
        for line in file:
            if line.split(':')[0] in labels:
                data.append(line.split(':')[-1].strip())
            elif line.strip()[-2:] == 'ms':
                d = line.split('!')[1].strip()[:-2]
                data.append(float(d))

    # Pad so that a log with no records, or with a single four-value record,
    # still yields two rows.
    if len(data) == 0:
        data = [None] * 8

    if len(data) == 4:
        data += [None] * 4

    # Empty strings and values starting with '*' are treated as missing.
    data = [None if x == '' or str(x)[0] == '*' else x for x in data]

    # Group into complete rows of four; a trailing partial group is dropped.
    res = [data[i:i + 4] for i in range(0, len(data) - 3, 4)]

    for item in res:
        path = item[1]
        item.append(len(path.split(' ')) if path is not None else 0)

    cols = ['accept', 'path', 'plan', 'latency', 'steps']
    return pd.DataFrame(data=res, columns=cols)

def add_plan_data(n_runs):
    """Write ``<run>plan.csv`` into every run directory of ``repo``.

    Only entries of ``repo`` whose name starts with ``'0'`` are processed.
    For each one the table returned by ``extract_plan_data`` is saved as
    ``repo/<run>/<run>plan.csv``.

    Args:
        n_runs: Number of runs expected.  If a different number was
            processed, a ``FAILED: count = ...`` line is printed; no
            exception is raised.
    """
    count = 0
    for run in os.listdir(repo):
        if run.startswith('0'):
            df = extract_plan_data(run)
            df.to_csv(repo+'/{}/{}plan.csv'.format(run, run))
            count += 1

    # Explicit comparison instead of `assert`, which is removed when Python
    # runs with -O and would silently disable this check.
    if count != n_runs:
        print('FAILED: count = {}'.format(count))
    
# Generate the plan CSV for every run directory; a FAILED line is printed
# unless exactly 2 runs were processed.
add_plan_data(n_runs=2)

# Model checking resource usage

In [90]:
def _extract_stack_data(data):
    result = []
    
    for item in data:
        if item[:3] != 'SET':
            result.append(item.strip())
        else:
            break
            
    result = result[2:-2]
    # get stack size data here
    result = [int(x[-1]) for x in result]
            
    return result
    
def _extract_set_data(data):
    result = []
    
    for item in reversed(data[:-5]):
        if item[:3] != 'SET':
            try:
                result.append(int(item.strip()))
            except ValueError:
                pass
        else:
            break
        
    return result
    
def extract_performance_data(run):
    """Return the peak stack and set readings of every DFS log in a run.

    Every file in ``repo/<run>`` whose name starts with ``dfs`` is read and
    passed to ``_extract_stack_data`` and ``_extract_set_data``; the maximum
    of each result is recorded.

    Paths are built with ``os.path.join`` so the function works on any
    platform (the separator is no longer a hard-coded backslash).

    Args:
        run: Name of the run directory inside ``repo``.

    Returns:
        tuple[list[int], list[int]]: ``(stack_data, set_data)`` with one
        entry per DFS log file, in sorted file-name order.

    Raises:
        ValueError: If a log yields no stack or no set readings (``max`` of
            an empty list).
    """
    run_dir = os.path.join(repo, run)

    stack_data = []
    set_data = []

    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the result is reproducible.
    for name in sorted(os.listdir(run_dir)):
        if not name.startswith('dfs'):
            continue

        with open(os.path.join(run_dir, name)) as infile:
            lines = infile.readlines()

        stack_data.append(max(_extract_stack_data(lines)))
        set_data.append(max(_extract_set_data(lines)))

    return stack_data, set_data

def add_performance_to_run_data(n_runs):
    """Write ``<run>model_check.csv`` into every run directory of ``repo``.

    Only entries of ``repo`` whose name starts with ``'0'`` are processed.
    Each CSV holds the per-log peaks returned by
    ``extract_performance_data`` plus five constant columns
    (``state_size``, ``n_states``, ``adj_list_compile``, ``stack_compile``,
    ``set_compile``) whose values are fixed in this function.

    Args:
        n_runs: Number of runs expected.  If a different number was
            processed, a ``FAILED: count = ...`` line is printed; no
            exception is raised.
    """
    count = 0
    for run in os.listdir(repo):
        if run.startswith('0'):
            stack_data, set_data = extract_performance_data(run)

            df = pd.DataFrame()
            df['stack_data'] = stack_data
            df['set_data'] = set_data
            # Constants broadcast to every row.
            # NOTE(review): units/meaning are not shown in this file.
            df['state_size'] = 4
            df['n_states'] = 15
            df['adj_list_compile'] = 184
            df['stack_compile'] = 12
            df['set_compile'] = 24
            df.to_csv(repo+'/{}/{}model_check.csv'.format(run, run))
            count += 1

    # Explicit comparison instead of `assert`, which is removed when Python
    # runs with -O and would silently disable this check.
    if count != n_runs:
        print('FAILED: count = {}'.format(count))

# Generate the model-check CSV for every run directory; a FAILED line is
# printed unless exactly 2 runs were processed.
add_performance_to_run_data(n_runs=2)

# Process resource usage

In [91]:
def _parse_process_field(token):
    """Convert one field of a process row to a float where possible.

    A field containing ``:`` is read as colon-separated base-60 groups
    (e.g. ``1:23.45`` -> 83.45) so that the minutes part is not lost.
    Fields that cannot be parsed are returned unchanged as strings.
    """
    if ':' in token:
        try:
            seconds = 0.0
            for part in token.split(':'):
                seconds = seconds * 60 + float(part)
        except ValueError:
            return token
        return seconds

    try:
        return float(token)
    except ValueError:
        return token


def _extract_process_memory(run):
    """Parse a run's ``usage.txt`` into a table of process and memory samples.

    Three kinds of lines are used (the format looks like batch output of
    ``top`` -- confirm against the data):

    * ``MiB Mem :`` lines -> ``mem_total``, ``mem_free``, ``mem_used``,
      ``cache``;
    * ``MiB Swap:`` lines -> ``swap_total``, ``swap_free``, ``swap_used``;
    * lines ending in ``autoctrl`` -> ``pr``, ``ni``, ``virt``, ``res``,
      ``shr``, ``status``, ``cpu_perc``, ``mem_perc``, ``cpu_time``.

    The three tables are joined row by row on their position in the file.

    Args:
        run: Name of the run directory inside ``repo``.

    Returns:
        pandas.DataFrame: One row per sample with the columns above plus
        ``process_uptime``; only rows with ``mem_perc > 0`` are kept.

    Raises:
        OSError: If the usage file cannot be opened.
        ValueError: If a memory or swap value is not numeric, or a parsed
            row does not match the expected number of columns.
    """
    physical = []
    swap = []
    process = []

    # The context manager closes the file even when reading fails.
    with open(repo+'/{}/usage.txt'.format(run), 'r') as file:
        for line in file:
            label = line.split(':')[0]
            if label == 'MiB Mem ':
                physical.append(line.split(':')[-1].strip())
            elif label == 'MiB Swap':
                swap.append(line.split(':')[-1].strip())
            elif line.strip()[-8:] == 'autoctrl':
                process.append(line)

    # Each comma-separated field is "<number> <label>"; take the full
    # leading number.  Slicing a fixed number of characters would truncate
    # values such as 15925.3 or drop the decimals of 975.4.
    physical_rows = [[float(field.split()[0]) for field in row.split(',')]
                     for row in physical]
    swap_rows = [[float(field.split()[0]) for field in row.split(',')]
                 for row in swap]

    # Drop the first 9 characters and the trailing 'autoctrl' of each
    # process line, then split the rest on whitespace.
    # NOTE(review): the fixed offset presumably skips the PID and user
    # columns -- confirm it holds for every PID/user width.
    process_rows = [
        [_parse_process_field(token)
         for token in row.strip()[9:-8].split()]
        for row in process
    ]

    phy_cols = ['mem_total', 'mem_free', 'mem_used', 'cache']
    phy = pd.DataFrame(data=physical_rows, columns=phy_cols)

    swp_cols = ['swap_total', 'swap_free', 'swap_used']
    swp = pd.DataFrame(data=swap_rows, columns=swp_cols)

    proc_cols = ['pr', 'ni', 'virt', 'res', 'shr', 'status',
                 'cpu_perc', 'mem_perc', 'cpu_time']
    proc = pd.DataFrame(data=process_rows, columns=proc_cols)

    df = pd.merge(proc, phy, how='inner', left_index=True, right_index=True)
    df = pd.merge(df, swp, how='inner', left_index=True, right_index=True)
    # Sample index scaled by 0.001.
    # NOTE(review): presumably samples are 1 ms apart -- confirm.
    df['process_uptime'] = 0.001 * df.index
    df = df[df.mem_perc > 0]

    return df

def add_process_to_run_data(n_runs):
    """Write ``<run>usage.csv`` into every run directory of ``repo``.

    Every sub-directory of ``repo`` is processed: the table returned by
    ``_extract_process_memory`` is saved as ``repo/<run>/<run>usage.csv``.
    Entries of ``repo`` that are not directories are skipped.

    NOTE(review): unlike ``add_plan_data`` and
    ``add_performance_to_run_data``, this function does not restrict itself
    to names starting with ``'0'`` -- confirm whether that is intended.

    Args:
        n_runs: Number of runs expected.  If a different number was
            processed, a ``FAILED: count = ...`` line is printed; no
            exception is raised.
    """
    count = 0
    for run in os.listdir(repo):
        # A plain file cannot contain usage.txt; opening it would fail.
        if not os.path.isdir(os.path.join(repo, run)):
            continue

        df = _extract_process_memory(run)
        df.to_csv(repo+'/{}/{}usage.csv'.format(run, run))
        count += 1

    # Explicit comparison instead of `assert`, which is removed when Python
    # runs with -O and would silently disable this check.
    if count != n_runs:
        print('FAILED: count = {}'.format(count))

# Generate the usage CSV for every entry of the repo; a FAILED line is
# printed unless exactly 4 entries were processed.
add_process_to_run_data(n_runs=4)