In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import display

# TODOs:
# automate tracefile generation and symbol analysis
# attestation_sets: 5/5
# tiny_aes_sets: 2/5
# max_elem native vs enclave: 0/3

# Attestation Datasets
att_full1 = pd.read_csv("data/att_full1.csv")
att_full2 = pd.read_csv("data/att_full2.csv")
att_full3 = pd.read_csv("data/att_full3.csv")
att_full4 = pd.read_csv("data/att_full4.csv")
att_full5 = pd.read_csv("data/att_full5.csv")

att_trim1 = pd.read_csv("data/att_trim1.csv")
att_trim2 = pd.read_csv("data/att_trim2.csv")
att_trim3 = pd.read_csv("data/att_trim3.csv")
att_trim4 = pd.read_csv("data/att_trim4.csv")
att_trim5 = pd.read_csv("data/att_trim5.csv")

# Tiny-AES Datasets
aes1 = pd.read_csv("data/tinyaes1.csv")
aes2 = pd.read_csv("data/tinyaes2.csv")


In [2]:
class Boundary:
    """A boundary is defined by a start and end symbol along with a name."""
    def __init__(self, name, start, end):
        self.name = name
        self.start = start
        self.end = end

In [3]:
"""Given a set of boundaries (in no order) gather the number of cycles it took
to reach the end of each of the boundaries, by returning a pandas dataframe of
the boundaries, their labels, and the cycle cost. This should work in the case
of multiple enclaves being started, running a user/runtime, and then destroying.
"""

def get_indicies(bset, data):
    """Return the indicies of all the found possible boundaries.
    """
    start_symbols = [b.start for b in bset]
    end_symbols = [b.end for b in bset]
    indicies = []
    for i, row in data.iterrows():
        if row['symbol'] in start_symbols or row['symbol'] in end_symbols:
            if i not in indicies:
                indicies.append(i)
                
    return indicies

def get_cost_table(data):
    """Return table with cycle cost in place of the relative cycle number
    """
    data["end"] = data["symbol"].shift(-1)
    data["cost"] = -(data['cycle'] - data['cycle'].shift(-1))
    # drop the last row because this is cycle cost per instruction
    data = data.drop(['cycle'], axis=1)[:-1]
    return data

def get_symbol_cost_table(phase_df, data):
    """Given a phase cycles table, extract the start and end indicies for each phase, and break
    down those symbols into a pivot table from the original data table.
    """
    # get cost table from original data
    cost_data = get_cost_table(data)
    result = []
    for _, row in phase_df.iterrows():
        start = row['start_index']
        phase = row['phases']
        end = row['end_index']
        symbol_cost_summed = cost_data[start:end].groupby('symbol').sum()
        symbol_cost_summed.reset_index(inplace=True)
        symbol_data = symbol_cost_summed.pivot(columns='symbol', values='cost').bfill().iloc[[0],:]
        symbol_data['phase'] = phase
        result.append(symbol_data)
    # merge dataframes and index on phase
    final = pd.concat(result, ignore_index=True).set_index('phase')
    # sort columns by mean value within column
    return final.reindex(final.mean().sort_values().index, axis=1)

def get_phase_cycles_table(indicies, bset, data):
    """Given indicies of interest and the corresponding data
    return the cycle count of each phase, along with the 
    start and end indicies of each phase which can be used
    for further analysis
    """
    start_symbols = [b.start for b in bset]
    end_symbols = [b.end for b in bset]
    
    # focus the data
    df = data.iloc[indicies]
    # boundary matching
    phases = []
    cycle_counts = []
    start_indicies = []
    end_indicies = []
    for startidx, row in df.iterrows():
        # continue up until the startidx equals the previous endidx
        if len(end_indicies) > 0:
            if startidx < end_indicies[-1]:
                continue
        start = row['symbol']
        # scan everything in front of the current row
        for endidx, next_row in df.loc[startidx:].iterrows():
            end = next_row['symbol']
            # print(start, end)
            found_end = False
            for b in bset:
                if b.start == start and b.end == end:
                    total_cycles = next_row['cycle'] - row['cycle'] 
                    phases.append(b.name)
                    cycle_counts.append(total_cycles)
                    start_indicies.append(startidx)
                    end_indicies.append(endidx)
                    found_end = True
                    break # on first one, we can stop
            if found_end:
                break
    
    labels = ["phases", "cycle_counts", "start_index", "end_index"]
    res = pd.DataFrame([phases, cycle_counts, start_indicies, end_indicies]).transpose()
    res.columns = labels
    return res

In [4]:
# Smaller bounds should come first.
create = Boundary("create_enclave", "keystone_create_enclave", "keystone_run_enclave")
run_overhead = Boundary("run_enclave", "keystone_run_enclave", "rt_base")

runtime = Boundary("enclave_runtimeuser", "rt_base", "mcall_sm_attest_enclave")
runtime2 = Boundary("enclave_runtimeuser", "return_to_encl", "mcall_sm_stop_enclave")
runtime3 = Boundary("enclave_runtimeuser", "return_to_encl", "mcall_sm_exit_enclave")

attest = Boundary("enclave_attestation", "mcall_sm_attest_enclave", "return_to_encl")
attest_ecall = Boundary("enclave_attestation_edge_call", "mcall_sm_stop_enclave", "return_to_encl")

runtime_noattest = Boundary("enclave_runtimeuser", "rt_base","mcall_sm_stop_enclave")
exit_enclave = Boundary("exit_enclave", "mcall_sm_exit_enclave", "keystone_ioctl")
destroy_bound = Boundary("destroy_enclave", "mcall_sm_destroy_enclave", "enclave_idr_remove")


enclave_bset = [create, run_overhead, runtime, runtime2, runtime3, attest, attest_ecall, runtime_noattest,
               exit_enclave, destroy_bound]

In [10]:
# Run Analysis
def get_buckets(data):
    bset = enclave_bset
    indicies = get_indicies(bset, data)
    # display(data.iloc[indicies]) # sanity check
    phase_table = get_phase_cycles_table(indicies, bset, data)
    # display(phase_table) # sanity check
    results = get_symbol_cost_table(phase_table, data)
    return results.groupby('phase').sum().replace(0, np.nan)

def mean(lst):
    return sum(lst)/len(lst)

def get_error_bounds_per_phase(lst):
    num_phases = lst[0].shape[0]
    
    print("Error bounds:")
    for i in range(num_phases):
        curr_phase_costs = [l.sum(axis=1).values[i] for l in lst]        
        avg = mean(curr_phase_costs)
        mean_sq_diff = [(c - avg)**2 for c in curr_phase_costs]
        avg_mean_sq_diff = mean(mean_sq_diff)
        std_dev = avg_mean_sq_diff**0.5
        err_bound = std_dev / avg
        print(lst[0].index[i] + ":", avg, "std dev:", std_dev, "err:", err_bound * 100)
        
def final_table(lst):
    """Given a lst of bucket tables, get the final result, which is the mean
    of those tables, and sort the indicies by avg value.
    """
    result = mean(lst)
    return result.reindex(result.mean().sort_values().index, axis=1)

In [11]:
t1 = get_buckets(att_trim1)
t2 = get_buckets(att_trim2)
t3 = get_buckets(att_trim3)
t4 = get_buckets(att_trim4)
t5 = get_buckets(att_trim5)

f1 = get_buckets(att_full1)
f2 = get_buckets(att_full2)
f3 = get_buckets(att_full3)
f4 = get_buckets(att_full4)
f5 = get_buckets(att_full5)

a1 = get_buckets(aes1)

atttrimmed = [t1, t2, t3, t4, t5]
attfull = [f1, f2, f3, f4, f5]
aes = [a1]

get_error_bounds_per_phase(atttrimmed)
get_error_bounds_per_phase(attfull)
get_error_bounds_per_phase(aes)

Error bounds:
('create_enclave:', 49494462.2, 'std dev:', 26687.224826871756, 'err:', 0.05391961775245181)
('destroy_enclave:', 240214.4, 'std dev:', 30.017328328817005, 'err:', 0.01249605699276022)
('enclave_attestation:', 1278064.4, 'std dev:', 2218.326269961207, 'err:', 0.17356920902899783)
('enclave_attestation_edge_call:', 4914812.2, 'std dev:', 471.2767339897016, 'err:', 0.009588906245282405)
('enclave_runtimeuser:', 12481.2, 'std dev:', 87.0066664112584, 'err:', 0.69710177235569)
('exit_enclave:', 81846.6, 'std dev:', 1498.771710434915, 'err:', 1.831196055101757)
('run_enclave:', 5212.2, 'std dev:', 172.11670459313353, 'err:', 3.30218918293875)
Error bounds:
('create_enclave:', 49494462.2, 'std dev:', 26687.224826871756, 'err:', 0.05391961775245181)
('destroy_enclave:', 240214.4, 'std dev:', 30.017328328817005, 'err:', 0.01249605699276022)
('enclave_attestation:', 1278064.4, 'std dev:', 2218.326269961207, 'err:', 0.17356920902899783)
('enclave_attestation_edge_call:', 4914812.2,

Unnamed: 0_level_0,hash_finalize,osm_pmp_set,work_pending,hash_extend,get_enclave_by_id,swake_up,sbi_set_timer,next_event,utm_init,blk_delay_work,...,uart_putchar,sha3_keccakf,___slab_alloc.isra.10.con,__slab_free.isra.14,bio_attempt_back_merge,cfq_allow_bio_merge,elv_bio_merge_ok,finish_fault,ll_back_merge_fn,put_cpu_partial
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
create_enclave,2.0,,6.2,8.4,,12.0,,20.8,22.0,26.4,...,,48231909.6,,,,,,,,
destroy_enclave,,,,,,,,,,,...,,,,,,,,,,
enclave_attestation,,,,,,,,,,,...,,308692.2,,,,,,,,
enclave_attestation_edge_call,,12.0,,,10.0,,,,,,...,4421833.8,,,,,,,,,
enclave_runtimeuser,,,,,,,18.0,,,,...,,,,,,,,,,
exit_enclave,,6.0,9.8,,,,,,,,...,,,,,,,,,,
run_enclave,,6.0,,,10.0,,,,,,...,,,,,,,,,,


In [None]:
finalatt= final_table(attfull)
finalatt

In [None]:
finalaes= final_table(aes)
finalaes

In [12]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

import cufflinks as cf

# You'll need to set API key using:
# plotly.tools.set_credentials_file(username='DemoAccount', api_key='lr1c37zw81')
# Find your api_key here: https://plot.ly/settings/api
# Cufflinks binds plotly to pandas dataframes in IPython notebook.

cf.set_config_file(offline=False, world_readable=True, theme='ggplot')

finalatt.iplot(kind='bar', barmode="stack", filename='name.html')

In [15]:
# analyze pain points
# for all the phases, get the top offenders
def get_top_x(x, df):
    num_phases = df.shape[0]
    for i in range(num_phases):
        phase_data = df.iloc[[i]].transpose()
        phase = phase_data.columns[0]
        phase_data = phase_data[phase_data[phase].notnull()]
        phase_data.columns=["total_cost"]
        print(phase)
        return(phase_data.sort_values("total_cost", ascending=False)[:x])

get_top_x(15, finalatt)

create_enclave


Unnamed: 0,total_cost
sha3_keccakf,48231909.6
memset,216151.6
__copy_user,68160.8
init_encl_pgtable,67801.0
restore_regs,34903.2
__cpuidle_text_end,30753.8
kmem_cache_alloc,24459.0
__update_load_avg_cfs_rq.,23483.8
hash_epm,23000.4
_raw_spin_unlock_irqresto,22608.8


In [None]:
att_trim = get_buckets(att_trim2)
att_trim.iplot(kind='bar', barmode="stack", filename='name2.html')

In [13]:
import plotly.plotly as py
import plotly.graph_objs as go

import pandas as pd
import numpy as np

N = 20
x = np.linspace(1, 10, N)
y = np.random.randn(N)+3
y2 = np.random.randn(N)+6
y3 = np.random.randn(N)+9
y4 = np.random.randn(N)+12
df = pd.DataFrame({'x': x, 'y': y, 'y2':y2, 'y3':y3, 'y4':y4})
df.head()

data = [
    go.Bar(
        x=df['x'], # assign x as the dataframe column 'x'
        y=df['y']
    ),
    go.Bar(
        x=df['x'],
        y=df['y2']
    ),
    go.Bar(
        x=df['x'],
        y=df['y3']
    ),
    go.Bar(
        x=df['x'],
        y=df['y4']
    )

]

layout = go.Layout(
    barmode='stack',
    title='Stacked Bar with Pandas'
)

fig = go.Figure(data=data, layout=layout)

# IPython notebook
py.iplot(fig, filename='pandas-bar-chart-layout')