In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import display

# TODOs:
# automate tracefile generation and symbol analysis
# TODO: get an error bound based on the data (take an average, check how much it varies by)
# attestation_sets: 2/5
# tiny_aes_sets: 0/5
# TODO: Sum by 

att_full1 = pd.read_csv("data/att_full1.csv")
att_full2 = pd.read_csv("data/att_full2.csv")

# Note, get an average of all the data, and run computations on that. Then 

att_trim1 = pd.read_csv("data/att_trim1.csv")
att_trim2 = pd.read_csv("data/att_trim2.csv")

Unnamed: 0,cycle,env,symbol
0,51884102315,linuxlinux,keystone_create_enclavekeystone_create_enclave
1,51884102527,linuxlinux,calculate_required_pagescalculate_required_pages
2,51884102783,linuxlinux,create_enclavecreate_enclave
3,51884506924,linuxlinux,epm_initepm_init
4,51884507052,linuxlinux,init_free_pagesinit_free_pages
5,51884518478,linuxlinux,get_free_page.part.0get_free_page.part.0
6,51884520993,linuxlinux,keystone_rtld_init_runtimkeystone_rtld_init_ru...
7,51885484518,linuxlinux,rtld_vm_mmaprtld_vm_mmap
8,51885484821,linuxlinux,epm_alloc_rt_pageepm_alloc_rt_page
9,51885485041,linuxlinux,epm_alloc_pageepm_alloc_page


In [2]:
class Boundary:
    """A boundary is defined by a start and end symbol along with a name."""
    def __init__(self, name, start, end):
        self.name = name
        self.start = start
        self.end = end

In [3]:
"""Given a set of boundaries (in no order) gather the number of cycles it took
to reach the end of each of the boundaries, by returning a pandas dataframe of
the boundaries, their labels, and the cycle cost. This should work in the case
of multiple enclaves being started, running a user/runtime, and then destroying.
"""

def get_indicies(bset, data):
    """Return the indicies of all the found possible boundaries.
    """
    start_symbols = [b.start for b in bset]
    end_symbols = [b.end for b in bset]
    indicies = []
    for i, row in data.iterrows():
        if row['symbol'] in start_symbols or row['symbol'] in end_symbols:
            if i not in indicies:
                indicies.append(i)
                
    return indicies

def get_cost_table(data):
    """Return table with cycle cost in place of the relative cycle number
    """
    data["end"] = data["symbol"].shift(-1)
    data["cost"] = -(data['cycle'] - data['cycle'].shift(-1))
    # drop the last row because this is cycle cost per instruction
    data = data.drop(['cycle'], axis=1)[:-1]
    return data

def get_symbol_cost_table(phase_df, data):
    """Given a phase cycles table, extract the start and end indicies for each phase, and break
    down those symbols into a pivot table from the original data table.
    """
    # get cost table from original data
    cost_data = get_cost_table(data)
    result = []
    for _, row in phase_df.iterrows():
        start = row['start_index']
        phase = row['phases']
        end = row['end_index']
        symbol_cost_summed = cost_data[start:end].groupby('symbol').sum()
        symbol_cost_summed.reset_index(inplace=True)
        symbol_data = symbol_cost_summed.pivot(columns='symbol', values='cost').bfill().iloc[[0],:]
        symbol_data['phase'] = phase
        result.append(symbol_data)
    # merge dataframes and index on phase
    final = pd.concat(result, ignore_index=True).set_index('phase')
    # sort columns by mean value within column
    return final.reindex(final.mean().sort_values().index, axis=1)

def get_phase_cycles_table(indicies, bset, data):
    """Given indicies of interest and the corresponding data
    return the cycle count of each phase, along with the 
    start and end indicies of each phase which can be used
    for further analysis
    """
    start_symbols = [b.start for b in bset]
    end_symbols = [b.end for b in bset]
    
    # focus the data
    df = data.iloc[indicies]
    # boundary matching
    phases = []
    cycle_counts = []
    start_indicies = []
    end_indicies = []
    for startidx, row in df.iterrows():
        # continue up until the startidx equals the previous endidx
        if len(end_indicies) > 0:
            if startidx < end_indicies[-1]:
                continue
        start = row['symbol']
        # scan everything in front of the current row
        for endidx, next_row in df.loc[startidx:].iterrows():
            end = next_row['symbol']
            # print(start, end)
            found_end = False
            for b in bset:
                if b.start == start and b.end == end:
                    total_cycles = next_row['cycle'] - row['cycle'] 
                    phases.append(b.name)
                    cycle_counts.append(total_cycles)
                    start_indicies.append(startidx)
                    end_indicies.append(endidx)
                    found_end = True
                    break # on first one, we can stop
            if found_end:
                break
    
    labels = ["phases", "cycle_counts", "start_index", "end_index"]
    res = pd.DataFrame([phases, cycle_counts, start_indicies, end_indicies]).transpose()
    res.columns = labels
    return res

In [4]:
# Smaller bounds should come first.
create = Boundary("create_enclave", "keystone_create_enclave", "keystone_run_enclave")
run_overhead = Boundary("run_enclave", "keystone_run_enclave", "rt_base")

runtime = Boundary("enclave_runtimeuser", "rt_base", "mcall_sm_attest_enclave")
runtime2 = Boundary("enclave_runtimeuser", "return_to_encl", "mcall_sm_stop_enclave")
runtime3 = Boundary("enclave_runtimeuser", "return_to_encl", "mcall_sm_exit_enclave")

attest = Boundary("enclave_attestation", "mcall_sm_attest_enclave", "return_to_encl")
attest_ecall = Boundary("enclave_attestation_edge_call", "mcall_sm_stop_enclave", "return_to_encl")

runtime_noattest = Boundary("enclave_runtimeuser", "rt_base","mcall_sm_stop_enclave")
exit_enclave = Boundary("exit_enclave", "mcall_sm_exit_enclave", "keystone_ioctl")
destroy_bound = Boundary("destroy_enclave", "mcall_sm_destroy_enclave", "enclave_idr_remove")


enclave_bset = [create, run_overhead, runtime, runtime2, runtime3, attest, attest_ecall, runtime_noattest,
               exit_enclave, destroy_bound]

In [16]:
# Run Analysis
def get_buckets(data):
    bset = enclave_bset
    indicies = get_indicies(bset, data)
    # display(data.iloc[indicies]) # sanity check
    phase_table = get_phase_cycles_table(indicies, bset, data)
    # display(phase_table) # sanity check
    results = get_symbol_cost_table(phase_table, data)
    return results


In [38]:
t1 = get_buckets(att_trim1)
t2 = get_buckets(att_trim2)
# res = (t1 + t2)/2

# taking the mean: sum the elements, divide by the number of elements
# taking the stddev: subtract the mean, square the result

lst = [t1, t2]
(sum(lst)/len(lst)).iloc[[0]].sum(axis=1)


phase
create_enclave    49480839.0
dtype: float64

In [6]:
att = get_buckets(att_full2)
display(att)

Unnamed: 0,cycle,env,symbol
0,4873271364,linux,keystone_create_enclave
4498,4922757010,linux,keystone_ioctl
4501,4922757616,linux,keystone_run_enclave
4522,4922762984,runtime,rt_base
4560,4922766487,sm,mcall_sm_attest_enclave
8713,4924043238,runtime,return_to_encl
8744,4924050238,sm,mcall_sm_stop_enclave
15882,4928959112,linux,keystone_ioctl
15906,4928965538,runtime,return_to_encl
15919,4928967454,sm,mcall_sm_exit_enclave


Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,49486252,0,4501
1,run_enclave,5368,4501,4522
2,enclave_runtimeuser,3503,4522,4560
3,enclave_attestation,1276751,4560,8713
4,enclave_runtimeuser,7000,8713,8744
5,enclave_attestation_edge_call,4915300,8744,15906
6,enclave_runtimeuser,1916,15906,15919
7,exit_enclave,78934,15919,16215
8,destroy_enclave,240222,16227,16249


Unnamed: 0_level_0,hash_finalize,hash_extend,work_pending,osm_pmp_set,get_cycles_inline,get_enclave_by_id,swake_up,utm_init,sbi_set_timer,next_event,...,kmem_cache_alloc,hvc_write_room,restore_all,init_encl_pgtable,memset,fe_sq,fe_cmov,fe_mul,uart_putchar,sha3_keccakf
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
create_enclave,2.0,7.0,7.0,,,,12.0,13.0,,19.0,...,23562.0,,10168.0,67511.0,217048.0,,,,,48231090.0
run_enclave,,,,6.0,,10.0,,,,,...,,,,,,,,,,
enclave_runtimeuser,,,,,8.0,,,,18.0,,...,,,,,,,,,,
enclave_attestation,,,,,,,,,,,...,,,,,,155601.0,200614.0,527051.0,,308796.0
enclave_runtimeuser,,,,,,,,,,,...,,,,,,,,,,
enclave_attestation_edge_call,,,,12.0,,10.0,,,,,...,,29548.0,157743.0,,5441.0,,,,4420370.0,
enclave_runtimeuser,,,,,,,,,,,...,,,,,,,,,,
exit_enclave,,,8.0,6.0,,,,,,,...,,,27656.0,,504.0,,,,,
destroy_enclave,,,,,,,,,,,...,,,,,225820.0,,,,,


In [7]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

import cufflinks as cf

# You'll need to set API key using:
# plotly.tools.set_credentials_file(username='DemoAccount', api_key='lr1c37zw81')
# Find your api_key here: https://plot.ly/settings/api
# Cufflinks binds plotly to pandas dataframes in IPython notebook.

cf.set_config_file(offline=False, world_readable=True, theme='ggplot')

att.iplot(kind='bar', barmode="stack", filename='name.html')

In [10]:
# analyze pain points
# for all the phases, get the top offenders
def get_top_x(x, df):
    num_phases = df.shape[0]
    for i in range(num_phases):
        phase_data = df.iloc[[i]].transpose()
        phase = phase_data.columns[0]
        phase_data = phase_data[phase_data[phase].notnull()]
        phase_data.columns=["total_cost"]
        print(phase)
        return(phase_data.sort_values("total_cost", ascending=False)[:x])

get_top_x(15, att)

create_enclave


Unnamed: 0,total_cost
sha3_keccakf,48231090.0
memset,217048.0
__copy_user,73571.0
init_encl_pgtable,67511.0
restore_regs,33777.0
__cpuidle_text_end,31397.0
kmem_cache_alloc,23562.0
__update_load_avg_cfs_rq.,23167.0
hash_epm,22746.0
_raw_spin_unlock_irqresto,22356.0


run_enclave


Unnamed: 0,total_cost
pmp_set,1193.0
swap_prev_state,835.0
__local_bh_enable_ip,466.0
trap_vector,439.0
__radix_tree_lookup,433.0
run_enclave,358.0
restore_regs,341.0
_raw_spin_lock_bh,269.0
swap_prev_stvec,255.0
get_host_satp,181.0


enclave_runtimeuser


Unnamed: 0,total_cost
encl_trap_handler,688.0
unknown,679.0
restore_regs,470.0
trap_vector,398.0
handle_syscall,311.0
mcall_trap,277.0
init_timer,205.0
emulate_system_opcode,145.0
illegal_insn_trap,123.0
rt_base,121.0


enclave_attestation


Unnamed: 0,total_cost
fe_mul,527051.0
sha3_keccakf,308796.0
fe_cmov,200614.0
fe_sq,155601.0
fe_add,18137.0
fe_sub,15077.0
sc_muladd,7965.0
sc_reduce,6505.0
cmov,5363.0
memcpy,4297.0


enclave_runtimeuser


Unnamed: 0,total_cost
__copy_user,3947.0
trap_vector,535.0
dispatch_edgecall_ocall,459.0
encl_trap_handler,447.0
unknown,407.0
handle_syscall,307.0
return_to_encl,294.0
mcall_trap,269.0
edge_call_get_offset_from,264.0
copy_from_user,71.0


enclave_attestation_edge_call


Unnamed: 0,total_cost
uart_putchar,4420370.0
restore_all,157743.0
restore_regs,46357.0
trap_vector,42094.0
hvc_write_room,29548.0
mcall_trap,17883.0
_raw_spin_unlock_irqresto,17028.0
_raw_spin_lock_irqsave,12571.0
unlock_page,11444.0
memcpy,8006.0


enclave_runtimeuser


Unnamed: 0,total_cost
unknown,523.0
encl_trap_handler,457.0
return_to_encl,429.0
handle_syscall,214.0
trap_vector,167.0
mcall_trap,126.0


exit_enclave


Unnamed: 0,total_cost
restore_all,27656.0
__mod_node_page_state,2567.0
__cpuidle_text_end,2026.0
_vm_normal_page,1757.0
_raw_spin_unlock_irqresto,1707.0
trap_vector,1705.0
handle_exception,1590.0
mark_page_accessed,1565.0
free_unref_page_list,1455.0
vmacache_find,1431.0


destroy_enclave


Unnamed: 0,total_cost
memset,225820.0
__free_pages_ok,5608.0
kfree,1477.0
__mod_zone_page_state,1421.0
pmp_region_free_atomic,1093.0
destroy_enclave,948.0
pmp_unset,624.0
restore_regs,571.0
send_and_sync_pmp_ipi,564.0
free_pages.part.23,514.0


In [None]:
att_trim = get_buckets(att_trim2)
att_trim.iplot(kind='bar', barmode="stack", filename='name2.html')