In [583]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import display

# TODOs:
# - automate tracefile generation and symbol analysis
# - get an error bound based on the data
# - analyze the trace with the full symbols list
# - filter out useless symbols (or just bar chart/visualize everything)
# - get CONSISTENT data

# Trace used by the analysis cells below; they read its 'cycle' and 'symbol' columns.
att = pd.read_csv("data/attestation.csv") # correct data
# att2 = pd.read_csv("data/attestation2.csv") # bad data
# attfull = pd.read_csv("data/attestationfull.csv")

# Other traces, currently disabled.
# aes = pd.read_csv("data/tinyaes.csv")
# aes2 = pd.read_csv("data/tinyaes2.csv")
# aesfull = pd.read_csv("data/tinyaesfull.csv")

In [584]:
class Boundary:
    """Named phase of a trace, delimited by a start symbol and an end symbol.

    Attributes:
        name: label reported for the phase.
        start: symbol that opens the phase.
        end: symbol that closes the phase.
    """
    def __init__(self, name, start, end):
        # Plain value holder: the three arguments are stored unchanged.
        self.name, self.start, self.end = name, start, end

In [585]:
"""Given a set of boundaries (in no order) gather the number of cycles it took
to go from the start to the end of each boundary, returned as a pandas DataFrame of
the boundaries, their labels, and the cycle cost. This should also work when
multiple enclaves are started, run a user/runtime, and are then destroyed.
"""

def get_indicies(bset, data):
    """Return the index labels of every row whose symbol starts or ends a boundary.

    Args:
        bset: iterable of objects exposing ``start`` and ``end`` symbols.
        data: DataFrame with a 'symbol' column.

    Returns:
        A list of index labels from ``data``, in row order, each label
        appearing once.
    """
    # Nothing to scan; also avoids requiring a 'symbol' column on an empty frame.
    if len(data) == 0:
        return []

    # Collect start and end symbols in a single pass over bset.
    boundary_symbols = [sym for b in bset for sym in (b.start, b.end)]

    # Vectorized membership test instead of a row-by-row Python loop.
    is_boundary = data['symbol'].isin(boundary_symbols).to_numpy()

    # unique() keeps first-appearance order, so duplicate labels are reported once.
    return data.index[is_boundary].unique().tolist()

def get_cost_table(data):
    """Return a table with per-row cycle cost in place of the cycle number.

    For each row, ``end`` is the symbol of the following row and ``cost`` is
    the following row's cycle minus this row's cycle. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        data: DataFrame with 'cycle' and 'symbol' columns.

    Returns:
        A new DataFrame without the 'cycle' column, with 'end' and 'cost'
        added, and without the last input row.
    """
    next_cycle = data['cycle'].shift(-1)
    # assign() builds a new frame, so the caller's DataFrame gains no columns.
    costed = data.assign(
        end=data['symbol'].shift(-1),
        cost=next_cycle - data['cycle'],
    )
    # The last row has no successor, hence no cost: drop it.
    return costed.drop(columns=['cycle']).iloc[:-1]

def get_symbol_cost_table(phase_df, data):
    """Build a phase-by-symbol cost table.

    For every row of ``phase_df`` the cost table derived from ``data`` is
    sliced between that phase's 'start_index' and 'end_index', pivoted so each
    symbol becomes a column, back-filled, and reduced to its first row. The
    per-phase rows are stacked, indexed by phase name, and the columns are
    ordered by ascending column mean.

    NOTE(review): because only the first row after the back-fill is kept, each
    cell holds the cost of the first occurrence of that symbol in the phase;
    repeated symbols are not summed. Confirm this is intended.

    Args:
        phase_df: DataFrame with 'phases', 'start_index' and 'end_index' columns.
        data: DataFrame with 'cycle' and 'symbol' columns.

    Returns:
        DataFrame indexed by 'phase' with one column per symbol.
    """
    per_row_cost = get_cost_table(data)

    phase_rows = []
    for _, phase_row in phase_df.iterrows():
        lo, hi = phase_row['start_index'], phase_row['end_index']
        # one column per symbol; each original row contributes a single value
        wide = per_row_cost[lo:hi].pivot(columns='symbol', values='cost')
        # back-fill pulls each symbol's first value up into the first row
        first_row = wide.bfill().iloc[[0], :]
        first_row['phase'] = phase_row['phases']
        phase_rows.append(first_row)

    # stack the per-phase rows and key them by phase name
    combined = pd.concat(phase_rows, ignore_index=True).set_index('phase')
    # order columns by ascending mean
    column_order = combined.mean().sort_values().index
    return combined.reindex(column_order, axis=1)

def get_phase_cycles_table(indicies, bset, data):
    """Match boundary start/end rows and report the cycle count of each phase.

    The rows of ``data`` named by ``indicies`` are scanned in order. For each
    candidate start row, the rows from it onward are searched for the first
    row whose symbol, paired with the start symbol, equals the (start, end) of
    some boundary in ``bset``. Rows that lie before the end of the previously
    matched phase are skipped as start candidates.

    Args:
        indicies: index labels of ``data`` to consider (as produced by
            ``get_indicies``).
        bset: list of objects exposing ``name``, ``start`` and ``end``.
        data: DataFrame with 'cycle' and 'symbol' columns.

    Returns:
        DataFrame with columns 'phases', 'cycle_counts', 'start_index' and
        'end_index', one row per matched phase. The index columns hold labels
        of ``data``.
    """
    # Focus the data. The indicies are index labels, and the scan below slices
    # by label as well, so select by label (.loc) rather than by position.
    df = data.loc[indicies]

    phases = []
    cycle_counts = []
    start_indicies = []
    end_indicies = []
    for startidx, row in df.iterrows():
        # skip rows that fall inside the previously matched phase
        if end_indicies and startidx < end_indicies[-1]:
            continue
        start = row['symbol']
        # scan from the current row forward for the first matching end symbol
        for endidx, next_row in df.loc[startidx:].iterrows():
            end = next_row['symbol']
            matched = next(
                (b for b in bset if b.start == start and b.end == end), None
            )
            if matched is None:
                continue
            phases.append(matched.name)
            cycle_counts.append(next_row['cycle'] - row['cycle'])
            start_indicies.append(startidx)
            end_indicies.append(endidx)
            break  # first match closes the phase

    # Build column-wise so numeric columns keep a numeric dtype.
    return pd.DataFrame({
        "phases": phases,
        "cycle_counts": cycle_counts,
        "start_index": start_indicies,
        "end_index": end_indicies,
    })


# Macro boundaries for the enclave.
# We will also want smaller micro-boundaries for the enclave within these larger buckets.
# For these, we will not create separate boundaries.
# With boundaries nested within boundaries, we can identify what is slowing down the enclave.
# Smaller bounds should come first.
# create_bound = Boundary("create_enclave", "keystone_create_enclave", "keystone_run_enclave")
# run_bound = Boundary("run_enclave", "keystone_run_enclave", "rt_base")
# runtime_attest_bound = Boundary("enclave_runtimeuser4", "rt_base", "mcall_sm_not_implemented") # weird case
# runtime_bound = Boundary("enclave_runtimeuser", "rt_base", "mcall_sm_attest_enclave")
# attest_copy_bound = Boundary("enclave_attestation2", "mcall_sm_not_implemented", "return_to_encl") # weird case
# attest_bound = Boundary("enclave_attestation", "mcall_sm_attest_enclave", "return_to_encl")
# return_to_runtime_bound = Boundary("enclave_runtimeuser1", "return_to_encl", "mcall_sm_stop_enclave")
# attest_edge_call_bound2 = Boundary("enclave_attestation_edge_call", "dispatch_edgecall_ocall", "return_to_encl")
# attest_edge_call_bound = Boundary("enclave_attestation_edge_call", "mcall_sm_stop_enclave", "return_to_encl")
# run_to_exit_bound = Boundary("enclave_runtimeuser_until_ocall", "return_to_encl", "dispatch_edgecall_ocall")
# run_to_exit_bound = Boundary("enclave_runtimeuser2", "return_to_encl", "mcall_sm_exit_enclave")
# run_wout_attest_bound = Boundary("enclave_runtimeuser3", "rt_base","mcall_sm_stop_enclave")
# destroy_bound = Boundary("destroy_enclave", "destroy_enclave", "keystone_destroy_enclave_end")
# destroy_bound_full = Boundary("destroy_enclave", "destroy_enclave", "_raw_spin_unlock_bh")

# enclave_bset = [create_bound, run_bound, runtime_attest_bound, runtime_bound, attest_copy_bound, attest_bound, return_to_runtime_bound, 
                # attest_edge_call_bound2, attest_edge_call_bound, run_to_exit_bound, run_wout_attest_bound, destroy_bound, destroy_bound_full]

# Presumably the boundary set for a native (non-enclave) run; currently empty.
native_bset = [] # we just want user mode?
# NOTE(review): not referenced by any cell visible here; confirm where these
# symbols are meant to be filtered out.
ignore_set = ["printf"]

In [586]:
# Smaller bounds should come first.

# Host side: enclave creation and the hand-off into the runtime.
create = Boundary(
    name="create_enclave", start="keystone_create_enclave", end="keystone_run_enclave")
run_overhead = Boundary(
    name="run_enclave", start="keystone_run_enclave", end="rt_base")

# Runtime/user segments, split by the call that ends each of them.
runtime = Boundary(
    name="enclave_runtimeuser", start="rt_base", end="mcall_sm_attest_enclave")
runtime2 = Boundary(
    name="enclave_runtimeuser2", start="return_to_encl", end="mcall_sm_stop_enclave")
runtime3 = Boundary(
    name="enclave_runtimeuser3", start="return_to_encl", end="mcall_sm_exit_enclave")

# Attestation and the edge call that follows a stop.
attest = Boundary(
    name="enclave_attestation", start="mcall_sm_attest_enclave", end="return_to_encl")
attest_ecall = Boundary(
    name="enclave_attestation_edge_call", start="mcall_sm_stop_enclave", end="return_to_encl")

# Runtime segment when no attestation call happens; shares its label with `runtime`.
runtime_noattest = Boundary(
    name="enclave_runtimeuser", start="rt_base", end="mcall_sm_stop_enclave")
exit_enclave = Boundary(
    name="exit_enclave", start="mcall_sm_exit_enclave", end="keystone_ioctl")
destroy_bound = Boundary(
    name="destroy_enclave", start="destroy_enclave", end="keystone_destroy_enclave_end")


enclave_bset = [
    create,
    run_overhead,
    runtime,
    runtime2,
    runtime3,
    attest,
    attest_ecall,
    runtime_noattest,
    exit_enclave,
    destroy_bound,
]

In [587]:
# Run Analysis
def get_large_buckets(data):
    """Run the full phase analysis on one trace and return the symbol cost table.

    Displays the boundary rows found in ``data`` and the per-phase cycle
    table, then returns the phase-by-symbol cost table.

    Args:
        data: trace DataFrame with 'cycle' and 'symbol' columns.

    Returns:
        The DataFrame produced by ``get_symbol_cost_table``.
    """
    bset = enclave_bset
    # The helper is spelled `get_indicies`; calling `get_indices` raised NameError.
    indicies = get_indicies(bset, data)
    # get_indicies returns index labels, so select by label.
    display(data.loc[indicies])
    phase_table = get_phase_cycles_table(indicies, bset, data)
    display(phase_table)
    return get_symbol_cost_table(phase_table, data)

In [588]:
# Bind the result to its own name instead of overwriting the raw trace in
# `att`: the cell stays re-runnable, and the plotting cell reads `att_sorted`.
att_sorted = get_large_buckets(att)
display(att_sorted)
# display(aes[:50])
# aes2res = get_large_buckets(aes2)
# display(aes2res)
# attres = get_large_buckets(att)
# display(attres)
# att2res = get_large_buckets(att2) # looks odd.
# display(att2res)

Unnamed: 0,cycle,env,symbol
0,13718035263,linux,keystone_create_enclave
129,13767547891,linux,keystone_ioctl
130,13767548582,linux,keystone_run_enclave
142,13767553680,runtime,rt_base
169,13767557263,sm,mcall_sm_attest_enclave
183,13768833879,runtime,return_to_encl
212,13768840594,sm,mcall_sm_stop_enclave
220,13773741999,linux,keystone_ioctl
233,13773748722,runtime,return_to_encl
244,13773750973,sm,mcall_sm_exit_enclave


Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,49513319,0,130
1,run_enclave,5098,130,142
2,enclave_runtimeuser,3583,142,169
3,enclave_attestation,1276616,169,183
4,enclave_runtimeuser2,6715,183,212
5,enclave_attestation_edge_call,4908128,212,233
6,enclave_runtimeuser3,2251,233,244
7,exit_enclave,84612,244,252


Unnamed: 0_level_0,osm_pmp_set,utm_init,utm_alloc_page,epm_alloc_user_page_noexe,unknown,epm_alloc_rt_page_noexec,mcall_sm_stop_enclave,mcall_sm_attest_enclave,mcall_sm_exit_enclave,get_cycles64,...,init_free_pages,keystone_mmap,swap_prev_mepc,enclave_idr_alloc,hash_finalize,create_enclave,keystone_app_load_elf,keystone_rtld_init_runtim,sign,hash_extend_page
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
create_enclave,,13.0,14.0,52.0,,53.0,,,,,...,5904.0,8896.0,,46707.0,77072.0,202054.0,233721.0,516176.0,,4383322.0
run_enclave,6.0,,,,,,,,,,...,,,60.0,,,,,,,
enclave_runtimeuser,,,,,152.0,,,,,58.0,...,,,,,,,,,,
enclave_attestation,,,,,,,,54.0,,,...,,,,,,,,,1269449.0,
enclave_runtimeuser2,,,,,2.0,,,,,,...,,,,,,,,,,
enclave_attestation_edge_call,6.0,,,,,,54.0,,,,...,,,56.0,,,,,,,
enclave_runtimeuser3,,,,,2.0,,,,,,...,,,,,,,,,,
exit_enclave,6.0,,,,,,,,54.0,,...,,,83529.0,,,,,,,


In [616]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

import cufflinks as cf

# Online plotting needs plotly credentials, configured once with:
# plotly.tools.set_credentials_file(username='<your-username>', api_key='<your-api-key>')
# Find your api_key here: https://plot.ly/settings/api (never commit a real key to the notebook)
# Cufflinks binds plotly to pandas dataframes in IPython notebook.

# NOTE(review): offline=False together with world_readable=True presumably
# uploads the chart to plot.ly as a public plot; confirm that is intended.
cf.set_config_file(offline=False, world_readable=True, theme='ggplot')

# Stacked bar chart of the table. `att_sorted` must already exist in the kernel
# when this cell runs; presumably it is the table returned by get_large_buckets.
att_sorted.iplot(kind='bar', barmode="stack", filename='name.html')