In [438]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from IPython.display import display

# TODOs:
# - automate tracefile generation and symbol analysis
# - derive an error bound from the data
# - analyze the trace with the full symbol list
# - filter out useless symbols (or just bar-chart/visualize everything)
# - get CONSISTENT data

# Attestation traces: two captures plus one taken with the full symbol list.
att, att2, attfull = map(pd.read_csv, (
    "data/attestation.csv",  # missing destroy enclave.
    "data/attestation2.csv",
    "data/attestationfull.csv",
))

# tinyaes traces, same layout as the attestation captures above.
aes, aes2, aesfull = map(pd.read_csv, (
    "data/tinyaes.csv",
    "data/tinyaes2.csv",
    "data/tinyaesfull.csv",
))

In [439]:
class Boundary:
    """A named phase delimited by a start symbol and an end symbol.

    Attributes:
        name: label reported for the phase in the result tables.
        start: symbol whose occurrence opens the phase.
        end: symbol whose occurrence closes the phase.
    """
    def __init__(self, name, start, end):
        self.name = name
        self.start = start
        self.end = end

    def __repr__(self):
        # Readable form so a list of boundaries can be inspected in a cell.
        return "Boundary(name={!r}, start={!r}, end={!r})".format(
            self.name, self.start, self.end)

In [441]:
"""Given a set of boundaries (in no order) gather the number of cycles it took
to reach the end of each of the boundaries, by returning a pandas dataframe of
the boundaries, their labels, and the cycle cost. This should work in the case
of multiple enclaves being started, running a user/runtime, and then destroying.
"""

def get_indicies(bset, data):
    """Return the index labels of every row that could open or close a boundary.

    Args:
        bset: iterable of objects exposing ``start`` and ``end`` symbols.
        data: trace DataFrame with a ``symbol`` column.

    Returns:
        A list of ``data`` index labels, in row order and without repeats, for
        the rows whose ``symbol`` equals the start or end of any boundary.
    """
    boundary_symbols = [b.start for b in bset] + [b.end for b in bset]
    # Vectorised membership test instead of a per-row Python loop; traces are
    # tens of thousands of rows long.
    is_boundary = data['symbol'].isin(boundary_symbols)
    matched = data.index[is_boundary.values].tolist()
    # dict.fromkeys drops repeated labels while keeping first-seen order.
    return list(dict.fromkeys(matched))

def get_cost_table(data):
    """Return a table with per-row cycle cost in place of the cycle counter.

    Each row gains two columns: ``end`` (the symbol of the following row) and
    ``cost`` (the following row's ``cycle`` minus this row's ``cycle``). The
    ``cycle`` column is removed and the final row is dropped because it has no
    successor to measure against.

    The input frame is left untouched, so re-running the calling cell gives
    the same result.

    Args:
        data: trace DataFrame with ``cycle`` and ``symbol`` columns.

    Returns:
        A new DataFrame with one row fewer than ``data``.
    """
    next_cycle = data['cycle'].shift(-1)
    with_cost = data.assign(
        end=data['symbol'].shift(-1),
        cost=next_cycle - data['cycle'],
    )
    # drop the last row because this is cycle cost per instruction
    return with_cost.drop(columns=['cycle']).iloc[:-1]

# def get_phase_cycles(indicies, bset, data):
#     """Given indicies of interest and the corresponding data
#     return the cycle count of each phase.
#     """
#     start_symbols = [b.start for b in bset]
#     end_symbols = [b.end for b in bset]
    
#     # focus the data
#     df = data.iloc[indicies]
#     # clean up table
#     df["end"] = df["symbol"].shift(-1)
#     df["cost"] = -(df['cycle'] - df['cycle'].shift(-1))
#     df = df.drop(['cycle'], axis=1)[:-1]
#     # boundary matching
#     labels = []
#     for i, row in data.iterrows():
#         start = row['symbol']
#         end = row['end']
#         res = None
#         for b in bset:
#             if b.start == start and b.end == end:
#                 res = b.name
#                 break
#         labels.append(res)
#     print(labels)
#     # append labels in new column/series
#     df["phase"] = pd.Series(labels).values
            
#     return df

def get_phase_cycles2(indicies, bset, data):
    """Given indicies of interest and the corresponding data
    return the cycle count of each phase, along with the
    start and end indicies of each phase which can be used
    for further analysis.

    Args:
        indicies: index labels of ``data`` rows to consider (as returned by
            ``get_indicies``).
        bset: iterable of boundaries exposing ``name``, ``start`` and ``end``.
        data: trace DataFrame with ``symbol`` and ``cycle`` columns.

    Returns:
        DataFrame with columns ``phases``, ``cycle_counts``, ``start_index``
        and ``end_index``, one row per matched phase in trace order.
    """
    # start symbol -> {end symbol: phase name}. setdefault keeps the first
    # boundary listed in bset when several share a start/end pair.
    names_by_start = {}
    for b in bset:
        names_by_start.setdefault(b.start, {}).setdefault(b.end, b.name)

    # focus the data; the indicies are index labels, so select by label
    df = data.loc[indicies]
    row_labels = df.index.tolist()
    symbols = df['symbol'].tolist()
    cycles = df['cycle'].tolist()

    # boundary matching
    phases = []
    cycle_counts = []
    start_indicies = []
    end_indicies = []
    for pos, startidx in enumerate(row_labels):
        # rows inside the previously matched phase cannot open a new one;
        # the row that closed it can
        if end_indicies and startidx < end_indicies[-1]:
            continue
        end_names = names_by_start.get(symbols[pos])
        if not end_names:
            # this symbol does not open any boundary
            continue
        # scan from the current row onwards for the first symbol that closes
        # a boundary opened by this row's symbol
        for endpos in range(pos, len(row_labels)):
            end = symbols[endpos]
            if end in end_names:
                phases.append(end_names[end])
                cycle_counts.append(cycles[endpos] - cycles[pos])
                start_indicies.append(startidx)
                end_indicies.append(row_labels[endpos])
                break  # on first one, we can stop

    labels = ["phases", "cycle_counts", "start_index", "end_index"]
    # Building from a dict keeps the numeric columns numeric.
    return pd.DataFrame(
        {
            "phases": phases,
            "cycle_counts": cycle_counts,
            "start_index": start_indicies,
            "end_index": end_indicies,
        },
        columns=labels,
    )


# Macro boundaries for the enclave.
# We will also want smaller micro-boundaries inside these larger buckets; for
# those we will not create separate boundaries. With boundaries nested inside
# boundaries we can identify what is slowing down the enclave.
# Smaller bounds should come first.
create_bound = Boundary("create_enclave", "keystone_create_enclave", "keystone_run_enclave")
run_bound = Boundary("run_enclave", "keystone_run_enclave", "rt_base")
runtime_attest_bound = Boundary("enclave_runtimeuser4", "rt_base", "mcall_sm_not_implemented") # weird case
runtime_bound = Boundary("enclave_runtimeuser", "rt_base", "mcall_sm_attest_enclave")
attest_copy_bound = Boundary("enclave_attestation2", "mcall_sm_not_implemented", "return_to_encl") # weird case
attest_bound = Boundary("enclave_attestation", "mcall_sm_attest_enclave", "return_to_encl")
return_to_runtime_bound = Boundary("enclave_runtimeuser1", "return_to_encl", "mcall_sm_stop_enclave")
attest_edge_call_bound2 = Boundary("enclave_attestation_edge_call", "dispatch_edgecall_ocall", "return_to_encl")
attest_edge_call_bound = Boundary("enclave_attestation_edge_call", "mcall_sm_stop_enclave", "return_to_encl")
# NOTE(review): this boundary used to be assigned to `run_to_exit_bound` and
# was overwritten by the very next line, so it has never been part of
# `enclave_bset`. It now has its own name but is still left out of the set so
# existing results do not change; add it to `enclave_bset` if the
# runtime-until-ocall phase should be reported.
run_to_ocall_bound = Boundary("enclave_runtimeuser_until_ocall", "return_to_encl", "dispatch_edgecall_ocall")
run_to_exit_bound = Boundary("enclave_runtimeuser2", "return_to_encl", "mcall_sm_exit_enclave")
run_wout_attest_bound = Boundary("enclave_runtimeuser3", "rt_base","mcall_sm_stop_enclave")
# Both destroy boundaries share a name and a start symbol and differ only in
# the end symbol.
destroy_bound = Boundary("destroy_enclave", "destroy_enclave", "keystone_destroy_enclave_end")
destroy_bound_full = Boundary("destroy_enclave", "destroy_enclave", "_raw_spin_unlock_bh")

enclave_bset = [
    create_bound,
    run_bound,
    runtime_attest_bound,
    runtime_bound,
    attest_copy_bound,
    attest_bound,
    return_to_runtime_bound,
    attest_edge_call_bound2,
    attest_edge_call_bound,
    run_to_exit_bound,
    run_wout_attest_bound,
    destroy_bound,
    destroy_bound_full,
]

native_bset = [] # we just want user mode?
ignore_set = ["printf"]

In [442]:
# Run Analysis
def get_large_buckets(data, bset=None):
    """Return the phase/cycle-count table for one trace.

    Args:
        data: trace DataFrame with ``symbol`` and ``cycle`` columns.
        bset: boundaries to match; defaults to ``enclave_bset``.

    Returns:
        The DataFrame produced by ``get_phase_cycles2`` for the rows of
        ``data`` that touch any boundary in ``bset``.
    """
    if bset is None:
        bset = enclave_bset
    # The helper is spelled `get_indicies` in this notebook.
    indicies = get_indicies(bset, data)
    return get_phase_cycles2(indicies, bset, data)

In [432]:
# Phase tables for the two tinyaes captures and the two attestation captures.
aesres, aes2res, attres, att2res = (
    get_large_buckets(trace) for trace in (aes, aes2, att, att2)
)

# Shown in order: tinyaes phases, the first 50 raw tinyaes rows, second
# tinyaes capture, then both attestation captures (the last one looks odd).
display(aesres, aes[:50], aes2res, attres, att2res)

Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,45165080,0,110
1,run_enclave,5074,110,117
2,enclave_runtimeuser3,212822,117,83157
3,destroy_enclave,15320,83167,83169


Unnamed: 0,cycle,env,symbol
0,5154831588,linux,keystone_create_enclave
1,5154831720,linux,calculate_required_pages
2,5154831848,linux,create_enclave
3,5155033756,linux,epm_init
4,5155033820,linux,init_free_pages
5,5155039527,linux,get_free_page.part.0
6,5155040680,linux,keystone_rtld_init_runtim
7,5155401317,linux,rtld_vm_mmap
8,5155401447,linux,epm_alloc_rt_page
9,5155401557,linux,epm_alloc_page


Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,45121967,0,110
1,run_enclave,5046,110,117
2,enclave_runtimeuser3,213228,117,83157
3,destroy_enclave,15644,83167,83169


Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,49513319,0,130
1,run_enclave,5098,130,142
2,enclave_runtimeuser,3583,142,169
3,enclave_attestation,1276616,169,183
4,enclave_runtimeuser1,6715,183,212
5,enclave_attestation_edge_call,4908128,212,233
6,enclave_runtimeuser2,2251,233,244


Unnamed: 0,phases,cycle_counts,start_index,end_index
0,create_enclave,49529309,0,115
1,run_enclave,5209,115,122
2,enclave_runtimeuser4,3506,122,149
3,enclave_attestation2,1274127,149,155
4,enclave_runtimeuser2,4909553,155,191
5,enclave_runtimeuser1,1752,196,207
6,destroy_enclave,15550,217,219


In [447]:
# Per-row cycle costs for the first tinyaes trace.
caes = get_cost_table(aes)

In [448]:
# Example for creating the enclave: take the first 110 cost rows, spread the
# costs into one column per symbol, back-fill so the first row carries the
# first recorded cost of every symbol, and keep only that row.
(
    caes[0:110]
    .pivot(columns='symbol', values='cost')
    .bfill()
    .iloc[[0], :]
)

symbol,__ept_walk_internal.const,calculate_required_pages,create_enclave,enclave_idr_alloc,epm_alloc_page,epm_alloc_rt_page,epm_alloc_rt_page_noexec,epm_alloc_user_page,epm_alloc_user_page_noexe,epm_init,...,keystone_rtld_init_untrus,mcall_sm_destroy_enclave,pmp_region_free_atomic,pmp_set,rtld_vm_mmap,search_rightmost_unset,send_and_sync_pmp_ipi,sign,utm_alloc_page,utm_init
0,227.0,128.0,201908.0,48495.0,105.0,110.0,53.0,59.0,52.0,64.0,...,62.0,67345.0,388.0,1219.0,130.0,9546.0,61.0,77829.0,14.0,13.0


In [431]:
# NOTE(review): `plotly.plotly` was moved into the separate `chart_studio`
# package in plotly 4 — confirm the installed plotly version still ships it.
import plotly.plotly as py
import cufflinks as cf

# Online plotting needs Plotly credentials, set once with:
# plotly.tools.set_credentials_file(username='<username>', api_key='<api_key>')
# Find your api_key here: https://plot.ly/settings/api
# Do not commit real credentials to the notebook.
# Cufflinks binds plotly to pandas dataframes in IPython notebook.

# NOTE(review): offline=False with world_readable=True presumably publishes
# the figures to the hosted Plotly service as public plots — confirm that is
# intended for this trace data.
cf.set_config_file(offline=False, world_readable=True, theme='ggplot')

# Stacked bar chart of the attestation phase table.
attres.iplot(kind='bar', barmode='stack')

In [231]:
# Stacked bar chart of the tinyaes phase table.
aesres.iplot(kind='bar', barmode='stack')

In [38]:
# Scratch frame of uniform random values (10 rows, columns A-D) used to try
# out the plotting call.
demo_columns = list("ABCD")
df = pd.DataFrame(np.random.rand(10, len(demo_columns)), columns=demo_columns)
df

Unnamed: 0,A,B,C,D
0,0.289982,0.525064,0.106397,0.200717
1,0.857107,0.688147,0.068649,0.746845
2,0.27878,0.832076,0.562539,0.529062
3,0.356914,0.174221,0.265477,0.805928
4,0.785437,0.199188,0.371346,0.735639
5,0.715124,0.873299,0.65221,0.285473
6,0.840026,0.45625,0.692743,0.094383
7,0.576376,0.678161,0.362135,0.287692
8,0.859217,0.583189,0.924251,0.029439
9,0.83602,0.510472,0.643956,0.090139


In [40]:
# Stacked bar chart of the random scratch frame.
df.iplot(kind='bar', barmode='stack')