In [1]:
import pandas as pd
import numpy as np
import math
import sys  
sys.path.insert(0, '../src')
from dnns2 import *

In [None]:
""" Helper functions
"""

def prev_store1_ea(x, df, idx_s):
    idx_s = [i for i in idx_s if i < x]
    if not idx_s:
        return np.nan
    idx = max(idx_s)
    return df.loc[idx, 'effective_address']

def prev_store1_v0(x, df, idx_s):
    idx_s = [i for i in idx_s if i < x]
    if not idx_s:
        return np.nan
    idx = max(idx_s)
    return df.loc[idx, 'val0']

def prev_store2_ea(x, df, idx_s):
    idx_s2 = [i for i in idx_s if i < x]
    if not idx_s2:
        return np.nan
    idx = max(idx_s2)
    idx_s2 = [i for i in idx_s if i < idx]
    if not idx_s2:
        return np.nan
    idx = max(idx_s2)
    return df.loc[idx, 'effective_address']
    
def prev_store2_v0(x, df, idx_s):
    idx_s2 = [i for i in idx_s if i < x]
    if not idx_s2:
        return np.nan
    idx = max(idx_s2)
    idx_s2 = [i for i in idx_s if i < idx]
    if not idx_s2:
        return np.nan
    idx = max(idx_s2)
    return df.loc[idx, 'val0']

""" Data prep functions
"""

def prepDataFrame(filename, read_stores=True):
    """ Read in the loads and stores from a trace and 
    """

    # Read in the data and get it ready for computation
    df = pd.read_csv(filename)
    df = df.replace('NAN', np.nan)
    df['pc'] = df['pc'].astype('int64')
    df['effective_address'] = df['effective_address'].astype('int64')
    df['val0'] = df['val0'].astype('int64')

    # Build a dataframe with the 64bit values split into 32 bit values
    df1 = pd.DataFrame(index=df[df['type'] == 'l'].index)
    df1['pc-l'] = df['pc'] % math.pow(2, 31) # lower 32 bits
    df1['ea-l'] = df['effective_address'] % math.pow(2, 31)
    df1['ea-u'] = (df['effective_address'] - df1['ea-l'])/math.pow(2, 31) #remove trailing zeros
    df1['v0-l'] = df['val0'] % math.pow(2, 31)
    df1['v0-u'] = (df['val0'] - df1['v0-l'])/math.pow(2, 31) #remove trailing zeros

    # Scale all values in df1 into the interval [0,1]
    df1['pc-l'] = df1['pc-l']/df1['pc-l'].max()
    df1['ea-l'] = df1['ea-l']/df1['ea-l'].max()
    df1['ea-u'] = df1['ea-u']/df1['ea-u'].max()
    df1['v0-l'] = df1['v0-l']/df1['v0-l'].max()
    df1['v0-u'] = df1['v0-u']/df1['v0-u'].max()

    # Add columns for the previous k loads (effective address and value)
    for i in [1,2]:
        # least sig bits of effective address
        ealname = 'eal-' + str(i)
        df1[ealname] = df1['ea-l'].shift(i)
        # most sig bits of effective address
        eauname = 'eau-' + str(i)
        df1[ealname] = df1['ea-u'].shift(i)
        # least sig bits of the value
        lname = 'v0l-' + str(i)
        df1[lname] = df1['v0-l'].shift(i)
        # most sig bits of the value
        uname = 'v0u-' + str(i)
        df1[uname] = df1['v0-u'].shift(i)

    # Add columns for the first 32 and last 32 bits of the previous 2 store values
    # Create the file name where the store columns must be read/written from/to.
    store_file = filename[:-4] + 'store_colsDNN2.csv'
    dfs = pd.DataFrame(index=df[df['type'] == 's'].index)
    if (read_stores):
        # The columns for stores have already been computed for this file. 
        # Read the file and put the columns in df1
        
        #TODO: write code to read in csv (of 64 bit values) they will need to be split!
        
        store_columns = pd.read_csv(store_file, index_col=0)
        df['sea-1'] = store_columns['sea-1'].astype('float64')
        df['sv0-1'] = store_columns['sv0-1'].astype('float64')
        df['sea-2'] = store_columns['sea-2'].astype('float64')
        df['sv0-2'] = store_columns['sv0-2'].astype('float64')
    else:
        # The columns for the stroes have not been previously computed. Compute
        # these columns and write them to a file for later use

        # Get the effective address and value of the past two loads
        df['sea-1'] = df1.index.to_series().apply(lambda x : prev_store1_ea(x, df, list(dfs.index)))
        df['sv0-1'] = df1.index.to_series().apply(lambda x : prev_store1_v0(x, df, list(dfs.index)))
        df['sea-2'] = df1.index.to_series().apply(lambda x : prev_store2_ea(x, df, list(dfs.index)))
        df['sv0-2'] = df1.index.to_series().apply(lambda x : prev_store2_v0(x, df, list(dfs.index)))
        #df['sea-1'] = df1.index.to_series().apply(lambda x : prev_store1_ea(x, df, dfs))
#         df['sv0-1'] = df1.index.to_series().apply(lambda x : prev_store1_v0(x, df, dfs))
#         df['sea-2'] = df1.index.to_series().apply(lambda x : prev_store2_ea(x, df, dfs))
#         df['sv0-2'] = df1.index.to_series().apply(lambda x : prev_store2_v0(x, df, dfs))
        
        # Write the columns to a file to save time in the future
        store_columns = pd.DataFrame(index=df1.index)
        store_columns['sea-1'] = df['sea-1']
        store_columns['sv0-1'] = df['sv0-1']
        store_columns['sea-2'] = df['sea-2']
        store_columns['sv0-2'] = df['sv0-2']
        store_columns.to_csv(store_file)
    
    # Break up the 64 bit values into 32 bit values
    for i in [1,2]:
        df1['Seal-' + str(i)] = df['sea-' + str(i)] % math.pow(2,31)
        df1['Seau-' + str(i)] = (df['sea-' + str(i)] - df1['Seal-' + str(i)])/math.pow(2,31)
        df1['Sv0l-' + str(i)] = df['sv0-' + str(i)] % math.pow(2,31)
        df1['Sv0u-' + str(i)] = (df['sv0-' + str(i)] - df1['Sv0l-' + str(i)])//math.pow(2,31)

    # Reorder the columns so the 'outputs' are the last 2 columns
#     df1 = df1[["pc-l", "ea-l", "ea-u", "v0l-1", "v0u-1", "v0l-2", "v0u-2", "v0l-3", \
#                "v0u-3", "v0l-4", "v0u-4", "s-1l", "s-1u", "s-2l", "s-2u", "v0-l", "v0-u"]]

    df1 = df1[['pc-l', 'ea-l', 'ea-u', 'eal-1', 'v0l-1', 'v0u-1', 'eal-2', 'v0l-2', 'v0u-2', \
               'Seal-1', 'Seau-1', 'Sv0l-1', 'Sv0u-1', 'Seal-2', 'Seau-2', 'Sv0l-2', 'Sv0u-2', \
               'v0-l', 'v0-u']]

    # Drop all rows with nans
    return df1#.dropna()


In [None]:
filename = "../data/1M-LdSt.csv"
df = prepDataFrame(filename, read_stores=False)

In [None]:
df

In [87]:
# Read in the data and get it ready for computation
df = pd.read_csv(filename)
#df = df[df.index < 20]
df = df.replace('NAN', np.nan)
df['pc'] = df['pc'].astype('int64')
df['effective_address'] = df['effective_address'].astype('int64')
df['val0'] = df['val0'].astype('int64')

# Build a dataframe with the 64bit values split into 32 bit values
df1 = pd.DataFrame(index=df[df['type'] == 'l'].index)
df1['pc-l'] = df['pc'] % math.pow(2, 31) # lower 32 bits
df1['ea-l'] = df['effective_address'] % math.pow(2, 31)
df1['ea-u'] = (df['effective_address'] - df1['ea-l'])/math.pow(2, 31) #remove trailing zeros
df1['v0-l'] = df['val0'] % math.pow(2, 31)
df1['v0-u'] = (df['val0'] - df1['v0-l'])/math.pow(2, 31) #remove trailing zeros

# Scale all values in df1 into the interval [0,1]
df1['pc-l'] = df1['pc-l']/df1['pc-l'].max()
df1['ea-l'] = df1['ea-l']/df1['ea-l'].max()
df1['ea-u'] = df1['ea-u']/df1['ea-u'].max()
df1['v0-l'] = df1['v0-l']/df1['v0-l'].max()
df1['v0-u'] = df1['v0-u']/df1['v0-u'].max()

# Add columns for the previous k loads (effective address and value)
for i in [1,2]:
    # least sig bits of effective address
    ealname = 'eal-' + str(i)
    df1[ealname] = df1['ea-l'].shift(i)
    # most sig bits of effective address
    eauname = 'eau-' + str(i)
    df1[ealname] = df1['ea-u'].shift(i)
    # least sig bits of the value
    lname = 'v0l-' + str(i)
    df1[lname] = df1['v0-l'].shift(i)
    # most sig bits of the value
    uname = 'v0u-' + str(i)
    df1[uname] = df1['v0-u'].shift(i)

# Add columns for the first 32 and last 32 bits of the previous 2 store values
# Create the file name where the store columns must be read/written from/to.
store_file = filename[:-4] + 'store_colsDNN2.csv'
dfs = pd.DataFrame(index=df[df['type'] == 's'].index)

In [88]:
df1

Unnamed: 0,pc-l,ea-l,ea-u,v0-l,v0-u,eal-1,v0l-1,v0u-1,eal-2,v0l-2,v0u-2
0,0.808481,0.004165,0.0,3.150437e-03,0.000000,,,,,,
1,0.808483,1.000000,1.0,3.253406e-02,0.000000,0.0,3.150437e-03,0.000000,,,
2,0.808484,1.000000,1.0,4.332530e-03,0.000000,1.0,3.253406e-02,0.000000,0.0,3.150437e-03,0.000000
3,0.808484,1.000000,1.0,2.158662e-02,0.000000,1.0,4.332530e-03,0.000000,1.0,3.253406e-02,0.000000
4,0.808485,1.000000,1.0,9.999916e-01,0.000031,1.0,2.158662e-02,0.000000,1.0,4.332530e-03,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
262099,0.999946,1.000000,1.0,8.927360e-03,0.000000,1.0,2.158709e-02,0.000000,0.0,0.000000e+00,0.000000
262100,0.999946,1.000000,1.0,1.443550e-08,0.000000,1.0,8.927360e-03,0.000000,1.0,2.158709e-02,0.000000
262101,0.999947,1.000000,1.0,9.999916e-01,0.000031,1.0,1.443550e-08,0.000000,1.0,8.927360e-03,0.000000
262103,0.572340,0.004870,0.0,2.710940e-05,0.000000,1.0,9.999916e-01,0.000031,1.0,1.443550e-08,0.000000


In [89]:
df

Unnamed: 0,type,pc,effective_address,num_values,val0,val1,val2
0,l,6765496,8943720,1,6765512,,
1,l,6765512,281474976692496,2,69866368,6765600,
2,l,6765516,281474976692480,2,9304038,2,
3,l,6765520,281474976692464,2,46356912,1,
4,l,6765524,281474976692448,3,281474976692512,10458672,47
...,...,...,...,...,...,...,...
266622,s,8130216,115574656,1,177064,,
266623,s,8130216,115574720,1,177000,,
266624,s,8130216,115574784,1,176936,,
266625,s,8130216,115574848,1,176872,,


In [None]:
# Get the effective address and value of the past two loads
df['sea-1'] = df1.index.to_series().apply(lambda x : prev_store1_ea(x, df, list(dfs.index)))

In [75]:
df

Unnamed: 0,type,pc,effective_address,num_values,val0,val1,val2,sea-1
0,l,6765496,8943720,1,6765512,,,
1,l,6765512,281474976692496,2,69866368,6765600,,
2,l,6765516,281474976692480,2,9304038,2,,
3,l,6765520,281474976692464,2,46356912,1,,
4,l,6765524,281474976692448,3,281474976692512,10458672,47,
...,...,...,...,...,...,...,...,...
266622,s,8130216,115574656,1,177064,,,
266623,s,8130216,115574720,1,177000,,,
266624,s,8130216,115574784,1,176936,,,
266625,s,8130216,115574848,1,176872,,,


In [None]:
df['sea-1'].dropna()

In [None]:
filename = "../data/1M-LdSt.csv"
df = prepDataFrame(filename)
num_training_examples = int(0.8*len(df))
X_tr, X_te, y_tr, y_te = test_train_split(df, num_training_examples)