In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import cycle

In [3]:
# Trace builiding code
class Trace:
    def __init__(self):
        self.calls = []

class Node_info:
    def __init__(self, num_id, n_type):
        self.num_id = num_id
        self.n_type = n_type

class Call:
    def __init__(self, traceid, timestamp, rpcid, um, dm, rpctype, interface, rt):
        self.traceid = traceid
        self.timestamp = timestamp
        self.rpcid = rpcid
        self.um = um
        self.dm = dm 
        self.rpctype = rpctype
        self.interface = interface
        self.rt = rt
    def string(self):
        return self.traceid + "," + str(self.timestamp) + "," + self.rpcid + "," + self.um + "," + self.dm + "," +\
            self.rpctype + "," + self.interface + "," + str(self.rt)

def csv_to_df(file: str):
    use_columns = list(range(9)) 
    df = pd.read_csv(file,delimiter=',', usecols=use_columns)
    return df

def extract_traceid_rows(df, tid):
    f_df = df[df['traceid'] == tid]
    tid_calls = [
        Call(
            str(row.traceid), 
            int(row.timestamp), 
            str(row.rpcid), 
            str(row.um), 
            str(row.dm), 
            str(row.rpctype), 
            str(row.interface), 
            int(row.rt)
        ) for row in f_df.itertuples(index=False)
    ]
    return tid_calls

def get_call_depth(rpc_id):
    if rpc_id == "0":
        return 1
    else:
        call_depth = 0
        for i in rpc_id:
            if i == ".":
                call_depth += 1
    return call_depth


In [7]:
# types :  ['userDefined', 'db', 'http', 'mq', 'rpc', 'mc']
# Extract to dataframe

df = csv_to_df("./casper_rebuild.csv")

# Extract all tids
tids_list = df['traceid'].unique()
num_traces = len(tids_list)
print("Num of tids: ", num_traces)

Num of tids:  10421


In [8]:
'''
NIS calc
Node Metrics: [type, NIS]
'''
nis_dict = {}
ctr = 0
for row in tqdm(df.itertuples(), total=len(df)):
    ctr += 1
    um_node = row.um
    dm_node = row.dm
    # Update nis dict for um_node
    if um_node in nis_dict:
        if row.traceid not in nis_dict[um_node]:
            nis_dict[um_node].append(row.traceid)
    else:
        nis_dict[um_node] = [row.traceid]
    # Update nis dict for dm_node
    if dm_node in nis_dict:
        if row.traceid not in nis_dict[dm_node]:
            nis_dict[dm_node].append(row.traceid)
    else:
        nis_dict[dm_node] = [row.traceid]
    # if ctr == 1000:
    #     break
print(len(nis_dict))
# replace traceid list with NIS
num_traces = 10421 ### TO BE REPLACED
for node in nis_dict:
    nis_dict[node] = set(nis_dict[node])
    nis = len(nis_dict[node])/num_traces ### TO BE REPLACED
    nis_dict[node] = [round(nis,7)]
# print(nis_dict)

100%|██████████| 350980/350980 [00:05<00:00, 60876.62it/s] 

3656





In [14]:
print(nis_dict)
print(len(nis_dict))

{'(?)': [1.5990465], '95a6f7f8345e2eca31ee74ddc19d547e7fc0f5c8e65772d7b08a68eb5214dc44': [0.5868815], '9653f5baba69c9fb50bfb30a8571eb04dbceaae7c7f379e20bd73a41168a2913': [0.7363143], '5c5200228afeec02308f5fdd4bdfa256b2245a163b3c81be10d0ecd190bdc32d': [0.0941969], '2e851da990f42464b9a776942a772cb4911dfca4778b0d3e28dceb3337d65c34': [0.429229], '04940a16b54c3d25fe069d5fa3b209e6897040996c3715a8f684b7dddb1d7ef9': [0.4065428], '6746853e39c5008988baf677bf27c846ee99173cafdec2aa8c949843d3a94b95': [0.517179], '9cc1113009ea0ec1a4cf99fa6984bd49be8dd026be42921fc6897e5bf121efad': [0.4241328], '7695b43b41732a0f15d3799c8eed2852665fe8da29fd700c383550fc16e521a3': [0.7082032], '0b0c981da8042a436f30f7d9f7cce74360abe630bd54785016816e10f4d22525': [0.5959231], '4fc1bfac21858f3290fc0a0184e6e9f9faf369ed377c1868f81973453b2bbfe9': [0.092553], '9f8edf2b85a2974dafdae8d961043bfa2f2c6e27485a0045bc04d518943d2429': [0.0894296], 'e6c95bef37d936ebb375bf135ded88c96eb9257a1d3d9de38a1dbd2751b0fbce': [0.1403913], '75e56c8fb

In [9]:
'''
Trace metrics calc
Trace Metrics: [trace_depth, TIS, initial_node, num_sf, num_sl]
'''

trace_met_dict = {}
cd_strat_dict = {} # dict. key: cd, val: [tid ,tis]
ctr = 0

for i in tqdm(range(len(tids_list))):
    ctr += 1
    # if i in [0,1,2]:
    #     continue
    tid_calls = extract_traceid_rows(df, tids_list[i])
    initial_node = ""
    t_edges = [] # list of edges of trace
    t_sf_ctr = 0
    t_sl_ctr = 0
    trace_depth = 0
    for call in tid_calls:
        t_edges.append([call.um, call.dm])
        call_depth = get_call_depth(call.rpcid)
        if call_depth > trace_depth: # update trace depth
            trace_depth = call_depth
        if call.rpcid == "0": # get initial node of trace
            initial_node = call.um
            # print(initial_node)
        if call.rpctype == "db": # get sf sl node count
            t_sf_ctr += 1
        else: 
            t_sl_ctr += 1

    # TIS calculation
    t_nodes = []
    for edge in t_edges:
        for node in edge:
            if node not in t_nodes:
                t_nodes.append(node)
    tis = 0
    for node in t_nodes:
        nis_temp = nis_dict[node][0]
        tis += nis_temp 
    tis = tis/len(t_nodes)

    # Collecting data for stratification
    if trace_depth not in cd_strat_dict:
        cd_strat_dict[trace_depth] = []
    cd_strat_dict[trace_depth].append([tids_list[i],tis])

    trace_met_dict[tids_list[i]] = [trace_depth, tis, initial_node, t_sf_ctr, t_sl_ctr]

    # if ctr == 5:
    #     break
        

100%|██████████| 10421/10421 [02:42<00:00, 64.05it/s]


In [10]:
print(trace_met_dict)

{'0b13987515919537679446000e20c6': [5, 0.34976250000000003, '', 0, 11], '0b133a7615919537803359000ec0fc': [8, 0.20245319411764703, '(?)', 0, 55], '0b19973915919537897651000f': [4, 0.05504433191489359, '(?)', 18, 115], '0b133ba915919537571387000e64ad': [4, 0.41793015, '', 0, 5], '0b14005e15919536451202000e5ec1': [4, 0.3166134428571428, '', 0, 17], '0b1339ef15919536802818000ea36a': [4, 0.32871126250000005, '(?)', 0, 18], '0b142abf15919538377749000e9be9': [3, 0.2499919833333333, '', 0, 7], '0b51054e15919536455104000e2efb': [5, 0.37612753750000005, '', 0, 11], '0b4063061591953854923300035362': [5, 0.4197614, '', 0, 4], '0b4504e91591953835722400023254': [5, 0.1089787, '', 0, 4], '0b1476cd15919536786739000f': [3, 0.013121659259259259, '', 72, 326], '0b19973915919538545565000f': [10, 0.056220164814814816, '(?)', 53, 154], '0b14283415919538357369000ef8da': [4, 0.41702332000000003, '(?)', 0, 6], '0b9345d31591953839011600011398': [5, 0.4197614, '', 0, 4], '0bfadb4615919534121407000d0a47': [5, 0.

In [12]:
'''
Stratification
'''
t_red = 0 # reduced num of traces (USER DEFINED)
cd_strat_arr = [] # [valid call depth, % of tids of resp cd]
cd_percent_arr = []
# ctr is used as the total num of tids

# Collecting cd percentage in the original trace
for cd in cd_strat_dict:
    percent_t_cd = 100 * (len(cd_strat_dict[cd])/num_traces)
    cd_percent_arr.append([cd, percent_t_cd])
    cd_strat_dict[cd] = cd_strat_dict[cd].sort(reverse=True) # sorting tids based on tis


In [14]:
print(cd_percent_arr)
res= 0
for i in cd_percent_arr:
    res += i[1]
print(res)

[[5, 18.606659629594088], [8, 6.47730544093657], [4, 30.505709624796083], [3, 16.38038575952404], [10, 2.303041934555225], [6, 8.895499472219557], [9, 3.185874676134728], [11, 2.293445926494578], [13, 1.4873812494002496], [14, 0.3646483063045773], [12, 2.638902216677862], [2, 1.7272814509164187], [7, 3.5313309663180115], [0, 0.31666826600134346], [1, 0.8156606851549755], [15, 0.1919201612129354], [16, 0.11515209672776125], [20, 0.028788024181940312], [17, 0.07676806448517416], [19, 0.01919201612129354], [18, 0.028788024181940312], [21, 0.00959600806064677]]
99.99999999999999


In [None]:
'''
NIS: Priority Sampling
'''
n = 0 # Reduced num of nodes (USER DEFINED)
# Sorted nis_dict based on NIS values
sorted_node_items = sorted(nis_dict.items(), key=lambda item: item[1][1], reverse=True)
nid_nis_arr_sorted_dict = dict(sorted_node_items)
sampled_nodes = list(nid_nis_arr_sorted_dict.keys()[:n]) # List of selected node ids

In [None]:
'''
TIS: Priority Sampling
'''
sampled_tids_inter = [] # T_inter

# Priority sampling based on TIS
for cd in cd_strat_arr:
    num_traces_to_sample_for_cd = t_red * cd[1]
    if num_traces_to_sample_for_cd < 1: 
        continue
    cd_traces_list = cd_strat_dict[cd[0]]
    cd_priority_sample = cd_traces_list[:num_traces_to_sample_for_cd]
    sampled_tids_inter.extend(cd_priority_sample)

In [None]:
'''
Node Pruning
Remove all nodes from T_inter that is not in sampled_nodes
'''

def cycle_func(A, B, pred_flag):
    # Cycle through B if it's shorter than A
    B_cycle = cycle(B)
    # Create tuples by pairing elements from A with elements from the cycled B
    if pred_flag == 1: # pred flag is used to check if direction is from pred to succ
        result = [(a, next(B_cycle)) for a in A]
    else:
        result = [(next(B_cycle), a) for a in A]
    return result


def remove_nodes_with_reconnect(G, node):
    '''
    Remove given node and check if graph breaks. If yes, connect pred nodes
    to succ nodes and then remove given node.
    returns: Updated graph after removing the given node
    '''
    G_temp = G.copy()
    G_temp.remove_node(node)

    if not nx.is_weakly_connected(G_temp): # checking if bridge node
        components = list(nx.weakly_connected_components)
        if len(components) > 1: # if bridge node
            pred_nodes = list(G.predecessors(node))
            succ_nodes = list(G.successors(node))
            if len(pred_nodes) >= len(succ_nodes):
                edges_to_add = cycle_func(pred_nodes, succ_nodes, 1)
            else:
                edges_to_add = cycle_func(succ_nodes, pred_nodes, 0)
            for edge in edges_to_add:
                G.add_edge(edge[0], edge[1])
    G.remove_node(node)

# Sampled_nodes has the selected nodes
for tid in sampled_tids_inter:
    # build graph
    # get list of nodes to remove
    # do node pruning
    pass

In [3]:
from itertools import cycle

def map_elements(A, B):
    # Cycle through B if it's shorter than A
    B_cycle = cycle(B)
    
    # Create tuples by pairing elements from A with elements from the cycled B
    result = [(a, next(B_cycle)) for a in A]
    
    return result

# Example usage
A = [1, 2, 3, 4, 5]
B = [1, 2, 3]

mapped_tuples = map_elements(A, B)
print(mapped_tuples[0][0])


1
