In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import cycle

In [None]:
# Trace building code
class Trace:
    """Holder for a list of calls, exposed as the ``calls`` attribute."""

    def __init__(self):
        # Every instance gets its own, initially empty, list.
        self.calls = list()

class Node_info:
    """Plain record that stores a node's ``num_id`` together with its ``n_type``."""

    def __init__(self, num_id, n_type):
        # Both values are stored as given, without validation or conversion.
        self.num_id, self.n_type = num_id, n_type

class Call:
    """One call record of a trace.

    The eight constructor arguments are stored unchanged as attributes of the
    same name. Elsewhere in this notebook ``um`` and ``dm`` are used as the two
    endpoints of an edge (``um`` -> ``dm``), and ``rpcid`` is a dotted string
    whose dots are counted to derive a call depth.
    """

    def __init__(self, traceid, timestamp, rpcid, um, dm, rpctype, interface, rt):
        self.traceid, self.timestamp, self.rpcid = traceid, timestamp, rpcid
        self.um, self.dm = um, dm
        self.rpctype, self.interface, self.rt = rpctype, interface, rt

    def string(self):
        """Return the record as one comma-separated line, fields in constructor order.

        Only ``timestamp`` and ``rt`` are converted with ``str``; the remaining
        fields must already be strings.
        """
        fields = [
            self.traceid,
            str(self.timestamp),
            self.rpcid,
            self.um,
            self.dm,
            self.rpctype,
            self.interface,
            str(self.rt),
        ]
        return ",".join(fields)

def csv_to_df(file: str):
    """Read the CSV at ``file`` with ``pd.read_csv`` (default options) and return the DataFrame."""
    return pd.read_csv(file)

def extract_traceid_rows(df, tid):
    """Build a ``Call`` for every row of ``df`` whose ``traceid`` equals ``tid``.

    Rows are visited in DataFrame order. ``timestamp`` and ``rt`` are cast to
    ``int``; every other field is cast to ``str``.

    Returns:
        list of ``Call`` objects (empty when no row matches).
    """
    is_match = df['traceid'] == tid
    matching_rows = df[is_match]

    tid_calls = []
    for rec in matching_rows.itertuples(index=False):
        tid_calls.append(
            Call(
                str(rec.traceid),
                int(rec.timestamp),
                str(rec.rpcid),
                str(rec.um),
                str(rec.dm),
                str(rec.rpctype),
                str(rec.interface),
                int(rec.rt),
            )
        )
    return tid_calls

def get_call_depth(rpc_id):
    """Return the call depth encoded in ``rpc_id``.

    The literal id ``"0"`` maps to depth 1. Every other id maps to the number
    of ``"."`` characters it contains.

    NOTE(review): under this rule ``"0"`` and ``"0.1"`` both yield 1, and an id
    without dots other than ``"0"`` yields 0 -- confirm this is intended.
    """
    if rpc_id == "0":
        return 1
    return sum(1 for ch in rpc_id if ch == ".")


In [None]:
# types :  ['userDefined', 'db', 'http', 'mq', 'rpc', 'mc']
# (presumably the values found in the rpctype column -- TODO confirm against the data)

# Load the call-graph CSV into a DataFrame; later cells read this `df` directly.
df = csv_to_df("./MSCallGraph_0.csv")

# Distinct trace ids in order of first appearance, and how many there are.
# `num_traces` is the denominator of the NIS computation in a later cell.
tids_list = df['traceid'].unique()
num_traces = len(tids_list)
print("Num of tids: ", num_traces)

In [None]:
'''
NIS calc
Node Metrics: [NIS]   (node type is not recorded yet)
NIS of a node = (number of distinct traces the node appears in) / num_traces
'''
# For every node (seen as either `um` or `dm`), collect the distinct trace ids
# it appears in. Sets give constant-time de-duplication; the previous
# list-membership test rescanned the list on every row.
node_trace_ids = {}
for row in tqdm(df.itertuples(), total=len(df)):
    # um first, then dm, so nodes keep their first-appearance order.
    for node in (row.um, row.dm):
        node_trace_ids.setdefault(node, set()).add(row.traceid)
print(len(node_trace_ids))

# Convert each trace-id set into the node's metric list. The value stays a
# one-element list because later cells read the score as nis_dict[node][0].
nis_dict = {
    node: [round(len(trace_ids) / num_traces, 5)]  # TO BE REPLACED
    for node, trace_ids in node_trace_ids.items()
}

# Show only a small preview; printing the whole dict floods the cell output.
print(dict(list(nis_dict.items())[:5]))

In [None]:
'''
Trace metrics calc
Trace Metrics: [trace_depth, TIS, initial_node, num_sf, num_sl]
TIS of a trace = mean NIS over the distinct nodes of the trace
'''

# Cap on the number of traces processed (development aid). Set to None to
# process every trace id in tids_list.
MAX_TRACES = 5

trace_met_dict = {}  # key: tid, val: [trace_depth, tis, initial_node, num_sf, num_sl]
cd_strat_dict = {} # dict. key: cd, val: list of [tid, tis]
ctr = 0  # number of traces processed; the stratification cell uses it as the trace total

for tid in tqdm(tids_list):
    tid_calls = extract_traceid_rows(df, tid)
    if not tid_calls:
        # An id that matches no rows has no nodes, so no TIS can be computed
        # (the mean below would divide by zero). Skip it without counting it.
        continue
    ctr += 1

    initial_node = ""
    t_edges = [] # list of edges of trace
    t_sf_ctr = 0
    t_sl_ctr = 0
    trace_depth = 0
    for call in tid_calls:
        t_edges.append([call.um, call.dm])
        # Trace depth is the deepest call seen in the trace.
        trace_depth = max(trace_depth, get_call_depth(call.rpcid))
        if call.rpcid == "0": # get initial node of trace
            initial_node = call.um
        if call.rpctype == "db": # "db" calls count towards sf, everything else towards sl
            t_sf_ctr += 1
        else:
            t_sl_ctr += 1

    # TIS calculation: distinct nodes in first-appearance order
    # (dict.fromkeys de-duplicates without rescanning a list), then mean NIS.
    t_nodes = list(dict.fromkeys(node for edge in t_edges for node in edge))
    tis = sum(nis_dict[node][0] for node in t_nodes) / len(t_nodes)

    # Collecting data for stratification
    cd_strat_dict.setdefault(trace_depth, []).append([tid, tis])

    trace_met_dict[tid] = [trace_depth, tis, initial_node, t_sf_ctr, t_sl_ctr]

    if MAX_TRACES is not None and ctr == MAX_TRACES:
        break
        

In [None]:
'''
Stratification
'''
t_red = 0 # reduced num of traces (USER DEFINED)
cd_strat_arr = [] # list of [valid call depth, % of tids of resp cd]
# ctr is used as the total num of tids

# Collecting cd percentage in the original trace
for cd, cd_traces in cd_strat_dict.items():
    percent_t_cd = 100 * (len(cd_traces) / ctr)
    # append is a method call; the previous `append[...]` subscripted the
    # method object and raised TypeError.
    cd_strat_arr.append([cd, percent_t_cd])
    # Sort the [tid, tis] entries by tis, highest first. list.sort works in
    # place and returns None, so its result must not be assigned back.
    cd_traces.sort(key=lambda entry: entry[1], reverse=True)


In [None]:
'''
NIS: Priority Sampling
'''
n = 0 # Reduced num of nodes (USER DEFINED)
# Sorted nis_dict based on NIS values.
# Each nis_dict value is a one-element list [NIS], so the score is element 0
# (index 1 does not exist and raised IndexError).
sorted_node_items = sorted(nis_dict.items(), key=lambda item: item[1][0], reverse=True)
nid_nis_arr_sorted_dict = dict(sorted_node_items)
# dict views cannot be sliced; materialise the keys first, then keep the top n.
sampled_nodes = list(nid_nis_arr_sorted_dict)[:n] # List of selected node ids

In [None]:
'''
TIS: Priority Sampling
'''
sampled_tids_inter = [] # T_inter: trace ids kept after priority sampling

# Priority sampling based on TIS
for cd, percent_t_cd in cd_strat_arr:
    # percent_t_cd is a percentage (0-100), so scale by 1/100. The quota is
    # truncated to an int because it is used as a slice bound.
    num_traces_to_sample_for_cd = int(t_red * percent_t_cd / 100)
    if num_traces_to_sample_for_cd < 1:
        continue
    # Entries are [tid, tis]; the first `quota` entries of the depth bucket are taken.
    cd_priority_sample = cd_strat_dict[cd][:num_traces_to_sample_for_cd]
    # Keep only the trace ids, which is what the pruning loop iterates over.
    sampled_tids_inter.extend(tid for tid, _tis in cd_priority_sample)

In [None]:
'''
Node Pruning
Remove all nodes from T_inter that is not in sampled_nodes
'''

def cycle_func(A, B, pred_flag):
    """Pair every element of ``A`` with an element of ``B``, reusing ``B`` cyclically.

    Args:
        A: iterable that determines how many pairs are produced.
        B: iterable cycled through to supply the partner of each element of ``A``.
        pred_flag: ``1`` produces ``(a, b)`` tuples (direction pred -> succ);
            any other value produces ``(b, a)`` tuples.

    Returns:
        list of 2-tuples, one per element of ``A``. Empty when ``B`` is empty,
        because there is nothing to pair with.
    """
    B = list(B)
    if not B:
        # cycle() over an empty iterable makes next() raise StopIteration,
        # which previously escaped to the caller.
        return []
    # Cycle through B if it's shorter than A
    B_cycle = cycle(B)
    # Create tuples by pairing elements from A with elements from the cycled B
    if pred_flag == 1: # pred flag is used to check if direction is from pred to succ
        return [(a, next(B_cycle)) for a in A]
    return [(next(B_cycle), a) for a in A]


def remove_nodes_with_reconnect(G, node):
    '''
    Remove given node and check if graph breaks. If yes, connect pred nodes
    to succ nodes and then remove given node.

    G is modified in place. The check is done on a copy: the node is removed
    from the copy and, if what remains is not weakly connected, edges from the
    node's predecessors to its successors are added to G before the node is
    removed from G.

    returns: Updated graph after removing the given node (the same object G)
    '''
    G_temp = G.copy()
    G_temp.remove_node(node)

    # is_weakly_connected raises on a graph with no nodes, so only test a
    # non-empty remainder. "Not weakly connected" already means the remainder
    # has more than one component, i.e. `node` was a bridge node.
    if G_temp.number_of_nodes() > 0 and not nx.is_weakly_connected(G_temp):
        # Ignore the node itself (self-loop): edges touching it vanish with it.
        pred_nodes = [p for p in G.predecessors(node) if p != node]
        succ_nodes = [s for s in G.successors(node) if s != node]
        # Iterate over the longer side and cycle the shorter one so that every
        # neighbour on the longer side gets an edge.
        if len(pred_nodes) >= len(succ_nodes):
            edges_to_add = cycle_func(pred_nodes, succ_nodes, 1)
        else:
            edges_to_add = cycle_func(succ_nodes, pred_nodes, 0)
        G.add_edges_from(edges_to_add)
    G.remove_node(node)
    return G

# Node pruning over the sampled traces -- NOT IMPLEMENTED YET (placeholder loop).
# sampled_nodes holds the selected nodes; every other node is meant to be pruned.
for tid in sampled_tids_inter:
    # TODO: build the graph for this sampled entry
    # TODO: get the list of nodes to remove (those not in sampled_nodes)
    # TODO: do node pruning (see remove_nodes_with_reconnect)
    pass

In [3]:
from itertools import cycle

def map_elements(A, B):
    """Pair each element of ``A`` with successive elements of ``B``.

    ``B`` restarts from its beginning whenever it runs out, so the result
    always has one ``(a, b)`` tuple per element of ``A``.
    """
    b_values = cycle(B)  # endless repetition of B

    pairs = []
    for item in A:
        pairs.append((item, next(b_values)))
    return pairs

# Example usage: A is longer than B, so B is reused from its start for A's
# fourth and fifth elements.
A = [1, 2, 3, 4, 5]
B = [1, 2, 3]

mapped_tuples = map_elements(A, B)
# First element of the first tuple, i.e. A[0]; prints 1.
print(mapped_tuples[0][0])


1
