In [7]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from tqdm import tqdm
from networkx.algorithms.similarity import optimize_graph_edit_distance

In [2]:
# Trace building code
class Trace:
    """Holds the list of calls belonging to a trace; the list starts out empty."""

    def __init__(self):
        # A fresh list per instance, so traces never share call storage.
        self.calls = list()

class Node_info:
    """Pairs a node's numeric id (`num_id`) with its type label (`n_type`)."""

    def __init__(self, num_id, n_type):
        # Both values are stored exactly as given, with no validation or conversion.
        self.num_id, self.n_type = num_id, n_type

class Call:
    """One record of the call-graph data.

    Attributes mirror the constructor arguments one-to-one:
    traceid, timestamp, rpcid, um, dm, rpctype, interface, rt.
    `um` and `dm` are used elsewhere in this notebook as the source and
    target of a graph edge (presumably upstream / downstream microservice).
    """

    def __init__(self, traceid, timestamp, rpcid, um, dm, rpctype, interface, rt):
        self.traceid, self.timestamp, self.rpcid = traceid, timestamp, rpcid
        self.um, self.dm = um, dm
        self.rpctype, self.interface, self.rt = rpctype, interface, rt

    def string(self):
        """Return the record as one comma-separated line, fields in constructor order.

        Only `timestamp` and `rt` are converted with `str()`; every other
        field must already be a string or a TypeError is raised.
        """
        fields = [
            self.traceid,
            str(self.timestamp),
            self.rpcid,
            self.um,
            self.dm,
            self.rpctype,
            self.interface,
            str(self.rt),
        ]
        return ",".join(fields)

def csv_to_df(file: str):
    """Read the CSV at `file` with pandas' default options and return the DataFrame."""
    return pd.read_csv(file)

def extract_traceid_rows(df, tid):
    """Return one `Call` per row of `df` whose `traceid` column equals `tid`.

    Rows keep their DataFrame order. `timestamp` and `rt` are coerced to
    int and every other field to str, so a value that `int()` cannot
    convert makes the call raise.
    """
    matches = df['traceid'] == tid
    tid_calls = []
    for record in df[matches].itertuples(index=False):
        tid_calls.append(
            Call(
                traceid=str(record.traceid),
                timestamp=int(record.timestamp),
                rpcid=str(record.rpcid),
                um=str(record.um),
                dm=str(record.dm),
                rpctype=str(record.rpctype),
                interface=str(record.interface),
                rt=int(record.rt),
            )
        )
    return tid_calls



In [8]:
'''
Metrics Extraction functions
'''

def get_call_depth(rpc_id):
    """Return the depth encoded in an rpc id.

    The id "0" is special-cased to depth 1; any other id yields the number
    of "." characters it contains.
    """
    # NOTE(review): "0" and "0.1" both map to depth 1 under this rule —
    # confirm that is the intended metric.
    if rpc_id == "0":
        return 1
    return sum(1 for ch in rpc_id if ch == ".")


def gen_base_graph(n):
    '''
    Build a directed chain 0 -> 1 -> ... -> n-1 (n nodes, n-1 edges).

    For n <= 1 there are no edges, so the returned graph has no nodes at all.
    '''
    chain = nx.DiGraph()
    # Pair each node with its successor: (0, 1), (1, 2), ..., (n-2, n-1).
    chain.add_edges_from(zip(range(n - 1), range(1, n)))
    return chain

def plot_graph_simple(graph, filename):
    """Draw `graph` with a spring layout and save the picture to `filename`.

    Nodes are light blue and unlabeled, edges black. The layout is computed
    without a seed, so node positions can differ from run to run.
    """
    fig = plt.figure(figsize=(8, 6))  # fixed canvas size in inches
    layout = nx.spring_layout(graph)
    draw_options = {
        "with_labels": False,
        "node_color": 'lightblue',
        "edge_color": 'black',
    }
    nx.draw(graph, layout, **draw_options)

    # High-resolution export with the surrounding whitespace trimmed.
    fig.savefig(filename, dpi=300, bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def build_digraph(df, tids_list):
    '''
    Builds one directed graph from the calls of several traces.

    Args:
        df: DataFrame of call rows, as accepted by extract_traceid_rows.
        tids_list: iterable of trace ids whose calls should be added.
    Returns:
        A DiGraph with one edge um -> dm for every call of every listed
        trace. An empty tids_list yields an empty graph.
    '''
    # Accumulate across ALL traces: the edge list must live outside the loop,
    # otherwise each iteration discards the edges collected for earlier traces.
    all_edges = []
    for tid in tids_list:
        all_edges.extend(
            (call.um, call.dm) for call in extract_traceid_rows(df, tid)
        )
    G = nx.DiGraph()
    G.add_edges_from(all_edges)
    return G
    

In [4]:
# types :  ['userDefined', 'db', 'http', 'mq', 'rpc', 'mc']
# (presumably the distinct values of the `rpctype` column — TODO confirm)
# Extract to dataframe: load the call-graph CSV from the working directory
df = csv_to_df("./MSCallGraph_0.csv")

# Extract all tids: the distinct values of the `traceid` column
tids_list = df['traceid'].unique()
print("Num of tids: ", len(tids_list))

Num of tids:  130512


In [18]:
'''
Metrics collected: trace_depth, initial_node, sf_count, sl_count, ged
'''

# Number of traces to process. Graph edit distance is slow (seconds per trace),
# so only a small prefix is handled; set to None to process every trace.
max_traces = 5
# 0-based index of the last GED approximation to consume, i.e. at most
# max_ged_iter + 1 successive approximations are taken per trace.
max_ged_iter = 2

trace_met_dict = {}
for tid in tqdm(tids_list[:max_traces]):
    tid_calls = extract_traceid_rows(df, tid)
    initial_node = ""
    t_edges = []  # list of edges of trace
    t_sf_ctr = 0  # calls with rpctype "db"
    t_sl_ctr = 0  # calls of every other rpctype
    trace_depth = 0
    for call in tid_calls:
        t_edges.append([call.um, call.dm])
        # Trace depth is the deepest call seen in the trace.
        trace_depth = max(trace_depth, get_call_depth(call.rpcid))
        if call.rpcid == "0":
            # NOTE(review): this records the trace id, not a node name, and
            # stays "" when the trace has no call with rpcid "0". If the
            # root service is wanted, this should probably be call.um or
            # call.dm — confirm before relying on this column.
            initial_node = tid
        if call.rpctype == "db":
            t_sf_ctr += 1
        else:
            t_sl_ctr += 1

    G = nx.DiGraph()
    G.add_edges_from(t_edges)  # build trace graph
    G_base = gen_base_graph(trace_depth)

    # Keep the most recent approximation yielded, stopping after
    # max_ged_iter + 1 of them. Resetting `ged` per trace and assigning on
    # every iteration guarantees the value belongs to THIS trace even when
    # the generator finishes early; previously an early finish left the
    # previous trace's distance (or an undefined name) in `ged`.
    ged = None
    for j, distance in enumerate(optimize_graph_edit_distance(G, G_base)):
        ged = distance
        if j >= max_ged_iter:
            break
    trace_met_dict[tid] = [trace_depth, initial_node, t_sf_ctr, t_sl_ctr, ged]

print(trace_met_dict)
    
        

  0%|          | 0/130512 [00:00<?, ?it/s]

  0%|          | 4/130512 [00:35<318:52:26,  8.80s/it] 

{'0b133c1915919238193454000e5d37': [14, '', 1, 539, 59.0], '0b1339ef15919237750706000eded3': [13, '0b1339ef15919237750706000eded3', 1, 181, 88.0], '0b11355e15919238739898000e4315': [12, '0b11355e15919238739898000e4315', 1, 164, 79.0], '0b133c0e15919237695864000e207d': [5, '', 0, 16, 16.0], '0b133c0e15919236640847000e1f94': [3, '', 0, 4, 16.0]}





In [None]:
# Check
