In [4]:
import json
import networkx as nx

In [2]:
from graph_generation import generate_graph

In [2]:
# Parsed JSON file used as the sample input for the cells below.
# NOTE(review): this is an absolute, machine-specific path and will not resolve
# on another machine; consider deriving it from a configurable data directory.
TEST_FILE = r"/Users/adityamagarde/Documents/UCR/CS240_NR/AnamolyDetection/data/parsed/updates.20031001.0000.json"

In [5]:

# Load the parsed sample file into `json_data`.
# Resetting first means a failed load leaves None rather than a value left over
# from an earlier run of this cell.
json_data = None
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(TEST_FILE, "r", encoding="utf-8") as my_file:
    json_data = json.load(my_file)

In [5]:
# Build the graph from the loaded JSON using the project-local generate_graph;
# `g` is the graph that the feature functions and networkx calls below receive.
g = generate_graph(json_data)

In [38]:
def add_max_and_average(features_dict, feature_name, feature_values):
    """Store the maximum and the mean of ``feature_values`` in ``features_dict``.

    Two keys are written in place: ``max_<feature_name>`` and
    ``avg_<feature_name>``.

    Args:
        features_dict: dict that is mutated to receive the two summary entries.
        feature_name: suffix used to build the two keys.
        feature_values: iterable of numbers to summarise. It may be empty, in
            which case both entries are set to 0.

    Returns:
        None; the result is delivered through ``features_dict``.
    """
    # Materialise once so generators and other one-shot iterables can be both
    # measured and aggregated.
    values = list(feature_values)

    if not values:
        # max() raises ValueError on an empty sequence; fall back to 0 for both
        # statistics, matching the existing 0 default of the average.
        features_dict[f"max_{feature_name}"] = 0
        features_dict[f"avg_{feature_name}"] = 0
        return

    features_dict[f"max_{feature_name}"] = max(values)
    features_dict[f"avg_{feature_name}"] = sum(values) / len(values)


def parse_path_length_centrality_feautres(graph: nx.Graph):
    """Summarise the shortest-path-based centralities of ``graph``.

    Betweenness, load, closeness and harmonic centrality are computed per node
    with networkx; for each measure the maximum and the mean over all nodes are
    recorded.

    Args:
        graph: networkx graph to analyse.

    Returns:
        dict with ``max_<measure>`` and ``avg_<measure>`` entries for
        ``betweenness_centrality``, ``load_centrality``,
        ``closeness_centrality`` and ``harmonic_centrality``, in that order.
    """
    # (feature name, networkx function returning a {node: value} dict)
    centrality_measures = (
        ("betweenness_centrality", nx.betweenness_centrality),
        ("load_centrality", nx.load_centrality),
        ("closeness_centrality", nx.closeness_centrality),
        ("harmonic_centrality", nx.harmonic_centrality),
    )

    centrality_features = dict()
    for measure_name, measure in centrality_measures:
        per_node_values = list(measure(graph).values())
        add_max_and_average(centrality_features, measure_name, per_node_values)

    # The dict is returned rather than printed; callers decide how to display it.
    return centrality_features


def parse_neighbourliness_centrality_features(graph: nx.graph):
    """Summarise the neighbourhood-based centralities of ``graph``.

    Degree centrality and eigenvector centrality are computed per node with
    networkx, and the maximum and mean of each are recorded.

    Args:
        graph: networkx graph to analyse.

    Returns:
        dict with ``max_``/``avg_`` entries for ``degree_centrality`` and
        ``eigenvector_centrality``.
    """
    features = {}

    per_node_degree = nx.degree_centrality(graph)
    add_max_and_average(features, "degree_centrality", list(per_node_degree.values()))

    # Called with networkx defaults; any error it raises propagates to the caller.
    per_node_eigenvector = nx.eigenvector_centrality(graph)
    add_max_and_average(features, "eigenvector_centrality", list(per_node_eigenvector.values()))

    return features


def parse_centrality_features(graph: nx.graph, parse_path_length_centralities: bool, parse_neighbourliness: bool):
    """Collect the requested groups of centrality features into one dict.

    Args:
        graph: networkx graph to analyse.
        parse_path_length_centralities: when truthy, include the features from
            ``parse_path_length_centrality_feautres``.
        parse_neighbourliness: when truthy, include the features from
            ``parse_neighbourliness_centrality_features``.

    Returns:
        A new dict holding the selected features; empty if both flags are falsy.
    """
    merged = {}

    # Path-length features go in first, so neighbourliness entries follow them.
    if parse_path_length_centralities:
        merged.update(parse_path_length_centrality_feautres(graph))

    if parse_neighbourliness:
        merged.update(parse_neighbourliness_centrality_features(graph))

    return merged


In [44]:
def parse_clique_features(graph: nx.graph):
    """Summarise per-node clique statistics of ``graph``.

    Uses ``nx.number_of_cliques`` and ``nx.node_clique_number``, each of which
    yields one value per node, and records the maximum and mean of both.

    Args:
        graph: networkx graph to analyse.

    Returns:
        dict with ``max_``/``avg_`` entries for ``number_of_cliques`` and
        ``size_of_cliques``.
    """
    summary = {}

    cliques_per_node = nx.number_of_cliques(graph)
    add_max_and_average(summary, "number_of_cliques", list(cliques_per_node.values()))

    clique_size_per_node = nx.node_clique_number(graph)
    add_max_and_average(summary, "size_of_cliques", list(clique_size_per_node.values()))

    return summary


In [46]:
# Run the clique summary on the sample graph; the bare expression displays the returned dict.
parse_clique_features(g)

{'max_number_of_cliques': 18,
 'avg_number_of_cliques': 2.520661157024793,
 'max_size_of_cliques': 4,
 'avg_size_of_cliques': 2.2892561983471076}

In [47]:
# Exploratory: per-node clustering coefficients of the sample graph (dict keyed by node).
nx.clustering(g)

{'513': 0.19444444444444445,
 '3320': 0.06666666666666667,
 '1239': 0.14285714285714285,
 '701': 0.07272727272727272,
 '7911': 0.09523809523809523,
 '26608': 0,
 '11537': 0.047619047619047616,
 '3912': 0,
 '3356': 0.2222222222222222,
 '15412': 0,
 '20858': 0,
 '21152': 0,
 '17395': 0,
 '209': 0.03571428571428571,
 '4519': 0,
 '9121': 0,
 '9021': 0,
 '29060': 0,
 '25364': 0,
 '8721': 0,
 '852': 0,
 '559': 0.3,
 '20965': 0.13333333333333333,
 '12956': 0.3333333333333333,
 '7004': 0.3333333333333333,
 '15311': 1.0,
 '2914': 0.1,
 '3944': 0,
 '3561': 0.1388888888888889,
 '7418': 1.0,
 '12654': 0.1,
 '13237': 0,
 '4637': 0.3333333333333333,
 '1221': 0,
 '17477': 0,
 '15804': 0,
 '6453': 0,
 '8452': 0,
 '1853': 0,
 '3291': 0.5,
 '5511': 0.16666666666666666,
 '17927': 0,
 '1785': 0.3333333333333333,
 '2603': 0,
 '28781': 0,
 '2048': 0,
 '6762': 0,
 '6730': 0.3333333333333333,
 '702': 0,
 '3216': 0,
 '22584': 0,
 '19092': 0,
 '3908': 0,
 '16804': 0,
 '24': 0,
 '10343': 0,
 '568': 0,
 '1733': 0

In [48]:
# Exploratory: per-node triangle counts of the sample graph (dict keyed by node).
nx.triangles(g)

{'513': 7,
 '3320': 14,
 '1239': 13,
 '701': 4,
 '7911': 2,
 '26608': 0,
 '11537': 1,
 '3912': 0,
 '3356': 8,
 '15412': 0,
 '20858': 0,
 '21152': 0,
 '17395': 0,
 '209': 1,
 '4519': 0,
 '9121': 0,
 '9021': 0,
 '29060': 0,
 '25364': 0,
 '8721': 0,
 '852': 0,
 '559': 3,
 '20965': 2,
 '12956': 1,
 '7004': 1,
 '15311': 1,
 '2914': 1,
 '3944': 0,
 '3561': 5,
 '7418': 1,
 '12654': 1,
 '13237': 0,
 '4637': 1,
 '1221': 0,
 '17477': 0,
 '15804': 0,
 '6453': 0,
 '8452': 0,
 '1853': 0,
 '3291': 3,
 '5511': 1,
 '17927': 0,
 '1785': 1,
 '2603': 0,
 '28781': 0,
 '2048': 0,
 '6762': 0,
 '6730': 1,
 '702': 0,
 '3216': 0,
 '22584': 0,
 '19092': 0,
 '3908': 0,
 '16804': 0,
 '24': 0,
 '10343': 0,
 '568': 0,
 '1733': 0,
 '6035': 0,
 '9837': 0,
 '10886': 0,
 '6059': 0,
 '6509': 0,
 '10764': 0,
 '297': 0,
 '3549': 0,
 '8319': 0,
 '2907': 0,
 '30078': 1,
 '703': 0,
 '7660': 0,
 '3836': 0,
 '7018': 3,
 '11172': 0,
 '7125': 0,
 '1913': 0,
 '3542': 0,
 '7474': 0,
 '4323': 5,
 '23137': 0,
 '8386': 0,
 '20591': 0

In [49]:
# Exploratory: per-node square clustering coefficients of the sample graph (dict keyed by node).
nx.square_clustering(g)

{'513': 0.03485254691689008,
 '3320': 0.03569267997580157,
 '1239': 0.05136612021857923,
 '701': 0.06297229219143577,
 '7911': 0.040983606557377046,
 '26608': 0,
 '11537': 0.010752688172043012,
 '3912': 0,
 '3356': 0.07177033492822966,
 '15412': 0.08571428571428572,
 '20858': 0.0,
 '21152': 0.0,
 '17395': 0,
 '209': 0.028,
 '4519': 0,
 '9121': 0.02564102564102564,
 '9021': 0.0,
 '29060': 0,
 '25364': 0,
 '8721': 0.0,
 '852': 0,
 '559': 0.08333333333333333,
 '20965': 0.07446808510638298,
 '12956': 0.03571428571428571,
 '7004': 0.14285714285714285,
 '15311': 1.0,
 '2914': 0.046875,
 '3944': 0,
 '3561': 0.04197530864197531,
 '7418': 1.0,
 '12654': 0.14893617021276595,
 '13237': 0.09090909090909091,
 '4637': 0.1,
 '1221': 0.0,
 '17477': 0.043478260869565216,
 '15804': 0,
 '6453': 0.0,
 '8452': 0.0,
 '1853': 0.2857142857142857,
 '3291': 0.07368421052631578,
 '5511': 0.011904761904761904,
 '17927': 0,
 '1785': 0.04,
 '2603': 0.2857142857142857,
 '28781': 0.0,
 '2048': 0,
 '6762': 0.071428571

In [50]:
# Exploratory: average neighbour degree of every node in the sample graph (dict keyed by node).
nx.average_neighbor_degree(g)

{'513': 6.555555555555555,
 '3320': 5.142857142857143,
 '1239': 6.428571428571429,
 '701': 4.909090909090909,
 '7911': 7.142857142857143,
 '26608': 7.0,
 '11537': 3.2857142857142856,
 '3912': 7.0,
 '3356': 7.444444444444445,
 '15412': 7.333333333333333,
 '20858': 1.75,
 '21152': 3.0,
 '17395': 14.0,
 '209': 5.625,
 '4519': 8.0,
 '9121': 5.2,
 '9021': 3.0,
 '29060': 3.0,
 '25364': 4.0,
 '8721': 2.6666666666666665,
 '852': 14.0,
 '559': 5.2,
 '20965': 4.5,
 '12956': 11.0,
 '7004': 3.0,
 '15311': 2.25,
 '2914': 7.8,
 '3944': 5.0,
 '3561': 7.111111111111111,
 '7418': 3.5,
 '12654': 3.8,
 '13237': 13.0,
 '4637': 12.333333333333334,
 '1221': 4.0,
 '17477': 3.2,
 '15804': 4.0,
 '6453': 11.5,
 '8452': 2.0,
 '1853': 5.5,
 '3291': 10.0,
 '5511': 8.25,
 '17927': 4.0,
 '1785': 5.666666666666667,
 '2603': 5.5,
 '28781': 2.0,
 '2048': 8.0,
 '6762': 9.75,
 '6730': 6.333333333333333,
 '702': 7.8,
 '3216': 5.0,
 '22584': 8.0,
 '19092': 5.0,
 '3908': 2.0,
 '16804': 6.5,
 '24': 4.5,
 '10343': 2.0,
 '568'

In [52]:
# Exploratory: eccentricity of every node in the sample graph (dict keyed by node).
nx.eccentricity(g)

{'513': 7,
 '3320': 6,
 '1239': 5,
 '701': 6,
 '7911': 6,
 '26608': 7,
 '11537': 8,
 '3912': 9,
 '3356': 5,
 '15412': 6,
 '20858': 7,
 '21152': 8,
 '17395': 6,
 '209': 6,
 '4519': 7,
 '9121': 6,
 '9021': 7,
 '29060': 8,
 '25364': 8,
 '8721': 7,
 '852': 6,
 '559': 7,
 '20965': 8,
 '12956': 7,
 '7004': 8,
 '15311': 9,
 '2914': 6,
 '3944': 7,
 '3561': 6,
 '7418': 9,
 '12654': 8,
 '13237': 7,
 '4637': 6,
 '1221': 7,
 '17477': 8,
 '15804': 8,
 '6453': 7,
 '8452': 8,
 '1853': 9,
 '3291': 7,
 '5511': 7,
 '17927': 8,
 '1785': 7,
 '2603': 9,
 '28781': 8,
 '2048': 7,
 '6762': 6,
 '6730': 6,
 '702': 6,
 '3216': 7,
 '22584': 7,
 '19092': 7,
 '3908': 8,
 '16804': 7,
 '24': 9,
 '10343': 10,
 '568': 7,
 '1733': 8,
 '6035': 9,
 '9837': 7,
 '10886': 9,
 '6059': 10,
 '6509': 9,
 '10764': 8,
 '297': 9,
 '3549': 7,
 '8319': 8,
 '2907': 7,
 '30078': 6,
 '703': 7,
 '7660': 9,
 '3836': 10,
 '7018': 6,
 '11172': 7,
 '7125': 8,
 '1913': 7,
 '3542': 8,
 '7474': 7,
 '4323': 6,
 '23137': 7,
 '8386': 7,
 '20591': 

In [55]:
def parse_clustering_features(graph: nx.graph):
    """Summarise clustering-related per-node measures of ``graph``.

    Clustering coefficient, triangle count and square clustering coefficient
    are computed per node with networkx; the maximum and mean of each are
    recorded.

    Args:
        graph: networkx graph to analyse.

    Returns:
        dict with ``max_``/``avg_`` entries for ``clustering_coefficent``,
        ``traingle_coefficient`` and ``square_clustering_coefficient``.
    """
    summary = {}

    # The spelling of the first two feature names is part of the emitted keys
    # and is kept exactly as is.
    per_node_clustering = nx.clustering(graph)
    add_max_and_average(summary, "clustering_coefficent", list(per_node_clustering.values()))

    per_node_triangles = nx.triangles(graph)
    add_max_and_average(summary, "traingle_coefficient", list(per_node_triangles.values()))

    per_node_square_clustering = nx.square_clustering(graph)
    add_max_and_average(summary, "square_clustering_coefficient", list(per_node_square_clustering.values()))

    return summary




In [56]:
# Run the clustering summary on the sample graph; the bare expression displays the returned dict.
parse_clustering_features(g)

{'max_clustering_coefficent': 1.0,
 'avg_clustering_coefficent': 0.08002516308301433,
 'max_traingle_coefficient': 14,
 'avg_traingle_coefficient': 0.71900826446281,
 'max_square_clustering_coefficient': 1.0,
 'avg_square_clustering_coefficient': 0.04411756789328106}

In [57]:
def parse_misc_features(graph: nx.graph):
    """Summarise average neighbour degree and eccentricity of ``graph``.

    Both measures are computed per node with networkx, and the maximum and
    mean of each are recorded.

    Args:
        graph: networkx graph to analyse.

    Returns:
        dict with ``max_``/``avg_`` entries for ``avg_neighbour_degree`` and
        ``eccentricity``.
    """
    summary = {}

    neighbour_degree_per_node = nx.average_neighbor_degree(graph)
    add_max_and_average(summary, "avg_neighbour_degree", list(neighbour_degree_per_node.values()))

    # NOTE(review): nx.eccentricity is documented to raise on graphs that are
    # not connected - confirm the inputs are always connected.
    eccentricity_per_node = nx.eccentricity(graph)
    add_max_and_average(summary, "eccentricity", list(eccentricity_per_node.values()))

    return summary
    


In [58]:
# Run the miscellaneous summary on the sample graph; the bare expression displays the returned dict.
parse_misc_features(g)

{'max_avg_neighbour_degree': 14.0,
 'avg_avg_neighbour_degree': 5.219795058018199,
 'max_eccentricity': 10,
 'avg_eccentricity': 7.4958677685950414}

In [1]:
from graph_feat import parse_graph_features

In [6]:
# Feature extraction through the imported graph_feat module, starting from the
# loaded JSON (`json_data`) rather than the prebuilt graph `g`.
parse_graph_features(json_data)

{'max_betweenness_centrality': 0.5691047833664511, 'avg_betweenness_centrality': 0.0280077320184272, 'max_load_centrality': 0.5630940709617182, 'avg_load_centrality': 0.028007732018427194, 'max_closeness_centrality': 0.40816326530612246, 'avg_closeness_centrality': 0.24042175147559577, 'max_harmonic_centrality': 59.81666666666673, 'avg_harmonic_centrality': 32.66418076872621}


{'max_betweenness_centrality': 0.5691047833664511,
 'avg_betweenness_centrality': 0.0280077320184272,
 'max_load_centrality': 0.5630940709617182,
 'avg_load_centrality': 0.028007732018427194,
 'max_closeness_centrality': 0.40816326530612246,
 'avg_closeness_centrality': 0.24042175147559577,
 'max_harmonic_centrality': 59.81666666666673,
 'avg_harmonic_centrality': 32.66418076872621,
 'max_degree_centrality': 0.175,
 'avg_degree_centrality': 0.024655647382920073,
 'max_eigenvector_centrality': 0.48811723850483757,
 'avg_eigenvector_centrality': 0.05021648396048485,
 'max_number_of_cliques': 18,
 'avg_number_of_cliques': 2.520661157024793,
 'max_size_of_cliques': 4,
 'avg_size_of_cliques': 2.2892561983471076,
 'max_clustering_coefficent': 1.0,
 'avg_clustering_coefficent': 0.08002516308301433,
 'max_traingle_coefficient': 14,
 'avg_traingle_coefficient': 0.71900826446281,
 'max_square_clustering_coefficient': 1.0,
 'avg_square_clustering_coefficient': 0.04411756789328106,
 'max_avg_neigh