In [1]:
import networkx
from networkx.readwrite import json_graph
from pathlib import Path
import utilities
import os

### Load Data

In [2]:
# Node list of the FP (fingerprinting) cluster C_7_1 from the 2019 community files.
fp_cluster_addrr = "/home/c6/Desktop/OpenWPM/jsons/community_tracking/real_graphs/2019/C_7_1"
fp_cluster_nodes = utilities.read_json(fp_cluster_addrr)

# Full 2019 graph, stored on disk in node-link JSON form.
graph_address = "/home/c6/Desktop/OpenWPM/jsons/AST/CDX_results/Graphs/2019_old/Graph.json"
G = json_graph.node_link_graph(utilities.read_json(graph_address))

# Keep only the part of the graph induced by the cluster's nodes.
fp_graph = G.subgraph(fp_cluster_nodes)

In [3]:
# Sanity check: the cluster's node list and the extracted subgraph should have
# the same number of nodes; the last figure is the subgraph's edge count.
for size in (len(fp_cluster_nodes), len(fp_graph.nodes()), len(fp_graph.edges())):
    print(size)

761
761
118021


In [4]:
# Degree of the "screen" node inside the FP-cluster subgraph.
fp_graph.degree("screen")

709

### calculate centrality of nodes

In [5]:
# Working directory for this analysis: the Tracker Radar weight files are read
# from here and every centrality ranking is written here as JSON.
base_dir = "/home/c6/Desktop/OpenWPM/jsons/community_tracking/real_graphs/centralitty"

### Current-flow betweenness centrality uses an electrical current model for information spreading in contrast to betweenness centrality which uses shortest paths.

In [6]:
# Unnormalised, edge-weighted current-flow betweenness for every node of the FP subgraph.
centrality_list = networkx.current_flow_betweenness_centrality(fp_graph, normalized=False, weight="weight")

# Order the nodes from most to least central.
ranked_nodes = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(ranked_nodes)

# Attach a 1-based rank to each API (rank 1 = highest centrality).
indexed_weights = {
    node: {"weight": score, "rank": rank}
    for rank, (node, score) in enumerate(ranked_nodes, start=1)
}
utilities.write_json(os.path.join(base_dir, "current_flow_betweenness_centrality.json"), indexed_weights)

In [7]:
# Raw Tracker Radar weights, keyed by dotted API name.
trackr_radar_weights_dict = utilities.read_json(os.path.join(base_dir, "tracker_radar_weights.json"))

# Walk the entries from heaviest to lightest and key them by the last dotted
# segment only. If two names share a last segment, the entry keeps the
# position of the first one seen and the weight of the last one seen.
weight_ordered = sorted(trackr_radar_weights_dict.items(), key=lambda item: item[1], reverse=True)
trackr_radar_weights = {}
for full_name, weight in weight_ordered:
    trackr_radar_weights[full_name.split('.')[-1]] = weight

utilities.write_json(os.path.join(base_dir, "trackr_radar_weights.json"), trackr_radar_weights)

### Average weight

In [8]:
# Score the complete Tracker Radar list; assign a small number (e.g. 10)
# instead to score only the heaviest entries.
list_threshold = len(trackr_radar_weights)

# An API that is missing from our ranking is charged the worst possible rank.
penalty_rank = len(indexed_weights)

average_weight = 0
accu_weight = 0
for position, (api_name, weight) in enumerate(trackr_radar_weights.items(), start=1):
    if position > list_threshold:
        break
    rank = indexed_weights[api_name]["rank"] if api_name in indexed_weights else penalty_rank
    average_weight += (weight * rank)
    accu_weight += weight

# Weighted mean of the ranks, using the Tracker Radar weights.
average_weight = average_weight / accu_weight

### Best, worst ranks

In [9]:
# Reference bounds for the weighted average rank. The best case gives the
# Tracker Radar entries ranks 1, 2, 3, ... in list order; the worst case gives
# them the last rank, the one before it, and so on.
# Both sums are normalised by accu_weight, which the average-weight cell sets.
last_rank = len(indexed_weights)
Best_avg_rank = 0
worst_avg_rank = 0
for position, weight in enumerate(trackr_radar_weights.values(), start=1):
    if position > list_threshold:
        break
    Best_avg_rank += weight * position
    worst_avg_rank += weight * (last_rank - position + 1)
Best_avg_rank = Best_avg_rank / accu_weight
worst_avg_rank = worst_avg_rank / accu_weight

In [10]:
# Report the measured weighted average rank next to its best and worst bounds.
rank_summary = (
    ("Average Rank: ", average_weight),
    ("Best Average Rank: ", Best_avg_rank),
    ("Worst Average Rank: ", worst_avg_rank),
)
for label, figure in rank_summary:
    print(label, figure)

Average Rank:  598.7813731680827
Best Average Rank:  6.573528932327371
Worst Average Rank:  755.4264710676731


### Check which Tracker Radar API keywords are missing from the FP cluster

In [11]:
original_web_tracker = utilities.read_json(os.path.join(base_dir,"original_tracker_radar.json"))

# Group the original dotted names by their last segment so that each missing
# short name can be mapped back without rescanning the whole file.
full_names_by_suffix = {}
for full_name in original_web_tracker:
    full_names_by_suffix.setdefault(full_name.split('.')[-1], []).append(full_name)

# Print the original name(s) of every Tracker Radar API that has no entry in
# the current centrality ranking.
for api_name in trackr_radar_weights:
    if api_name in indexed_weights:
        continue
    for full_name in full_names_by_suffix.get(api_name, []):
        print(full_name)

Navigator.prototype.keyboard
Navigator.prototype.presentation
DeviceMotionEvent.prototype.accelerationIncludingGravity
Navigator.prototype.webkitPersistentStorage
DeviceOrientationEvent.prototype.beta
DeviceOrientationEvent.prototype.alpha
DeviceOrientationEvent.prototype.absolute
Navigator.prototype.mediaCapabilities
Navigator.prototype.storage
Navigator.prototype.permissions
CanvasRenderingContext2D.prototype.measureText
CanvasRenderingContext2D.prototype.getImageData
Screen.prototype.availLeft
MediaSource.isTypeSupported
Navigator.prototype.javaEnabled
Navigator.prototype.product
Navigator.prototype.vendor
Navigator.prototype.language
window.localStorage
HTMLMediaElement.prototype.canPlayType
URL.createObjectURL
Navigator.prototype.userAgent


### current_flow_betweenness_centrality

In [12]:
# Unnormalised, edge-weighted current-flow betweenness of the FP subgraph.
# The result is written to the same file name as the earlier current-flow cell,
# so that file is overwritten.
centrality_list = networkx.algorithms.centrality.current_flow_betweenness_centrality(fp_graph, normalized=False, weight="weight")

# Most central node first.
by_score_desc = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(by_score_desc)

# 1-based rank per API, stored alongside its score.
indexed_weights = {}
for rank, (api_name, score) in enumerate(by_score_desc, start=1):
    indexed_weights[api_name] = {"weight": score, "rank": rank}

utilities.write_json(os.path.join(base_dir, "current_flow_betweenness_centrality.json"), indexed_weights)

### approximate current-flow betweenness centrality for nodes.

In [13]:
# The approximation algorithm is randomized. Without a fixed seed, the scores,
# and therefore the ranks written to disk, change from run to run. Pinning the
# seed makes this cell reproducible under Restart & Run All.
centrality_list = networkx.algorithms.centrality.approximate_current_flow_betweenness_centrality(
    fp_graph, normalized=False, weight="weight", seed=42
)

# Most central node first.
ranked_nodes = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(ranked_nodes)

# Attach a 1-based rank to each API (rank 1 = highest approximate centrality).
indexed_weights = {
    node: {"weight": score, "rank": rank}
    for rank, (node, score) in enumerate(ranked_nodes, start=1)
}
utilities.write_json(os.path.join(base_dir, "approximate_current_flow_betweenness_centrality.json"), indexed_weights)

### betweenness_centrality
Compute the shortest-path betweenness centrality for nodes

In [14]:
# Unnormalised shortest-path betweenness, with the "weight" edge attribute
# used as the path length.
centrality_list = networkx.algorithms.centrality.betweenness_centrality(fp_graph, normalized=False, weight="weight")

# Sort once, descending by score, and reuse the order for both dictionaries.
score_order = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(score_order)

# Rank 1 goes to the node with the highest betweenness.
indexed_weights = {}
for rank, (api_name, score) in enumerate(score_order, start=1):
    indexed_weights[api_name] = {"weight": score, "rank": rank}

utilities.write_json(os.path.join(base_dir, "betweenness_centrality.json"), indexed_weights)

### eigenvector_centrality
Compute the eigenvector centrality for the graph

In [15]:
# Edge-weighted eigenvector centrality of the FP subgraph.
centrality_list = networkx.algorithms.centrality.eigenvector_centrality(fp_graph, weight="weight")

# Highest score first.
ranked_nodes = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(ranked_nodes)

# Record each API's score together with its 1-based position in that order.
indexed_weights = {
    node: {"weight": score, "rank": rank}
    for rank, (node, score) in enumerate(ranked_nodes, start=1)
}
utilities.write_json(os.path.join(base_dir, "eigenvector_centrality.json"), indexed_weights)

### degree_centrality

In [16]:
# Degree centrality of the FP subgraph. No weight argument is passed, so edge
# weights play no part in this score.
centrality_list = networkx.algorithms.centrality.degree_centrality(fp_graph)

# Highest-degree node first.
by_score_desc = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(by_score_desc)

# 1-based rank per API, stored alongside its score.
indexed_weights = {}
for rank, (api_name, score) in enumerate(by_score_desc, start=1):
    indexed_weights[api_name] = {"weight": score, "rank": rank}

utilities.write_json(os.path.join(base_dir, "degree_centrality.json"), indexed_weights)

### eigenvector_centrality_numpy

In [17]:
# Edge-weighted eigenvector centrality, computed with networkx's numpy-based variant.
centrality_list = networkx.algorithms.centrality.eigenvector_centrality_numpy(fp_graph, weight="weight")

# Highest score first.
score_order = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(score_order)

# Rank 1 is the most central API.
indexed_weights = {
    node: {"weight": score, "rank": rank}
    for rank, (node, score) in enumerate(score_order, start=1)
}
utilities.write_json(os.path.join(base_dir, "eigenvector_centrality_numpy.json"), indexed_weights)

### katz_centrality

In [18]:
# Unnormalised, edge-weighted Katz centrality of the FP subgraph.
# NOTE(review): alpha is left at the library default; the networkx docs state
# that alpha must stay below 1/lambda_max for the iteration to converge.
# Confirm that this holds for a graph this dense.
centrality_list = networkx.algorithms.centrality.katz_centrality(fp_graph, weight="weight", normalized=False)

# Highest score first.
ranked_nodes = sorted(centrality_list.items(), key=lambda item: item[1], reverse=True)
our_weights = dict(ranked_nodes)

# 1-based rank per API, stored alongside its score.
indexed_weights = {}
for rank, (api_name, score) in enumerate(ranked_nodes, start=1):
    indexed_weights[api_name] = {"weight": score, "rank": rank}

utilities.write_json(os.path.join(base_dir, "katz_centrality.json"), indexed_weights)

### Calculate average_weight

In [19]:
# Score only the ten heaviest Tracker Radar APIs; assign
# len(trackr_radar_weights) instead to score the complete list.
list_threshold = 10

# indexed_weights is whichever centrality ranking was computed last.
# An API that is missing from it is charged the worst possible rank.
penalty_rank = len(indexed_weights)

average_weight = 0
accu_weight = 0
for position, (api_name, weight) in enumerate(trackr_radar_weights.items(), start=1):
    if position > list_threshold:
        break
    ranked_entry = indexed_weights.get(api_name)
    rank = penalty_rank if ranked_entry is None else ranked_entry["rank"]
    average_weight += (weight * rank)
    accu_weight += weight

# Weighted mean of the ranks, using the Tracker Radar weights.
average_weight = average_weight / accu_weight
average_weight

625.3802780171624

In [21]:
static_clusters_base = "/home/c6/Desktop/OpenWPM/jsons/community_tracking/real_graphs"

# Entry "26" of the selected dynamic communities. Each item is indexed as
# (static cluster file name, year).
fp_dy_cluster = utilities.read_json("/home/c6/Desktop/OpenWPM/jsons/community_tracking/real_graphs/DY_COMM"
                                        "/selected_comms/selected_DY_communities.json")["26"]

# Load the members of each referenced static cluster, keyed by year (as a string).
# A later item with the same year replaces the earlier one.
suspicious_apis = {}
for member in fp_dy_cluster:
    year = str(member[1])
    cluster_file = os.path.join(static_clusters_base, year, member[0])
    suspicious_apis[year] = list(utilities.read_json(cluster_file))

In [22]:
# Number of entries loaded for the 2019 static cluster.
len(suspicious_apis["2019"])

761