In [1]:
import snap
import re
import os
from datetime import datetime
from pytz import timezone
from copy import deepcopy
import csv

In [21]:
# Directory holding the serialized user-user networks
input_dir = 'output/usernets'

# Open one SNAP input stream per serialized graph file (same order as the names on the left)
in_multi_delete, in_multi_nodelete, in_simple_delete, in_simple_nodelete = (
    snap.TFIn(os.path.join(input_dir, graph_file))
    for graph_file in (
        'user_tneanet.graph',
        'user_tneanet_nodelete.graph',
        'user_tngraph.graph',
        'user_tngraph_nodelete.graph',
    )
)

# Deserialize the TNEANet and TNGraph variants from their streams
uunet = snap.TNEANet.Load(in_multi_delete)
uunet_nodelete = snap.TNEANet.Load(in_multi_nodelete)
uunet_simple = snap.TNGraph.Load(in_simple_delete)
uunet_simple_nodelete = snap.TNGraph.Load(in_simple_nodelete)

# Graphs for which feature files are generated, keyed by the name used in the output filename
graphs = dict(
    user_tneanet_nodelete=uunet_nodelete,
    user_tngraph_nodelete=uunet_simple_nodelete,
)

In [22]:
# Columns of the per-node feature file, in the order they are written.
# "Betweenness_Centrality" and "Closeness_Centrality" are currently left out.
network_features = [
    "node_id",
    "In_Degree",
    "Out_Degree",
    "Pagerank",
    "Authority_Score",
    "Hub_Score",
]

In [25]:
def _collect_network_scores(graph):
    """Gather the per-node feature values for ``graph``.

    Args:
        graph: SNAP graph object accepted by ``snap.GetPageRank`` and
            ``snap.GetHits`` and exposing ``Nodes()``.

    Returns:
        dict mapping node id -> dict of feature name -> value. Every inner
        dict has one entry per name in the module-level ``network_features``;
        entries that are never updated keep the initial value 0.
    """
    # Each node starts from an all-zero row so that a feature which is not
    # reported for that node is still written out as 0.
    network_feature_template = {k: 0 for k in network_features}
    user_network_scores = {}  # node_id -> {feature name -> value}

    def update_user_network_scores(node_id, feature_name, feature_value):
        # Guard against typos in feature names: unknown keys would make the
        # DictWriter in create_network_features reject the row.
        assert feature_name in network_features
        if node_id not in user_network_scores:
            # The template only holds ints, so a shallow copy is sufficient.
            user_network_scores[node_id] = dict(network_feature_template)
        user_network_scores[node_id][feature_name] = feature_value

    # Pagerank scores
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH)
    for node_id in PRankH:
        update_user_network_scores(node_id, "Pagerank", PRankH[node_id])

    # In/out degree scores (single pass over the nodes)
    for node in graph.Nodes():
        node_id = node.GetId()
        update_user_network_scores(node_id, "In_Degree", node.GetInDeg())
        update_user_network_scores(node_id, "Out_Degree", node.GetOutDeg())

    # Hubs and Authorities scores
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    for node_id in NIdHubH:
        update_user_network_scores(node_id, "Hub_Score", NIdHubH[node_id])
    for node_id in NIdAuthH:
        update_user_network_scores(node_id, "Authority_Score", NIdAuthH[node_id])

    return user_network_scores


def create_network_features(graph_name, graph):
    """Compute network features for every node of ``graph`` and write them as TSV.

    The rows go to ``output/network_features_<graph_name>_jan2012.tsv`` with
    one column per entry of the module-level ``network_features`` list.

    Args:
        graph_name: Label inserted into the output filename.
        graph: SNAP graph to analyse.

    Raises:
        IOError/OSError: if the output file cannot be opened for writing.
    """
    outfile = 'output/network_features_{}_jan2012.tsv'.format(graph_name)

    # The file is opened before the scores are computed so that an unwritable
    # path fails immediately. The ``with`` block guarantees the handle is
    # flushed and closed on return, including when a later step raises.
    with open(outfile, 'w') as outfile_network_features:
        output_network_features = csv.DictWriter(outfile_network_features, network_features, delimiter='\t')
        output_network_features.writeheader()

        user_network_scores = _collect_network_scores(graph)

        # Write to output file
        for node_id, fields in user_network_scores.items():
            fields["node_id"] = node_id
            output_network_features.writerow(fields)

In [26]:
# Produce one feature file per entry in `graphs`
for graph_name, graph in graphs.items():
    create_network_features(graph_name, graph)

In [None]:
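# # Closeness Centrality (disabled)
# # NOTE: update_user_network_scores is a nested helper and is not defined at
# # notebook level, so this snippet cannot run as a standalone cell. To enable it,
# # move it next to the other feature computations and add "Closeness_Centrality"
# # to network_features (the helper asserts that the feature name is listed there).
# for node in uunet_simple_nodelete.Nodes():
#     node_id = node.GetId()
#     CloseCentr = snap.GetClosenessCentr(uunet_simple_nodelete, node_id)
#     update_user_network_scores(node_id, "Closeness_Centrality", CloseCentr)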


In [None]:
# # Betweenness Centrality
# Nodes = snap.TIntFltH()
# Edges = snap.TIntPrFltH()
# snap.GetBetweennessCentr(uunet_nodelete, Nodes, Edges, 0.001)


In [25]:
# The handle that create_network_features opens lives in a variable local to
# that function, so `outfile_network_features` is not defined at notebook level
# on a fresh kernel and an unconditional .close() raises NameError. Only close
# a handle if an earlier session left one in the global namespace.
stale_output_handle = globals().get('outfile_network_features')
if stale_output_handle is not None:
    stale_output_handle.close()
