In [None]:
import snap
import re
import os

In [None]:
# Tab-separated Reddit comment dump that the graph-building cell below reads line by line.
comment_file = 'output/reddit_comments_jan2012.txt'

In [None]:
# Per-comment edge attributes, grouped by the SNAP attribute type they are stored as.
int_attrs = ['score', 'gilded', 'created_utc']
str_attrs = ['subreddit', 'comment_id']

# Attributed multigraphs (one edge per comment).
uunet = snap.TNEANet.New()
# Same, but with no node for the [deleted] placeholder user and no edges for
# comments whose text is [deleted] or [removed].
uunet_nodelete = snap.TNEANet.New()

# Plain directed graphs: no attributes, no multi-edges.
uu_simple = snap.TNGraph.New()
# Unlike uunet_nodelete, edges ARE still added here when the comment text is
# [removed] or [deleted].
uu_simple_nodelete = snap.TNGraph.New()

# Declare the edge and node attribute schema on both attributed networks.
for attr_net in (uunet, uunet_nodelete):
    for ia in int_attrs:
        attr_net.AddIntAttrE(ia)
    for sa in str_attrs:
        attr_net.AddStrAttrE(sa)
    attr_net.AddStrAttrN('username')

In [None]:
# Build all four user-user graphs in a single pass over the comment file.
#
# NOTE: this cell mutates the graphs created in the setup cell. Re-running it
# without re-creating those graphs first would add every comment edge to the
# attributed networks a second time.
DELETED_USER = '[deleted]'  # placeholder username excluded from the *_nodelete graphs
usernames_to_nids = {}      # username -> node id, shared by all four graphs

# `with` ensures the file handle is closed even if a malformed row raises.
with open(comment_file) as comments:
    # The first character of the header line is dropped before splitting on tabs.
    header = comments.readline()[1:].rstrip('\n').split('\t')
    fields = {name: i for (i, name) in enumerate(header)}

    # Iterate over all comments
    for (i, com) in enumerate(comments):
        entries = com.rstrip('\n').split('\t')
        entries = {fieldname: entries[fields[fieldname]] for fieldname in fields}
        subreddit = entries['Subreddit']
        commenter = entries['Commenter'] # standardized to lowercase
        commentee = entries['Commentee'] # ditto
        comment_id = entries['Comment_ID']

        score = int(entries['Score'])
        gilded = int(entries['Gilded'])
        created_utc = int(entries['Created_UTC'])

        # Parsed as an int and used as a truthy flag below
        text_deleted = int(entries['Text_Deleted'])

        # Create nodes for commenter and commentee if they don't exist. The node id
        # handed out by uunet is reused in the other graphs so ids line up everywhere.
        for user in (commenter, commentee):
            if user not in usernames_to_nids:
                nid = usernames_to_nids[user] = uunet.AddNode()
                uunet.AddStrAttrDatN(nid, user, 'username')
                uu_simple.AddNode(nid)
                if user != DELETED_USER:
                    uunet_nodelete.AddNode(nid)
                    uunet_nodelete.AddStrAttrDatN(nid, user, 'username')
                    uu_simple_nodelete.AddNode(nid)

        commenter_nid = usernames_to_nids[commenter]
        commentee_nid = usernames_to_nids[commentee]

        # True when neither endpoint is the [deleted] placeholder, i.e. both nodes
        # exist in the *_nodelete graphs.
        both_users_known = commenter != DELETED_USER and commentee != DELETED_USER

        # The attributed no-delete network additionally skips comments whose text
        # was deleted/removed.
        target_nets = [uunet]
        if both_users_known and not text_deleted:
            target_nets.append(uunet_nodelete)

        # Add comment edge with attributes to TNEANet(s)
        for net in target_nets:
            eid = net.AddEdge(commenter_nid, commentee_nid)

            net.AddIntAttrDatE(eid, score, 'score')
            net.AddIntAttrDatE(eid, gilded, 'gilded')
            net.AddIntAttrDatE(eid, created_utc, 'created_utc')
            net.AddStrAttrDatE(eid, subreddit, 'subreddit')
            net.AddStrAttrDatE(eid, comment_id, 'comment_id')

        # Add interaction edge (if not already present) to TNGraph(s)
        uu_simple.AddEdge(commenter_nid, commentee_nid)
        if both_users_known:
            uu_simple_nodelete.AddEdge(commenter_nid, commentee_nid)

        # Show progress
        if i % 100000 == 0 and i > 0:
            print(i)

In [None]:
# Save networks: each graph is written to its own binary file under output_dir.
output_dir = 'output/usernets'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# One SNAP output stream per graph
out_multi_delete = snap.TFOut(os.path.join(output_dir, 'user_tneanet.graph'))
out_multi_nodelete = snap.TFOut(os.path.join(output_dir, 'user_tneanet_nodelete.graph'))
out_simple_delete = snap.TFOut(os.path.join(output_dir, 'user_tngraph.graph'))
out_simple_nodelete = snap.TFOut(os.path.join(output_dir, 'user_tngraph_nodelete.graph'))

# Flush after every Save so buffered bytes are pushed out to the file
uunet.Save(out_multi_delete)
out_multi_delete.Flush()

uunet_nodelete.Save(out_multi_nodelete)
out_multi_nodelete.Flush()

uu_simple.Save(out_simple_delete)
out_simple_delete.Flush()

uu_simple_nodelete.Save(out_simple_nodelete)
out_simple_nodelete.Flush()