In [1]:
import json
import datetime
import numpy as np

In [2]:
from collections import defaultdict

In [3]:
# Bind the name up front, then replace it with the parsed JSON document.
# Later cells look values up by key and iterate each value as a list of
# interactions.
edgelist_breakouts = {}

with open('../data/reddit/siacoin_grouping_edgelists.json', 'r') as dataset_json:
    edgelist_breakouts = json.load(dataset_json)

In [4]:
def sorter(key):
    """Return a sort key of the form 'year-month' for a dash-separated label.

    The first two dash-separated fields of ``key`` are taken as year and
    month. A one-character month gets a leading '0' so that plain string
    comparison orders '2016-4' before '2016-10'. Any further fields are
    ignored.
    """
    pieces = key.split('-')
    year, month = pieces[0], pieces[1]

    # Only single-character months need padding; everything else passes through.
    padded_month = '0' + month if len(month) == 1 else month

    return year + '-' + padded_month

In [5]:
import networkx as nx

In [6]:
def metric_sorter(items):
    """Return ``items`` as a new list ordered by the second element, largest first.

    ``items`` is any iterable of indexable pairs (for example the ``.items()``
    view of a ``{node: score}`` dict). The sort is stable, so entries with
    equal scores keep their incoming relative order.
    """
    ranked = list(items)
    ranked.sort(key = lambda pair: pair[1], reverse = True)
    return ranked

# Maps each edgelist key (e.g. '2016-4') to a dict of four rankings, one per
# centrality measure; every ranking is a list of (node, score) pairs sorted
# from highest to lowest score.
metrics = {}

# Timestamped progress lines make the per-key cost visible in the cell output.
print('started:', '@', datetime.datetime.now())

keys = list(edgelist_breakouts.keys())
# Visit keys in chronological order (sorter zero-pads the month).
# The enumerate index `i` is not used inside this loop.
for i, key in enumerate(sorted(keys, key = sorter)):
    
    # A fresh undirected graph per key.
    G = nx.Graph()

    iteractions = edgelist_breakouts[key]
    for interaction in iteractions:
        # The first two fields of each interaction are the edge endpoints.
        n1 = interaction[0]
        n2 = interaction[1]

        # A repeated pair bumps the edge's 'weight' instead of adding a second edge.
        if G.has_edge(n1, n2):
            G[n1][n2]['weight'] += 1
        else:
            G.add_edge(n1, n2, weight = 1)
            
    # NOTE(review): with NetworkX defaults, pagerank reads the 'weight' edge
    # attribute while the other three measures appear to ignore it -- confirm
    # that this mix of weighted and unweighted measures is intended.
    degree = nx.degree_centrality(G).items()
    closeness = nx.closeness_centrality(G).items()
    betweenness = nx.betweenness_centrality(G).items()
    pagerank = nx.pagerank(G).items()
         
    # Store each measure as a descending (node, score) ranking.
    metrics[key] = {
        'Degree': metric_sorter(degree),
        'Closeness': metric_sorter(closeness),
        'Betweenness': metric_sorter(betweenness),
        'Pagerank': metric_sorter(pagerank)
    }
    
    print('completed:', key, '@', datetime.datetime.now())

started: @ 2020-04-23 07:51:19.574246
completed: 2016-4 @ 2020-04-23 07:51:19.575393
completed: 2016-5 @ 2020-04-23 07:51:19.612170
completed: 2016-6 @ 2020-04-23 07:51:19.661741
completed: 2016-7 @ 2020-04-23 07:51:19.682079
completed: 2016-8 @ 2020-04-23 07:51:19.692331
completed: 2016-9 @ 2020-04-23 07:51:19.704108
completed: 2016-10 @ 2020-04-23 07:51:19.710180
completed: 2016-11 @ 2020-04-23 07:51:19.712171
completed: 2016-12 @ 2020-04-23 07:51:19.716879
completed: 2017-1 @ 2020-04-23 07:51:19.726371
completed: 2017-2 @ 2020-04-23 07:51:19.731721
completed: 2017-3 @ 2020-04-23 07:51:19.745208
completed: 2017-4 @ 2020-04-23 07:51:19.762078
completed: 2017-5 @ 2020-04-23 07:51:23.455393
completed: 2017-6 @ 2020-04-23 07:52:17.149714
completed: 2017-7 @ 2020-04-23 07:52:27.640429
completed: 2017-8 @ 2020-04-23 07:52:32.658964
completed: 2017-9 @ 2020-04-23 07:52:33.859108
completed: 2017-10 @ 2020-04-23 07:52:34.896001
completed: 2017-11 @ 2020-04-23 07:52:35.826684
completed: 2017-1

In [7]:
# Persist every ranking as JSON. The (node, score) tuples are serialised as
# two-element arrays.
with open('../data/reddit/siacoin_centrality_metrics.json', 'w') as metrics_json:
    json.dump(metrics, metrics_json)

In [8]:
# Display the four rankings stored under the '2016-10' key as a spot check.
metrics['2016-10']

{'Degree': [('in-cred-u-lous', 0.8076923076923077),
  ('Fornax96', 0.15384615384615385),
  ('Coinosphere', 0.15384615384615385),
  ('Toboxx', 0.15384615384615385),
  ('cmbartley', 0.15384615384615385),
  ('humbrie', 0.15384615384615385),
  ('doodlemania', 0.11538461538461539),
  ('jacobvschmidt', 0.11538461538461539),
  ('Taek42', 0.11538461538461539),
  ('bSalm0n', 0.07692307692307693),
  ('wolfchange', 0.07692307692307693),
  ('Lorenzo000', 0.07692307692307693),
  ('morantis2015', 0.07692307692307693),
  ('mtlynch', 0.07692307692307693),
  ('doctorWarm', 0.07692307692307693),
  ('walloon5', 0.07692307692307693),
  ('darkFunction', 0.07692307692307693),
  ('phalacee', 0.07692307692307693),
  ('thederpill', 0.07692307692307693),
  ('0nlyNow', 0.038461538461538464),
  ('coolfarmer', 0.038461538461538464),
  ('mesquka', 0.038461538461538464),
  ('Nachbar90', 0.038461538461538464),
  ('Wredditing', 0.038461538461538464),
  ('fengtaobuaa', 0.038461538461538464),
  ('rogvirtualmoney', 0.038

In [9]:
def filter_metrics(metrics_by_month, take = 10):
    """Flag every author who appears near the top of any ranking in the mapping.

    Parameters
    ----------
    metrics_by_month : mapping
        Maps a key to a list of ``(author, metric)`` pairs. Only the first
        ``take`` pairs of each list are considered, so the lists are expected
        to be ordered best-first already. Despite the parameter name, the
        call in this notebook passes one month's dict keyed by measure name
        ('Degree', 'Closeness', ...).
    take : int, default 10
        How many leading entries of each list to flag.

    Returns
    -------
    collections.defaultdict
        ``author -> True`` for every flagged author. Looking up any other
        author returns ``False`` (and, as with any defaultdict, stores that
        key as a side effect).
    """
    flagged = defaultdict(lambda: False)

    for ranking in metrics_by_month.values():
        # Each entry must unpack into exactly (author, metric); the metric
        # value itself is irrelevant here.
        for name, _metric in ranking[:take]:
            flagged[name] = True

    return flagged

# Build the lookup from the '2016-10' entry, i.e. from that month's four
# rankings ('Degree', 'Closeness', 'Betweenness', 'Pagerank'), then display it.
potential_filter = filter_metrics(metrics['2016-10'])
potential_filter

defaultdict(<function __main__.filter_metrics.<locals>.<lambda>()>,
            {'in-cred-u-lous': True,
             'Fornax96': True,
             'Coinosphere': True,
             'Toboxx': True,
             'cmbartley': True,
             'humbrie': True,
             'doodlemania': True,
             'jacobvschmidt': True,
             'Taek42': True,
             'bSalm0n': True,
             'thederpill': True,
             '0nlyNow': True,
             'coolfarmer': True,
             'wolfchange': True,
             'Lorenzo000': True})

In [10]:
# An author taken from the top of a ranking must be flagged True.
assert potential_filter['in-cred-u-lous'] ## true

In [11]:
# An author who was never flagged falls back to the defaultdict's False.
# Note that this lookup also inserts 'bob_dole': False into potential_filter.
assert not potential_filter['bob_dole'] ## false