In [1]:
%load_ext autoreload
%autoreload 2

In [8]:
# Modifying the path so we can import from src directory.
import sys
import os
sys.path.append(os.path.abspath('..'))

from collections import Counter, defaultdict
from itertools import chain
import copy
import pickle
import random
import time

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
from pyvis.network import Network

from src.example_graphs import simple_undirected_graph, simple_directed_graph
from src.UndirectedGraph import UndirectedGraph
from src.DirectedGraph import DirectedGraph
from src.DataLoader import DataLoader
from src.GraphCreator import GraphCreator, NetworkXGraphCreator

from src.io_helpers import pickle_obj, load_pickled_obj
from src.networkx_helpers import combine_graphs
from src.networkx_multigraph_helpers import (get_edge_attrs, aggregate_numeric_properties,
                                             sum_numeric_properties, count_edges,
                                             sharing_reciprocity)

In [3]:
ROOT_DIRECTORY = os.path.split(os.getcwd())[0]
DATA_DIRECTORY = os.path.join(ROOT_DIRECTORY, 'data')
PICKLED_DATA_DIRECTORY = os.path.join(ROOT_DIRECTORY, 'data_pickle')

### Load our G_weighted graph

In [4]:
G_weighted = load_pickled_obj(os.path.join(PICKLED_DATA_DIRECTORY, 'networkx_weighted_full.pickle'))

# Looking at Sharing Reciprocity
### First, let's look at a subset of our graph.

In [5]:
G_weighted_subset = G_weighted.edge_subgraph(G_weighted.edges('vegan'))

In [6]:
G_weighted_subset['vegan']['worldnews']

{'weight': 26}

In [9]:
sharing_reciprocity(G_weighted_subset, 'vegan', 'worldnews')

{'reciprocity': 1.0, 'summed weights': 26, 'n-to-m': 26, 'm_to_n': 0}

In [10]:
reciprocities = []
focus_node = 'vegan'
for neighbor_node in G_weighted_subset[focus_node]:
    reciprocities.append((neighbor_node, sharing_reciprocity(G_weighted, focus_node, neighbor_node)))

In [11]:
sorted(reciprocities, key=lambda x: x[1]['summed weights'], reverse=True)

[('askreddit',
  {'reciprocity': 1.0, 'summed weights': 57, 'n-to-m': 57, 'm_to_n': 0}),
 ('vegancirclejerk',
  {'reciprocity': 0.05263157894736842,
   'summed weights': 38,
   'n-to-m': 2,
   'm_to_n': 36}),
 ('iama',
  {'reciprocity': 1.0, 'summed weights': 36, 'n-to-m': 36, 'm_to_n': 0}),
 ('news',
  {'reciprocity': 1.0, 'summed weights': 33, 'n-to-m': 33, 'm_to_n': 0}),
 ('todayilearned',
  {'reciprocity': 1.0, 'summed weights': 32, 'n-to-m': 32, 'm_to_n': 0}),
 ('worldnews',
  {'reciprocity': 0.9629629629629629,
   'summed weights': 27,
   'n-to-m': 26,
   'm_to_n': 1}),
 ('science',
  {'reciprocity': 1.0, 'summed weights': 23, 'n-to-m': 23, 'm_to_n': 0}),
 ('subredditdrama',
  {'reciprocity': 0.08695652173913043,
   'summed weights': 23,
   'n-to-m': 2,
   'm_to_n': 21}),
 ('videos',
  {'reciprocity': 0.9545454545454546,
   'summed weights': 22,
   'n-to-m': 21,
   'm_to_n': 1}),
 ('philosophy',
  {'reciprocity': 1.0, 'summed weights': 20, 'n-to-m': 20, 'm_to_n': 0}),
 ('vegetari

In [12]:
sorted(reciprocities, key=lambda x: x[1]['reciprocity'], reverse=True)

[('woahdude',
  {'reciprocity': 1.0, 'summed weights': 2, 'n-to-m': 2, 'm_to_n': 0}),
 ('iama',
  {'reciprocity': 1.0, 'summed weights': 36, 'n-to-m': 36, 'm_to_n': 0}),
 ('frugal',
  {'reciprocity': 1.0, 'summed weights': 1, 'n-to-m': 1, 'm_to_n': 0}),
 ('science',
  {'reciprocity': 1.0, 'summed weights': 23, 'n-to-m': 23, 'm_to_n': 0}),
 ('todayilearned',
  {'reciprocity': 1.0, 'summed weights': 32, 'n-to-m': 32, 'm_to_n': 0}),
 ('adviceanimals',
  {'reciprocity': 1.0, 'summed weights': 11, 'n-to-m': 11, 'm_to_n': 0}),
 ('calgary',
  {'reciprocity': 1.0, 'summed weights': 1, 'n-to-m': 1, 'm_to_n': 0}),
 ('offmychest',
  {'reciprocity': 1.0, 'summed weights': 1, 'n-to-m': 1, 'm_to_n': 0}),
 ('news',
  {'reciprocity': 1.0, 'summed weights': 33, 'n-to-m': 33, 'm_to_n': 0}),
 ('wtf',
  {'reciprocity': 1.0, 'summed weights': 15, 'n-to-m': 15, 'm_to_n': 0}),
 ('weddingplanning',
  {'reciprocity': 1.0, 'summed weights': 1, 'n-to-m': 1, 'm_to_n': 0}),
 ('askreddit',
  {'reciprocity': 1.0, 's

## Now, let's calculate reciprocities for the whole graph

In [13]:
full_reciprocities = []
for focus_node in G_weighted:
    for neighbor_node in G_weighted[focus_node]:
        full_reciprocities.append({
            'focus_node': focus_node,
            'neighbor_node': neighbor_node,
            'reciprocity_dict': sharing_reciprocity(G_weighted, focus_node, neighbor_node)
        })

In [14]:
len(full_reciprocities)

339643

In [15]:
len(G_weighted)

67180

In [16]:
full_reciprocities[0]

{'focus_node': 'leagueoflegends',
 'neighbor_node': 'teamredditteams',
 'reciprocity_dict': {'reciprocity': 0.6,
  'summed weights': 65,
  'n-to-m': 39,
  'm_to_n': 26}}

In [17]:
full_reciprocities[0]['reciprocity_dict']['reciprocity']

0.6

## Looking at middling reciprocities

In [18]:
# Let's look where reciprocities are between 0.4 and 0.6
middling_reciprocities = [edge for edge in full_reciprocities if abs(edge['reciprocity_dict']['reciprocity']-0.5)<0.1]

In [19]:
len(middling_reciprocities)

28182

In [20]:
middling_reciprocities[:3]

[{'focus_node': 'leagueoflegends',
  'neighbor_node': 'teamredditteams',
  'reciprocity_dict': {'reciprocity': 0.6,
   'summed weights': 65,
   'n-to-m': 39,
   'm_to_n': 26}},
 {'focus_node': 'leagueoflegends',
  'neighbor_node': 'lolchampconcepts',
  'reciprocity_dict': {'reciprocity': 0.5384615384615384,
   'summed weights': 39,
   'n-to-m': 21,
   'm_to_n': 18}},
 {'focus_node': 'leagueoflegends',
  'neighbor_node': 'dogecoin',
  'reciprocity_dict': {'reciprocity': 0.5,
   'summed weights': 4,
   'n-to-m': 2,
   'm_to_n': 2}}]

## Now, let's look where the summed_weight is greater than some amount (people sharing a lot about the other)

In [21]:
middling_reciprocities_sum_gt_20 = [edge for edge in middling_reciprocities if edge['reciprocity_dict']['summed weights']>20]
len(middling_reciprocities_sum_gt_20)

896

In [22]:
middling_reciprocities_sum_gt_50 = [edge for edge in middling_reciprocities if edge['reciprocity_dict']['summed weights']>50]
len(middling_reciprocities_sum_gt_50)

234

In [23]:
middling_reciprocities_sum_gt_100 = [edge for edge in middling_reciprocities if edge['reciprocity_dict']['summed weights']>100]
len(middling_reciprocities_sum_gt_100)

68

Since each edge will be doubled here (since reciprocity is two-way), there are 34 subreddit relationships where people share a lot about each other and the sharing is about equal.

In [24]:
sorted(middling_reciprocities_sum_gt_100, key=lambda x: x['reciprocity_dict']['summed weights'], reverse=True)[:10]

[{'focus_node': 'buildapc',
  'neighbor_node': 'techsupport',
  'reciprocity_dict': {'reciprocity': 0.47863247863247865,
   'summed weights': 351,
   'n-to-m': 168,
   'm_to_n': 183}},
 {'focus_node': 'techsupport',
  'neighbor_node': 'buildapc',
  'reciprocity_dict': {'reciprocity': 0.5213675213675214,
   'summed weights': 351,
   'n-to-m': 183,
   'm_to_n': 168}},
 {'focus_node': 'destinythegame',
  'neighbor_node': 'crucibleplaybook',
  'reciprocity_dict': {'reciprocity': 0.46153846153846156,
   'summed weights': 286,
   'n-to-m': 132,
   'm_to_n': 154}},
 {'focus_node': 'crucibleplaybook',
  'neighbor_node': 'destinythegame',
  'reciprocity_dict': {'reciprocity': 0.5384615384615384,
   'summed weights': 286,
   'n-to-m': 154,
   'm_to_n': 132}},
 {'focus_node': 'subredditdrama',
  'neighbor_node': 'drama',
  'reciprocity_dict': {'reciprocity': 0.46,
   'summed weights': 250,
   'n-to-m': 115,
   'm_to_n': 135}},
 {'focus_node': 'drama',
  'neighbor_node': 'subredditdrama',
  'recip

## Now let's look at people who are ignored

In [25]:
# Ignored, but not quite totally ignored
ignored = [edge for edge in full_reciprocities if edge['reciprocity_dict']['reciprocity']>0.9 and edge['reciprocity_dict']['reciprocity']<1]

In [26]:
len(ignored)

1423

In [27]:
totally_ignored = [edge for edge in full_reciprocities if edge['reciprocity_dict']['reciprocity']==1]

In [28]:
len(totally_ignored)

279691

### Now, let's look at people who share a lot about the other and are still ignored

_**Actually not sure if it's "ignored" or if it should be called "gossipy".**_

In [29]:
# Ignored
print(len([edge for edge in ignored if edge['reciprocity_dict']['summed weights']>50]))
print(len([edge for edge in ignored if edge['reciprocity_dict']['summed weights']>100]))
print(len([edge for edge in ignored if edge['reciprocity_dict']['summed weights']>200]))

342
140
20


In [30]:
# Totally ignored
print(len([edge for edge in totally_ignored if edge['reciprocity_dict']['summed weights']>50]))
print(len([edge for edge in totally_ignored if edge['reciprocity_dict']['summed weights']>100]))
print(len([edge for edge in totally_ignored if edge['reciprocity_dict']['summed weights']>200]))

651
217
44


In [31]:
#Heavily Ignored / Big Media / Gossip / Paparazzi
[edge for edge in ignored if edge['reciprocity_dict']['summed weights']>200][:5]

[{'focus_node': 'bestof',
  'neighbor_node': 'todayilearned',
  'reciprocity_dict': {'reciprocity': 0.9957446808510638,
   'summed weights': 235,
   'n-to-m': 234,
   'm_to_n': 1}},
 {'focus_node': 'bestof',
  'neighbor_node': 'explainlikeimfive',
  'reciprocity_dict': {'reciprocity': 0.9952830188679245,
   'summed weights': 212,
   'n-to-m': 211,
   'm_to_n': 1}},
 {'focus_node': 'bestof',
  'neighbor_node': 'worldnews',
  'reciprocity_dict': {'reciprocity': 0.9953703703703703,
   'summed weights': 216,
   'n-to-m': 215,
   'm_to_n': 1}},
 {'focus_node': 'atletico',
  'neighbor_node': 'soccer',
  'reciprocity_dict': {'reciprocity': 0.9134199134199135,
   'summed weights': 231,
   'n-to-m': 211,
   'm_to_n': 20}},
 {'focus_node': 'goodshibe',
  'neighbor_node': 'dogecoin',
  'reciprocity_dict': {'reciprocity': 0.9739413680781759,
   'summed weights': 307,
   'n-to-m': 299,
   'm_to_n': 8}}]

In [32]:
#Totally Ignored / Big Media / Gossip / Paparazzi
[edge for edge in totally_ignored if edge['reciprocity_dict']['summed weights']>200][:5]

[{'focus_node': 'bestof',
  'neighbor_node': 'adviceanimals',
  'reciprocity_dict': {'reciprocity': 1.0,
   'summed weights': 217,
   'n-to-m': 217,
   'm_to_n': 0}},
 {'focus_node': 'bestof',
  'neighbor_node': 'funny',
  'reciprocity_dict': {'reciprocity': 1.0,
   'summed weights': 214,
   'n-to-m': 214,
   'm_to_n': 0}},
 {'focus_node': 'bestof',
  'neighbor_node': 'wtf',
  'reciprocity_dict': {'reciprocity': 1.0,
   'summed weights': 222,
   'n-to-m': 222,
   'm_to_n': 0}},
 {'focus_node': 'bestof',
  'neighbor_node': 'pics',
  'reciprocity_dict': {'reciprocity': 1.0,
   'summed weights': 222,
   'n-to-m': 222,
   'm_to_n': 0}},
 {'focus_node': 'bestof',
  'neighbor_node': 'politics',
  'reciprocity_dict': {'reciprocity': 1.0,
   'summed weights': 203,
   'n-to-m': 203,
   'm_to_n': 0}}]

This is beginning to look like it's all just "best_of", which makes sense, since this would aggregate things from all over but probably not be referenced back.

In [33]:
Counter([x['focus_node'] for x in [edge for edge in totally_ignored if edge['reciprocity_dict']['summed weights']>200]]).most_common()

[('circlebroke2', 7),
 ('bestof', 6),
 ('shitredditsays', 6),
 ('switcharoo', 4),
 ('shitpost', 3),
 ('shitstatistssay', 2),
 ('bestoftldr', 2),
 ('fitnesscirclejerk', 1),
 ('badphilosophy', 1),
 ('programmingcirclejerk', 1),
 ('edmprodcirclejerk', 1),
 ('mushroomkingdom', 1),
 ('bluejackets', 1),
 ('metacanada', 1),
 ('beercirclejerk', 1),
 ('trendingsubreddits', 1),
 ('bicyclingcirclejerk', 1),
 ('evenwithcontext', 1),
 ('titlegore', 1),
 ('nightlypick', 1),
 ('moronicmondayandroid', 1)]

In [34]:
Counter([x['focus_node'] for x in [edge for edge in ignored if edge['reciprocity_dict']['summed weights']>200]]).most_common()

[('bestof', 3),
 ('subredditdrama', 2),
 ('enoughlibertarianspam', 2),
 ('atletico', 1),
 ('goodshibe', 1),
 ('shitredditsays', 1),
 ('writingprompts', 1),
 ('badphilosophy', 1),
 ('japancirclejerk', 1),
 ('drugscirclejerk', 1),
 ('streetfighter', 1),
 ('shitpost', 1),
 ('runningcirclejerk', 1),
 ('babymetal', 1),
 ('mfacirclejerk', 1),
 ('baseballnightlypick', 1)]