# Project 1
## DATA 620 - June 2022
#### Benson Toi, Ahmed Elsaeyed, Noah Collin

Data Source:
https://snap.stanford.edu/data/soc-RedditHyperlinks.html

“The hyperlink network represents the directed connections between two subreddits (a subreddit is a community on Reddit). We also provide subreddit embeddings. The network is extracted from publicly available Reddit data of 2.5 years from Jan 2014 to April 2017.”


### Dependencies


In [3]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout
import urllib 
import csv
from platform import python_version

# Record whether scipy can be imported so later cells can check the flag
# before touching the module.
scipyImported = True
try:
    import scipy
except ImportError:
    # Only a missing/unimportable scipy is tolerated. Any other exception
    # (including KeyboardInterrupt) propagates instead of being silently
    # swallowed by a bare `except:`.
    scipyImported = False

In [4]:
# Report the library and interpreter versions used for this run.
# The scipy line is only included when the import above succeeded.
version_lines = [
    "Pandas version: {}".format(pd.__version__),
    "Networkx version: {}".format(nx.__version__),
]
if scipyImported:
    version_lines.append("Scipy version: {}".format(scipy.__version__))
version_lines.append("Python version: {}".format(python_version()))
print("\n".join(version_lines))

Pandas version: 1.1.3
Networkx version: 2.5
Scipy version: 1.5.2
Python version: 3.8.5


### Load Data

In [5]:
# Set this to False to download the dataset instead of reading a local copy
# stored in the same folder as this notebook.
LocalFile = True

In [6]:
# Read the tab-separated hyperlink data with pandas, either from the local copy
# or directly from the SNAP website, depending on LocalFile.
# Fetching the file from the internet every time takes about 20 minutes on my connection.
url = (
    "soc-redditHyperlinks-body.tsv"
    if LocalFile
    else "http://snap.stanford.edu/data/soc-redditHyperlinks-body.tsv"
)
reddit_data_df = pd.read_csv(url, sep='\t')

In [7]:
# Preview the first five rows of the raw edge list.
reddit_data_df.head(n=5)

Unnamed: 0,SOURCE_SUBREDDIT,TARGET_SUBREDDIT,POST_ID,TIMESTAMP,LINK_SENTIMENT,PROPERTIES
0,leagueoflegends,teamredditteams,1u4nrps,2013-12-31 16:39:58,1,"345.0,298.0,0.75652173913,0.0173913043478,0.08..."
1,theredlion,soccer,1u4qkd,2013-12-31 18:18:37,-1,"101.0,98.0,0.742574257426,0.019801980198,0.049..."
2,inlandempire,bikela,1u4qlzs,2014-01-01 14:54:35,1,"85.0,85.0,0.752941176471,0.0235294117647,0.082..."
3,nfl,cfb,1u4sjvs,2013-12-31 17:37:55,1,"1124.0,949.0,0.772241992883,0.0017793594306,0...."
4,playmygame,gamedev,1u4w5ss,2014-01-01 02:51:13,1,"715.0,622.0,0.777622377622,0.00699300699301,0...."


### " Identify and load a network dataset that has some categorical information available for each node." ✓

In [8]:
# Build a directed graph straight from the DataFrame: each row becomes an edge
# from SOURCE_SUBREDDIT to TARGET_SUBREDDIT, and LINK_SENTIMENT is stored as an
# edge attribute so it can be used as a weight later on.
# NOTE(review): a DiGraph holds a single edge per (source, target) pair, so
# repeated links between the same two subreddits presumably collapse into one
# edge carrying the last row's LINK_SENTIMENT - confirm this is intended.
G = nx.from_pandas_edgelist(
    reddit_data_df,
    source='SOURCE_SUBREDDIT',
    target='TARGET_SUBREDDIT',
    edge_attr=['LINK_SENTIMENT'],
    create_using=nx.DiGraph(),
)

### "For each of the nodes in the dataset, calculate <u>degree</u> centrality and eigenvector centrality." ✓

In [9]:
# Degree view over G: for every node, a simple count of the links going in or
# out of it.
# NOTE(review): these are raw counts; nx.degree_centrality(G) would give the
# normalised measure named in the heading - confirm which one is wanted.
deg = G.degree()

In [10]:
# Rank the nodes by total degree (in-degree plus out-degree), largest first.
sorted(G.degree(), key=lambda node_degree: node_degree[1], reverse=True)

[('askreddit', 2524),
 ('iama', 2030),
 ('subredditdrama', 1804),
 ('writingprompts', 1143),
 ('outoftheloop', 1099),
 ('pics', 957),
 ('videos', 882),
 ('leagueoflegends', 833),
 ('todayilearned', 816),
 ('gaming', 810),
 ('funny', 797),
 ('conspiracy', 780),
 ('legaladvice', 710),
 ('copypasta', 693),
 ('drama', 690),
 ('worldnews', 667),
 ('explainlikeimfive', 666),
 ('the_donald', 657),
 ('mhoc', 624),
 ('news', 591),
 ('dogecoin', 584),
 ('subredditoftheday', 582),
 ('bitcoin', 559),
 ('techsupport', 553),
 ('pcmasterrace', 541),
 ('games', 534),
 ('showerthoughts', 524),
 ('askhistorians', 514),
 ('nostupidquestions', 494),
 ('tipofmypenis', 494),
 ('shitliberalssay', 493),
 ('circlejerkcopypasta', 491),
 ('technology', 490),
 ('self', 490),
 ('hailcorporate', 484),
 ('justunsubbed', 480),
 ('movies', 471),
 ('tifu', 469),
 ('circlebroke', 466),
 ('politics', 459),
 ('help', 453),
 ('science', 451),
 ('changemyview', 447),
 ('relationships', 446),
 ('adviceanimals', 440),
 ('anim

### "For each of the nodes in the dataset, calculate degree centrality and <u>eigenvector</u> centrality." ✓

In [11]:
# Eigenvector centrality of every subreddit in G, highest score first.
{
    subreddit: score
    for subreddit, score in sorted(
        nx.eigenvector_centrality(G).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
}

{'askreddit': 0.2651849714317706,
 'iama': 0.25348488091361604,
 'videos': 0.18016430348316215,
 'pics': 0.17390465153200324,
 'todayilearned': 0.16648951718218455,
 'worldnews': 0.14377204939239926,
 'funny': 0.13870523888975217,
 'news': 0.1365494961612271,
 'gaming': 0.1350220379131744,
 'technology': 0.12484434665059566,
 'explainlikeimfive': 0.12386133396713653,
 'outoftheloop': 0.11881518150022308,
 'pcmasterrace': 0.11137174134616207,
 'science': 0.1103765609877534,
 'adviceanimals': 0.10496866924088415,
 'showerthoughts': 0.10178261198509378,
 'tifu': 0.10118214019433254,
 'wtf': 0.09866637898757802,
 'movies': 0.0980174113266851,
 'dataisbeautiful': 0.09639172682906438,
 'gifs': 0.09445805809075344,
 'subredditdrama': 0.09201055112609688,
 'lifeprotips': 0.08888405815607985,
 'games': 0.08832216796540696,
 'askscience': 0.08657930005317416,
 'politics': 0.08596470600788328,
 'legaladvice': 0.08520548313809634,
 'leagueoflegends': 0.08333861808059354,
 'relationships': 0.083161

### "Compare your centrality measures across your categorical groups." ✓

In [12]:
# Split the edges of G by the sentiment of the cross-subreddit post:
# LINK_SENTIMENT == 1 is positive/neutral, LINK_SENTIMENT == -1 is negative.
# Each subset becomes its own directed graph.
positive_only = nx.DiGraph()
positive_only.add_edges_from(
    (src, dst, attrs)
    for src, dst, attrs in G.edges(data=True)
    if attrs['LINK_SENTIMENT'] == 1
)

negative_only = nx.DiGraph()
negative_only.add_edges_from(
    (src, dst, attrs)
    for src, dst, attrs in G.edges(data=True)
    if attrs['LINK_SENTIMENT'] == -1
)


In [13]:
# Node counts of the two sentiment-specific graphs.
print("Number of Nodes in positive_only: {}".format(positive_only.number_of_nodes()))
print("Number of Nodes in negative_only: {}".format(negative_only.number_of_nodes()))

Number of Nodes in positive_only: 34936
Number of Nodes in negative_only: 5425


In [14]:
# Eigenvector centrality when only positive/neutral links are considered,
# highest score first.
{
    subreddit: score
    for subreddit, score in sorted(
        nx.eigenvector_centrality(positive_only, max_iter=500).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
}

{'iama': 0.2714736670809444,
 'askreddit': 0.25985802238349254,
 'pics': 0.17986584793023644,
 'videos': 0.17934479761842773,
 'todayilearned': 0.1640515010605289,
 'gaming': 0.14075512991113445,
 'worldnews': 0.13023767874663472,
 'funny': 0.12400078900618725,
 'explainlikeimfive': 0.12339954812047306,
 'outoftheloop': 0.12292340420263842,
 'technology': 0.12223720690436488,
 'news': 0.12126388121879822,
 'pcmasterrace': 0.11726854225424534,
 'science': 0.10334629106535505,
 'showerthoughts': 0.10036519461359594,
 'adviceanimals': 0.09943638094662569,
 'dataisbeautiful': 0.09922478285011906,
 'tifu': 0.0966421985752561,
 'movies': 0.0952678200240358,
 'lifeprotips': 0.09395765725362386,
 'leagueoflegends': 0.09181081012491718,
 'games': 0.09090403152771068,
 'wtf': 0.09010671700245765,
 'gifs': 0.08908531856421054,
 'askscience': 0.08790701678544216,
 'subredditdrama': 0.08605481936682455,
 'books': 0.08359598498206193,
 'politics': 0.08345103074687647,
 'writingprompts': 0.0800260547

In [15]:
# Eigenvector centrality when only negative links are considered,
# highest score first.
{
    subreddit: score
    for subreddit, score in sorted(
        nx.eigenvector_centrality(negative_only, max_iter=500).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
}

{'askreddit': 0.2551655925847652,
 'worldnews': 0.20621059910016545,
 'funny': 0.1937536606994875,
 'news': 0.18777767946091006,
 'twoxchromosomes': 0.13128695440543173,
 'wtf': 0.13075419125922863,
 'adviceanimals': 0.1253896462326092,
 'videos': 0.1228062159951698,
 'todayilearned': 0.11414859594154636,
 'pics': 0.10954664333132917,
 'subredditdrama': 0.10808776669884149,
 'explainlikeimfive': 0.10795332676352301,
 'offmychest': 0.10602764301799009,
 'truereddit': 0.10582229325339712,
 'technology': 0.10510113264671792,
 'the_donald': 0.1032413957089842,
 'relationships': 0.10258066159646247,
 'movies': 0.10250837118389892,
 'gaming': 0.09854692129381666,
 'self': 0.0961216759440644,
 'gifs': 0.09534776977609122,
 'europe': 0.09448935217861594,
 'legaladvice': 0.09385414747117472,
 'science': 0.09372839560278655,
 'iama': 0.09300885172844667,
 'unexpected': 0.0927482336393737,
 'confession': 0.08738060414935718,
 'askhistorians': 0.08505459899370275,
 'rage': 0.08226392951747728,
 'a

In [16]:
##This takes about 2 minutes to run on my machine.  Setting K higher does more sampling, gets better result
#dict(sorted(nx.betweenness_centrality(positive_only, k =150).items(), key = lambda x: x[1], reverse = True))

In [17]:
##This takes about 2 minutes to run on my machine.  Setting K higher does more sampling, gets better result
#dict(sorted(nx.betweenness_centrality(negative_only, k =1500).items(), key = lambda x: x[1], reverse = True))

In [18]:
##This took about 5 minutes to run on my machine
#sorted(nx.closeness_centrality(positive_only).items(), key = lambda x: x[1], reverse = True)

In [19]:
##This takes about a minute to run on my machine
#dict(sorted(nx.closeness_centrality(negative_only).items(), key = lambda x: x[1], reverse = True))

In [23]:
# Centrality measures for the positive/neutral graph, one row per subreddit
# (rows ordered alphabetically by subreddit name).
# This cell is slow (minutes), so each measure is computed exactly once and
# reused, instead of running closeness centrality separately for the
# "Subreddit" and "Closeness Centrality" columns.
positive_closeness = nx.closeness_centrality(positive_only)
# Betweenness is estimated from k=150 sampled nodes; the fixed seed makes the
# sample, and therefore this column, reproducible across runs.
positive_betweenness = nx.betweenness_centrality(positive_only, k=150, seed=620)
positive_eigenvector = nx.eigenvector_centrality(positive_only, max_iter=200)

# Every result dict is keyed by node, so values are looked up by subreddit name
# rather than relying on separately sorted dicts lining up row by row.
positive_subreddits = sorted(positive_closeness)
positiveDF = pd.DataFrame({
    "Subreddit": positive_subreddits,
    "Closeness Centrality": [positive_closeness[name] for name in positive_subreddits],
    "Betweenness Centrality": [positive_betweenness[name] for name in positive_subreddits],
    "Eigenvector Centrality": [positive_eigenvector[name] for name in positive_subreddits],
})



#### Positive/Neutral Sentiment subreddit Post Centrality Measures:

In [24]:
# Top 20 subreddits in the positive/neutral graph by closeness centrality.
(
    positiveDF
    .sort_values(by="Closeness Centrality", ascending=False)
    .head(n=20)
)

Unnamed: 0,Subreddit,Closeness Centrality,Betweenness Centrality,Eigenvector Centrality
2118,askreddit,0.306423,0.058437,0.259858
14307,iama,0.299594,0.044162,0.271474
33108,videos,0.280034,5.1e-05,0.179345
22711,pics,0.278951,0.000224,0.179866
31354,todayilearned,0.275123,0.0,0.164052
21918,outoftheloop,0.268661,0.02641,0.122923
11962,gaming,0.268401,0.010968,0.140755
34217,worldnews,0.26757,0.000135,0.130238
11683,funny,0.265277,0.001342,0.124001
10181,explainlikeimfive,0.26364,0.00926,0.1234


In [25]:
# Top 20 subreddits in the positive/neutral graph by eigenvector centrality.
(
    positiveDF
    .sort_values(by="Eigenvector Centrality", ascending=False)
    .head(n=20)
)

Unnamed: 0,Subreddit,Closeness Centrality,Betweenness Centrality,Eigenvector Centrality
14307,iama,0.299594,0.044162,0.271474
2118,askreddit,0.306423,0.058437,0.259858
22711,pics,0.278951,0.000224,0.179866
33108,videos,0.280034,5.1e-05,0.179345
31354,todayilearned,0.275123,0.0,0.164052
11962,gaming,0.268401,0.010968,0.140755
34217,worldnews,0.26757,0.000135,0.130238
11683,funny,0.265277,0.001342,0.124001
10181,explainlikeimfive,0.26364,0.00926,0.1234
21918,outoftheloop,0.268661,0.02641,0.122923


In [26]:
# Centrality measures for the negative-sentiment graph, one row per subreddit
# (rows ordered alphabetically by subreddit name).
# Each measure is computed exactly once and reused, instead of running
# closeness centrality separately for the "Subreddit" and
# "Closeness Centrality" columns.
negative_closeness = nx.closeness_centrality(negative_only)
# Betweenness is estimated from k=1500 sampled nodes; the fixed seed makes the
# sample, and therefore this column, reproducible across runs.
negative_betweenness = nx.betweenness_centrality(negative_only, k=1500, seed=620)
negative_eigenvector = nx.eigenvector_centrality(negative_only, max_iter=250)

# Every result dict is keyed by node, so values are looked up by subreddit name
# rather than relying on separately sorted dicts lining up row by row.
negative_subreddits = sorted(negative_closeness)
negativeDF = pd.DataFrame({
    "Subreddit": negative_subreddits,
    "Closeness Centrality": [negative_closeness[name] for name in negative_subreddits],
    "Betweenness Centrality": [negative_betweenness[name] for name in negative_subreddits],
    "Eigenvector Centrality": [negative_eigenvector[name] for name in negative_subreddits],
})

#### Negative Sentiment subreddit Post Centrality Measures:

In [27]:
# Top 20 subreddits in the negative-sentiment graph by closeness centrality.
(
    negativeDF
    .sort_values(by="Closeness Centrality", ascending=False)
    .head(n=20)
)

Unnamed: 0,Subreddit,Closeness Centrality,Betweenness Centrality,Eigenvector Centrality
390,askreddit,0.111053,0.037798,0.255166
3248,news,0.107211,6.8e-05,0.187778
1906,funny,0.106062,0.007424,0.193754
5338,worldnews,0.105532,0.0,0.206211
5364,wtf,0.102613,0.0,0.130754
2685,legaladvice,0.098357,0.011878,0.093854
5004,truereddit,0.098115,0.000665,0.105822
3121,movies,0.097482,0.0049,0.102508
4133,science,0.09712,0.0,0.093728
5076,unexpected,0.097009,0.000117,0.092748


### Other Data Exploration

In [28]:
# G.edges(...) enumerates edges as (source, target) pairs. With data=True each
# edge also carries its attribute dict - here LINK_SENTIMENT, the column that
# was attached when the graph was built (other columns could have been added
# too). Passing a list of source nodes limits the result to their edges, so the
# output stays small.
G.edges(["iama"], data=True)

OutEdgeDataView([('iama', 'todayilearned', {'LINK_SENTIMENT': 1}), ('iama', 'harley', {'LINK_SENTIMENT': 1}), ('iama', 'wtf', {'LINK_SENTIMENT': 1}), ('iama', '3dprinting', {'LINK_SENTIMENT': 1}), ('iama', 'videos', {'LINK_SENTIMENT': 1}), ('iama', 'politics', {'LINK_SENTIMENT': 1}), ('iama', 'india', {'LINK_SENTIMENT': 1}), ('iama', 'nationals', {'LINK_SENTIMENT': 1}), ('iama', 'gamingpc', {'LINK_SENTIMENT': 1}), ('iama', 'posthardcore', {'LINK_SENTIMENT': 1}), ('iama', 'hockey', {'LINK_SENTIMENT': 1}), ('iama', 'backtothefuture', {'LINK_SENTIMENT': 1}), ('iama', 'pics', {'LINK_SENTIMENT': 1}), ('iama', 'eldertrees', {'LINK_SENTIMENT': 1}), ('iama', 'rails', {'LINK_SENTIMENT': 1}), ('iama', 'ama', {'LINK_SENTIMENT': 1}), ('iama', 'hailcorporate', {'LINK_SENTIMENT': 1}), ('iama', 'climbing', {'LINK_SENTIMENT': 1}), ('iama', 'books', {'LINK_SENTIMENT': 1}), ('iama', 'television', {'LINK_SENTIMENT': 1}), ('iama', 'asksocialscience', {'LINK_SENTIMENT': 1}), ('iama', 'horror', {'LINK_SENTI

In [29]:
# Rank nodes by weighted degree: instead of counting connections, sum the
# LINK_SENTIMENT value of every edge touching the node. Largest total first.
sorted(
    G.degree(weight="LINK_SENTIMENT"),
    key=lambda node_total: node_total[1],
    reverse=True,
)

[('askreddit', 2070),
 ('iama', 1886),
 ('outoftheloop', 969),
 ('writingprompts', 935),
 ('subredditdrama', 828),
 ('pics', 801),
 ('leagueoflegends', 753),
 ('videos', 698),
 ('gaming', 684),
 ('todayilearned', 624),
 ('funny', 601),
 ('mhoc', 580),
 ('legaladvice', 566),
 ('dogecoin', 556),
 ('conspiracy', 536),
 ('explainlikeimfive', 530),
 ('subredditoftheday', 520),
 ('techsupport', 505),
 ('bitcoin', 503),
 ('pcmasterrace', 481),
 ('games', 470),
 ('the_donald', 465),
 ('tipofmypenis', 456),
 ('worldnews', 453),
 ('justunsubbed', 436),
 ('copypasta', 427),
 ('askhistorians', 418),
 ('showerthoughts', 414),
 ('anime', 406),
 ('nostupidquestions', 404),
 ('csshelp', 404),
 ('news', 399),
 ('hailcorporate', 394),
 ('technology', 394),
 ('help', 393),
 ('science', 387),
 ('music', 382),
 ('soccer', 379),
 ('movies', 379),
 ('tifu', 377),
 ('buildapc', 370),
 ('tipofmytongue', 364),
 ('dota2', 361),
 ('self', 360),
 ('sandersforpresident', 358),
 ('books', 355),
 ('android', 355),
 (

In [30]:
# out_degree counts the outgoing connections of each node; rank largest first.
sorted(G.out_degree(), key=lambda node_count: node_count[1], reverse=True)

[('subredditdrama', 1350),
 ('copypasta', 636),
 ('drama', 600),
 ('subredditoftheday', 559),
 ('outoftheloop', 507),
 ('circlejerkcopypasta', 472),
 ('tipofmypenis', 472),
 ('shitliberalssay', 470),
 ('justunsubbed', 464),
 ('conspiracy', 433),
 ('hailcorporate', 427),
 ('writingprompts', 426),
 ('circlebroke', 401),
 ('legaladvice', 384),
 ('iama', 384),
 ('spam', 376),
 ('karmacourt', 375),
 ('csshelp', 375),
 ('help', 365),
 ('askreddit', 363),
 ('bestofoutrageculture', 339),
 ('nostupidquestions', 328),
 ('bugs', 327),
 ('techsupport', 312),
 ('dogecoin', 301),
 ('self', 299),
 ('the_donald', 286),
 ('thesefuckingaccounts', 279),
 ('subredditcancer', 275),
 ('leagueoflegends', 256),
 ('postpreview', 249),
 ('subreddit_stats', 248),
 ('tipofmytongue', 236),
 ('test', 235),
 ('gaming', 226),
 ('shitredditsays', 221),
 ('profanitywatch', 221),
 ('circlebroke2', 220),
 ('askhistorians', 219),
 ('undelete', 219),
 ('badhistory', 215),
 ('explainlikeimfive', 209),
 ('entrepreneur', 206)

In [31]:
# With weight="LINK_SENTIMENT" the out-degree becomes the sum of the sentiment
# values on a node's outgoing edges. Ascending order puts the most negative
# totals first.
sorted(G.out_degree(weight="LINK_SENTIMENT"), key=lambda node_total: node_total[1])

[('badkarma', -17),
 ('shitgreeddisastersays', -15),
 ('respectfullyyours', -12),
 ('nolibswatch', -11),
 ('randomshit', -8),
 ('christianlaughs', -8),
 ('redditliars', -7),
 ('fuckredditmods', -7),
 ('wallpaperdump', -6),
 ('shitsassay', -6),
 ('digitalcartel', -5),
 ('matthosszone', -5),
 ('semebay', -5),
 ('targetedindividuals', -5),
 ('iamnotracistbut', -5),
 ('nationalsocialism', -4),
 ('aidanpics', -4),
 ('nsa', -4),
 ('blackops', -4),
 ('predators', -3),
 ('straya', -3),
 ('cybercock', -3),
 ('maping', -3),
 ('etiquette', -3),
 ('zugorphans', -3),
 ('globalredditmeetupday', -3),
 ('topcuntsofreddit', -3),
 ('natesgate', -3),
 ('insults', -3),
 ('evilbuildings', -3),
 ('schizophreniaanxiety', -3),
 ('islamicstate', -3),
 ('fakenewswatch', -3),
 ('paypai', -3),
 ('krasnayaarmiya', -3),
 ('aznlivesmatter', -3),
 ('boxingcirclejerk', -2),
 ('srsmen', -2),
 ('subredditdramas', -2),
 ('mensrightsmeta', -2),
 ('rants', -2),
 ('transphobiaproject', -2),
 ('sexyshopping', -2),
 ('awwwtf'

In [32]:
# in_degree is the incoming counterpart: sum LINK_SENTIMENT over each node's
# incoming edges and rank the largest totals first.
sorted(
    G.in_degree(weight="LINK_SENTIMENT"),
    key=lambda node_total: node_total[1],
    reverse=True,
)

[('askreddit', 1789),
 ('iama', 1516),
 ('pics', 797),
 ('videos', 695),
 ('writingprompts', 637),
 ('todayilearned', 624),
 ('funny', 567),
 ('mhoc', 551),
 ('outoftheloop', 522),
 ('leagueoflegends', 521),
 ('gaming', 474),
 ('worldnews', 447),
 ('pcmasterrace', 426),
 ('news', 391),
 ('science', 378),
 ('explainlikeimfive', 355),
 ('technology', 352),
 ('bitcoin', 349),
 ('subredditdrama', 332),
 ('games', 330),
 ('showerthoughts', 320),
 ('gifs', 307),
 ('adviceanimals', 298),
 ('movies', 297),
 ('wtf', 292),
 ('politics', 290),
 ('music', 279),
 ('dogecoin', 277),
 ('conspiracy', 273),
 ('soccer', 271),
 ('books', 271),
 ('the_donald', 269),
 ('android', 268),
 ('tifu', 268),
 ('anime', 266),
 ('modelusgov', 263),
 ('legaladvice', 262),
 ('globaloffensive', 245),
 ('askscience', 242),
 ('lifeprotips', 240),
 ('askhistorians', 239),
 ('sandersforpresident', 238),
 ('nfl', 236),
 ('dota2', 232),
 ('relationships', 232),
 ('personalfinance', 230),
 ('dataisbeautiful', 230),
 ('techsu

In [33]:
# Weighted in-degree within the positive/neutral graph, largest first.
sorted(
    positive_only.in_degree(weight="LINK_SENTIMENT"),
    key=lambda node_total: node_total[1],
    reverse=True,
)

[('askreddit', 1975),
 ('iama', 1581),
 ('pics', 875),
 ('videos', 787),
 ('todayilearned', 720),
 ('writingprompts', 677),
 ('funny', 662),
 ('mhoc', 573),
 ('outoftheloop', 557),
 ('worldnews', 554),
 ('leagueoflegends', 549),
 ('gaming', 529),
 ('news', 486),
 ('pcmasterrace', 452),
 ('science', 410),
 ('explainlikeimfive', 406),
 ('technology', 397),
 ('subredditdrama', 393),
 ('adviceanimals', 369),
 ('bitcoin', 364),
 ('games', 352),
 ('gifs', 351),
 ('politics', 350),
 ('showerthoughts', 347),
 ('wtf', 342),
 ('movies', 337),
 ('the_donald', 320),
 ('conspiracy', 310),
 ('tifu', 309),
 ('music', 298),
 ('legaladvice', 294),
 ('relationships', 287),
 ('soccer', 282),
 ('anime', 281),
 ('books', 281),
 ('dogecoin', 280),
 ('android', 278),
 ('modelusgov', 268),
 ('askhistorians', 267),
 ('globaloffensive', 265),
 ('askscience', 263),
 ('lifeprotips', 261),
 ('nfl', 256),
 ('dataisbeautiful', 256),
 ('sandersforpresident', 254),
 ('personalfinance', 253),
 ('dota2', 244),
 ('offmyc

In [34]:
# Weighted in-degree within the negative graph. Ascending order puts the
# most negative totals first.
sorted(
    negative_only.in_degree(weight="LINK_SENTIMENT"),
    key=lambda node_total: node_total[1],
)

[('askreddit', -186),
 ('worldnews', -107),
 ('todayilearned', -96),
 ('funny', -95),
 ('news', -95),
 ('videos', -92),
 ('pics', -78),
 ('adviceanimals', -71),
 ('iama', -65),
 ('subredditdrama', -61),
 ('politics', -60),
 ('gaming', -55),
 ('relationships', -55),
 ('explainlikeimfive', -51),
 ('the_donald', -51),
 ('wtf', -50),
 ('twoxchromosomes', -46),
 ('technology', -45),
 ('gifs', -44),
 ('tifu', -41),
 ('writingprompts', -40),
 ('movies', -40),
 ('conspiracy', -37),
 ('offmychest', -36),
 ('outoftheloop', -35),
 ('kotakuinaction', -35),
 ('tumblrinaction', -35),
 ('casualiama', -33),
 ('science', -32),
 ('legaladvice', -32),
 ('bestof', -31),
 ('changemyview', -30),
 ('atheism', -29),
 ('europe', -29),
 ('leagueoflegends', -28),
 ('nottheonion', -28),
 ('askhistorians', -28),
 ('showerthoughts', -27),
 ('sex', -26),
 ('mildlyinteresting', -26),
 ('canada', -26),
 ('pcmasterrace', -26),
 ('dataisbeautiful', -26),
 ('confession', -25),
 ('aww', -24),
 ('starwars', -24),
 ('enough