### Load required libraries

In [1]:
import pickle
import json
from collections import Counter

### Load links and dictionary of media bias

In [2]:
# Load the two intermediate results this notebook builds on.
# `temp_links` is iterated below as {subreddit: iterable of (link, count) pairs};
# `media_bias` is iterated below as a collection of records that carry a 'name' key.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load files produced by this project's own earlier steps.
# The `with` blocks make sure both file handles are closed after loading.
with open('../Results/comment_links.pickle', 'rb') as links_file:
    temp_links = pickle.load(links_file)
with open('../Results/media_bias.pickle', 'rb') as bias_file:
    media_bias = pickle.load(bias_file)

### Improve preprocessing 
- Remove the distinction between mobile and desktop domains by stripping the `mobile.` / `m.` prefix, e.g. `m.youtube.com` and `youtube.com` are both counted as `youtube`
- Remove the distinction between country-specific and worldwide domains by stripping parts such as `.com`, `.co` and `.uk`, e.g. `bbc.co.uk` and `bbc.com` are both counted as `bbc`

In [3]:
# Noise labels to drop from a domain, in the order they are applied.
# Each entry is (label, leading):
#   leading=True  -> the label is dropped when another label follows it
#                    (the old 'www.' / 'm.' style patterns);
#   leading=False -> the label is dropped when another label precedes it
#                    (the old '.com' / '.uk' style patterns).
NOISE_LABELS = [
    ('www', True), ('com', False), ('mobile', True), ('m', True),
    ('org', False), ('en', True), ('co', False), ('uk', False),
    ('edu', False), ('i', True), ('ca', False), ('media', False),
    ('media', True), ('go', False), ('upload', False), ('uk', True),
]

# Short links and sub-domains that should be counted under their main outlet.
DOMAIN_ALIASES = {
    'youtu.be': 'youtube',
    'redd.it': 'reddit',
    'projects.fivethirtyeight': 'fivethirtyeight',
    'money.cnn': 'cnn',
    'elections.huffingtonpost': 'huffingtonpost',
    'np.reddit': 'reddit',
    'news.bbc': 'bbc',
}


def normalize_domain(link):
    """Collapse a raw domain to a short outlet name.

    The domain is split on '.', and every whole label listed in NOISE_LABELS
    is dropped (subject to its leading/trailing position rule). Matching whole
    labels instead of raw substrings keeps unrelated names intact: a plain
    ``str.replace('.go', '')`` turned 'docs.google' into 'docsogle', and
    ``replace('en.', '')`` turned 'open.spotify' into 'opspotify'.

    At least one label always survives, because a label is only dropped when
    another label sits before or after it.

    Args:
        link: domain string such as 'www.bbc.co.uk'.

    Returns:
        The normalised name (e.g. 'bbc'), after applying DOMAIN_ALIASES.
    """
    labels = link.split('.')
    for noise, leading in NOISE_LABELS:
        last = len(labels) - 1
        labels = [
            label for pos, label in enumerate(labels)
            if not (label == noise and (pos < last if leading else pos > 0))
        ]
    name = '.'.join(labels)
    return DOMAIN_ALIASES.get(name, name)


# Re-aggregate the per-subreddit link counts under the normalised names, so
# that e.g. 'm.youtube.com' and 'youtu.be' both add to 'youtube'.
links = {}
for subreddit in temp_links:
    links[subreddit] = Counter()
    for link, count in temp_links[subreddit]:
        links[subreddit][normalize_domain(link)] += count

### Update names of media outlets to match the name of the domain for easier detection and add some missing news outlets

In [4]:
# Build `media`: a lookup from a short outlet name to its record in `media_bias`.
# The key starts as the record's 'name' field, lower-cased with spaces removed,
# and is then rewritten by the alias chain below so that it matches the
# normalised link names used as keys in `links`.
# If two records end up with the same key, the later one overwrites the earlier.
media = {}
for outlet in media_bias:
    name = outlet['name'].lower().replace(' ','')
    # NOTE(review): the next three branches have lost their string literals in
    # this copy of the notebook -- the first compares and assigns empty strings,
    # and the two `elif name == :` lines (and the bare `name = `) are not valid
    # Python. Restore the original literals before running this cell.
    if name == '':
        name = ''
    elif name == :
        name = 
    elif name == :
        name = 'nbcnews'
    # Remaining aliases: outlet display name -> short name used for its domain.
    elif name == 'wallstreetjournal-news':
        name = 'wsj'
    elif name == 'dailybeast':
        name = 'thedailybeast'
    elif name == 'timemagazine':
        name = 'time'
    elif name == 'thenewyorker':
        name = 'newyorker'
    elif name == 'breitbartnews':
        name = 'breitbart'
    elif name == 'bbcnews':
        name = 'bbc'
    elif name == 'newyorkmagazine':
        name = 'nymag'
    elif name == 'newyorkpost':
        name = 'nypost'
    elif name == 'buzzfeednews':
        name = 'buzzfeed'
    elif name == 'thedailycaller':
        name = 'dailycaller'
    elif name == 'newyorkdailynews':
        name = 'nydailynews'
    elif name == 'thetelegraph-uk':
        name = 'telegraph'
    elif name == 'huffpost':
        name = 'huffingtonpost'
    elif name == 'losangelestimes':
        name = 'latimes'
    elif name == 'theeconomist':
        name = 'economist'
    elif name == 'nprnews':
        name = 'npr'
    elif name == 'thedailywire':
        name = 'dailywire'
    elif name == 'financialtimes':
        name = 'ft'
    # Names without an alias are stored under their lower-cased, space-free form.
    media[name] = outlet

In [16]:
# Outlets added by hand, as (short name, bias label) pairs.
# Each one is stored as a minimal record holding only the 'bias' field, which
# is the only field the cells below read from `media`.
manual_outlets = [
    ('newstatesman', 'Left'),
    ('mirror', 'Left'),
    ('guardian', 'Left'),
    ('theconservativetreehouse', 'Right'),
    ('wikileaks', 'none'),
    ('thetimes', 'Right'),
    ('independent', 'Left'),
    ('express', 'Right'),
    ('mises', 'Right'),
    ('cato', 'Right'),
]
media.update({
    outlet_name: {'bias': leaning}
    for outlet_name, leaning in manual_outlets
})

### Identify links to news outlets and make sure we found all of them

In [19]:
# Subreddits that are left out of the inspection printout.
skipped_subreddits = ['Republican','democrats','hillaryclinton',
    'The_Farage','Le_Pen','altright','progressive','LateStageCapitalism']

# For every remaining subreddit, print a banner followed by its most-linked
# domains, marking the ones that are known media outlets with their bias.
for subreddit in links:
    if subreddit in skipped_subreddits:
        continue
    print(
        '_______ _________ _________',
        '__ _____ _____ ________ _____',
        subreddit,
        '___ _________ ________ _____',
        '_______ _________ _________',
        sep='\n',
    )
    # The first 41 entries are shown (ranks 0 through 40 inclusive).
    for domain, hits in links[subreddit].most_common(1000)[:41]:
        print(domain, hits)
        if domain in media:
            print('identified', media[domain]['bias'])
        print('_________________')

_______ _________ _________
__ _____ _____ ________ _____
politics
___ _________ ________ _____
_______ _________ _________
reddit 973017
_________________
youtube 333488
_________________
wikipedia 212867
_________________
imgur 143046
_________________
twitter 102424
_________________
washingtonpost 70939
identified LeanLeft
_________________
nytimes 64242
identified LeanLeft
_________________
cnn 43315
identified Left
_________________
politico 34240
identified LeanLeft
_________________
google 33880
_________________
thehill 27357
identified Center
_________________
politifact 24186
identified LeanLeft
_________________
fivethirtyeight 22351
identified Center
_________________
theguardian 21153
identified LeanLeft
_________________
npr 19068
identified Center
_________________
huffingtonpost 17755
identified Left
_________________
businessinsider 16005
identified Center
_________________
snopes 15881
_________________
theatlantic 14465
identified LeanLeft
_________________
giphy 14

### Extract the top 15 most shared news outlets

In [21]:
# For each subreddit of interest, collect the 15 most-linked domains that are
# known media outlets (i.e. present in `media`), in descending link count.
# `pol_links` is created here so the cell runs on a fresh kernel; no earlier
# visible cell defines it.
pol_links = {}
for subreddit in ['SandersForPresident','politics','Conservative','The_Donald']:
    top_outlets = []
    # Only the 1000 most common domains are scanned; a subreddit may therefore
    # end up with fewer than 15 outlets.
    for link, count in links[subreddit].most_common(1000):
        if link in media:
            top_outlets.append(link)
            if len(top_outlets) == 15:
                break
    pol_links[subreddit] = top_outlets

### Generate LaTeX Tables

In [23]:
# Column order of the table. Defined here so the cell does not depend on a
# variable from a cell that is no longer in the notebook; it matches the header
# of the recorded output below.
target = ['SandersForPresident', 'politics', 'Conservative', 'The_Donald']


def latex_cell(outlet):
    """Return the LaTeX cell for one outlet, colour-coded by its bias.

    'Left' / 'LeanLeft' style labels get the `democratic` colour, labels
    containing 'Right' get `republican`, the label 'none' gets no colour, and
    anything else (e.g. 'Center') gets `neutral`.
    """
    bias = media[outlet]['bias']
    if "Left" in bias:
        return "\\cellcolor{democratic} " + outlet
    if "Right" in bias:
        return "\\cellcolor{republican} " + outlet
    if "none" in bias:
        return outlet
    return "\\cellcolor{neutral} " + outlet


# Header row: subreddit names, with underscores escaped for LaTeX.
print(" & ".join(subreddit.replace('_', '\\_') for subreddit in target) + " \\\\")

print("\\hline")

# One table row per rank; each column is the outlet at that rank for a subreddit.
for i in range(15):
    cells = []
    for subreddit in target:
        ranked = pol_links[subreddit]
        # A subreddit with fewer than 15 identified outlets gets an empty cell
        # instead of raising IndexError.
        cells.append(latex_cell(ranked[i]) if i < len(ranked) else "")
    print(" & ".join(cells) + " \\\\")

SandersForPresident & politics & Conservative & The\_Donald \\
\hline
\cellcolor{democratic} nytimes & \cellcolor{democratic} washingtonpost & \cellcolor{democratic} washingtonpost & wikileaks \\
\cellcolor{democratic} washingtonpost & \cellcolor{democratic} nytimes & \cellcolor{democratic} nytimes & \cellcolor{republican} breitbart \\
\cellcolor{democratic} huffingtonpost & \cellcolor{democratic} cnn & \cellcolor{democratic} cnn & \cellcolor{republican} dailymail \\
\cellcolor{neutral} fivethirtyeight & \cellcolor{democratic} politico & \cellcolor{democratic} politico & \cellcolor{republican} foxnews \\
\cellcolor{democratic} cnn & \cellcolor{neutral} thehill & \cellcolor{republican} foxnews & \cellcolor{democratic} nytimes \\
\cellcolor{democratic} politico & \cellcolor{democratic} politifact & \cellcolor{neutral} realclearpolitics & \cellcolor{republican} dailycaller \\
\cellcolor{democratic} theguardian & \cellcolor{neutral} fivethirtyeight & \cellcolor{republican} breitbart & \cel