In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import sys
import os
import csv
import json
import logging
from common_utils import readcol, gentweets, write_csv, filter_dataset
from urls import domain

In [4]:
# Configure the root logger at DEBUG level; read_polscores reports duplicate
# domains through logging.debug, so they are only visible at this level.
logging.basicConfig(level=logging.DEBUG)

In [5]:
def read_polscores(filepath):
    """Load per-domain political scores from a tab-separated file.

    The first line of the file is treated as a header and discarded. For every
    remaining non-blank row, column 0 is the domain (stripped and lower-cased
    to form the key) and column 1 is its score, parsed as a float. When a
    domain appears more than once, the first score is kept and the repeat is
    reported through ``logging.debug``.

    Args:
        filepath: Path to the tab-separated scores file.

    Returns:
        dict mapping the normalised domain to its float score. Empty when the
        file contains no data rows (or is completely empty).

    Raises:
        IndexError: if a non-blank row has fewer than two columns.
        ValueError: if a score cannot be parsed as a float.
    """
    polscores = {}
    # newline='' lets the csv module do its own newline handling, as its docs require.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(reader, None)

        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing empty line);
            # skip those, and rows holding only whitespace, instead of crashing.
            if not any(cell.strip() for cell in row):
                continue

            key = row[0].strip().lower()
            if key in polscores:
                logging.debug('Polscore already read for {}'.format(row[0]))
            else:
                polscores[key] = float(row[1])
    return polscores

In [6]:
def compute_partisanship(data, polscores, min_num_tweets, take_abs=False):
    """Score each user by the political scores of the domains they link to.

    Every entry of every tweet's 'domains' list counts as one link. A link is
    a "news" link when its domain, after being passed through ``domain()``,
    is a key of ``polscores``. A user's score is the sum, over the news
    domains they linked to, of

        (links to that domain / all links by the user) * polscore of the domain

    with ``abs(polscore)`` used instead when ``take_abs`` is true.

    Users with fewer than ``min_num_tweets`` news links are reported on stdout
    and left out of the result.

    Args:
        data: mapping of user id -> iterable of tweets, each tweet being a
            mapping with a 'domains' entry.
        polscores: mapping of domain -> numeric score.
        min_num_tweets: minimum number of news links a user needs to be scored.
        take_abs: when true, weight by the absolute value of each score.

    Returns:
        dict mapping user id -> score for every user that met the minimum.
    """
    link_totals = {}
    news_hits = {}
    for uid in data:
        assert uid not in link_totals
        assert uid not in news_hits

        n_links = 0
        per_source = {}
        for tweet in data[uid]:
            for raw_domain in tweet['domains']:
                n_links += 1
                source = domain(raw_domain)
                if source in polscores:
                    per_source[source] = per_source.get(source, 0) + 1

        link_totals[uid] = n_links
        news_hits[uid] = per_source

    scores = {}
    for uid, per_source in news_hits.items():
        n_news = sum(per_source.values())
        if n_news < min_num_tweets:
            print('User {} does not have the required minimum number of tweets ({}/{}). Skipping.'.format(
                uid, n_news, min_num_tweets
            ))
            continue

        # Shares are taken over ALL links by the user, not only the news links.
        denom = link_totals[uid]
        # Accumulate term by term (rather than with sum()) so floating-point
        # rounding follows the same left-to-right order on every Python version.
        weighted = 0
        for source, hits in per_source.items():
            leaning = abs(polscores[source]) if take_abs else polscores[source]
            weighted += (hits / denom) * leaning
        scores[uid] = weighted
    return scores

In [7]:
def _resolve_id_key(mapping, raw_id):
    """Return the key under which `raw_id` is stored in `mapping`, or None.

    Ids may be ints in one structure and strings in another (JSON object keys
    are always strings), so the id is tried as given, as a str and as an int.
    """
    candidates = [raw_id, str(raw_id)]
    try:
        candidates.append(int(raw_id))
    except (TypeError, ValueError):
        pass  # not a numeric id; only the raw and str forms can match
    for key in candidates:
        if key in mapping:
            return key
    return None


def create_exposure_dataset(data, friends):
    """Build, for each user, the list of tweets posted by their friends.

    For every user id in `data` that has an entry in `friends`, the tweets of
    each listed friend that is itself present in `data` are concatenated, in
    friend-list order. Friends that are not in `data` contribute nothing.
    Users without an entry in `friends` are reported on stdout and skipped.

    Id lookups tolerate int/str differences between `data` and `friends`.

    Args:
        data: mapping of user id -> list of tweets.
        friends: mapping of user id -> iterable of friend ids.

    Returns:
        dict mapping ``int(user id)`` -> list of the friends' tweets. Neither
        input is modified.

    Raises:
        ValueError: if a user id in `data` cannot be converted to int.
    """
    exposure_data = {}
    for raw_uid in data:
        uid = int(raw_uid)
        friends_key = _resolve_id_key(friends, raw_uid)
        if friends_key is None:
            print('No friends in dataset. Skipping.')
            continue

        exposures = []
        for raw_fuid in friends[friends_key]:
            data_key = _resolve_id_key(data, raw_fuid)
            if data_key is not None:
                exposures.extend(data[data_key])
        exposure_data[uid] = exposures
    return exposure_data

In [8]:
# Configuration: threshold passed to compute_partisanship as `min_num_tweets`,
# and whether retweets are kept when filtering the dataset.
MIN_NUM_TWEETS = 8
keep_retweets = True

# The data root comes from the `D` environment variable. Indexing os.environ
# fails with a clear KeyError when it is unset, instead of a TypeError raised
# later from inside os.path.join.
data_dir = os.environ['D']

with open(os.path.join(data_dir, 'stripped-dataset-no-bots.json'), 'r') as f:
    js_data = json.load(f)
data = filter_dataset(js_data, keep_standalone=True, keep_retweet=keep_retweets, keep_quote=True)
polscores = read_polscores(os.path.join(data_dir, 'sources', 'news.tab'))

# Output sub-folder reflects the retweet setting so the two runs do not overwrite each other.
subf = 'with-retweets' if keep_retweets else 'without-retweets'

# One output file per variant: signed scores, then absolute-value scores.
for out_name, use_abs, score_header in (
    ('partisanship.tab', False, 'Partisanship'),
    ('partisanship-abs.tab', True, 'Abs Partisanship'),
):
    dest = os.path.join(data_dir, 'measures', subf, out_name)
    scores = compute_partisanship(data, polscores, MIN_NUM_TWEETS, take_abs=use_abs)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    write_csv(dest, list(scores.items()), ['Twitter ID', score_header])

Skipped counts: {'retweet': 0, 'standalone': 0, 'quote': 0}
User 3154586962 does not have the required minimum number of tweets (5/8). Skipping.
User 3154586962 does not have the required minimum number of tweets (5/8). Skipping.


In [25]:
# The data root comes from the `D` environment variable; indexing os.environ
# fails with a clear KeyError when it is unset.
data_dir = os.environ['D']

# Load the friend lists and derive the exposure dataset from the filtered
# tweets (`data`, `polscores`, `subf` and MIN_NUM_TWEETS come from the cell
# that loads the dataset, which must have been run first).
with open(os.path.join(data_dir, 'friends-reduced.json'), 'r') as f:
    friends = json.load(f)
exposure_data = create_exposure_dataset(data, friends)

# One output file per variant: signed scores, then absolute-value scores.
for out_name, use_abs, score_header in (
    ('partisanship.tab', False, 'Partisanship'),
    ('partisanship-abs.tab', True, 'Abs Partisanship'),
):
    dest = os.path.join(data_dir, 'measures', 'exposure', subf, out_name)
    scores = compute_partisanship(exposure_data, polscores, MIN_NUM_TWEETS, take_abs=use_abs)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    write_csv(dest, list(scores.items()), ['Twitter ID', score_header])

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends

No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends in dataset. Skipping.
No friends